Skip to content

Commit 4f102d7

Browse files
Paolo Abeni authored and kuba-moo committed
mptcp: avoid unneeded subflow-level drops
The rcv window is shared among all the subflows. Currently, MPTCP syncs the TCP-level rcv window with the MPTCP one at tcp_transmit_skb() time.

The above means that incoming data may sporadically observe an outdated TCP-level rcv window and be wrongly dropped by TCP.

Address the issue by checking for the edge condition before queuing the data at TCP level, and eventually syncing the rcv window as needed.

Note that the issue is actually present from the very first MPTCP implementation, but backports older than the blamed commit below will range from impossible to useless.

Before:
$ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
TcpExtBeyondWindow 14 0.0

After:
$ nstat -n; sleep 1; nstat -z TcpExtBeyondWindow
TcpExtBeyondWindow 0 0.0

Fixes: fa3fe2b ("mptcp: track window announced to peer")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20251118-net-mptcp-misc-fixes-6-18-rc6-v1-2-806d3781c95f@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 5e15395 commit 4f102d7

2 files changed

Lines changed: 32 additions & 0 deletions

File tree

net/mptcp/options.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,6 +1044,31 @@ static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
10441044
WRITE_ONCE(msk->snd_una, new_snd_una);
10451045
}
10461046

1047+
/* rwin_update - resync a subflow's TCP-level receive window with the
 * MPTCP-level one before the incoming skb is queued at TCP level.
 *
 * @msk: MPTCP socket; its rcv_wnd_sent is read atomically.
 * @ssk: subflow TCP socket whose tp->rcv_wnd may be enlarged.
 * @skb: incoming packet; only skb->len is inspected here.
 *
 * The receive window is shared among all the subflows, so the value
 * cached in this subflow can lag behind the msk-level one. The function
 * returns without side effects when the skb carries no data, when it is
 * smaller than the current TCP receive window, or when
 * msk->rcv_wnd_sent is not after subflow->rcv_wnd_sent. Otherwise it
 * grows tp->rcv_wnd by the difference and records the new value in
 * subflow->rcv_wnd_sent.
 */
static void rwin_update(struct mptcp_sock *msk, struct sock *ssk,
1048+
struct sk_buff *skb)
1049+
{
1050+
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1051+
struct tcp_sock *tp = tcp_sk(ssk);
1052+
u64 mptcp_rcv_wnd;
1053+
1054+
/* Avoid touching extra cachelines if TCP is going to accept this
1055+
* skb without filling the TCP-level window even with a possibly
1056+
* outdated mptcp-level rwin.
1057+
*/
1058+
if (!skb->len || skb->len < tcp_receive_window(tp))
1059+
return;
1060+
1061+
/* Nothing to resync unless the msk-level value is ahead of the one
 * last recorded for this subflow.
 * NOTE(review): after64() is presumably a wrap-safe 64-bit
 * "greater than" comparison - confirm against its definition.
 */
mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent);
1062+
if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent))
1063+
return;
1064+
1065+
/* Some other subflow grew the mptcp-level rwin since rcv_wup,
1066+
* resync.
1067+
*/
1068+
tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent;
1069+
subflow->rcv_wnd_sent = mptcp_rcv_wnd;
1070+
}
1071+
10471072
static void ack_update_msk(struct mptcp_sock *msk,
10481073
struct sock *ssk,
10491074
struct mptcp_options_received *mp_opt)
@@ -1211,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
12111236
*/
12121237
if (mp_opt.use_ack)
12131238
ack_update_msk(msk, sk, &mp_opt);
1239+
rwin_update(msk, sk, skb);
12141240

12151241
/* Zero-data-length packets are dropped by the caller and not
12161242
* propagated to the MPTCP layer, so the skb extension does not
@@ -1297,6 +1323,10 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
12971323

12981324
if (rcv_wnd_new != rcv_wnd_old) {
12991325
raise_win:
1326+
/* The msk-level rcv wnd is after the tcp level one,
1327+
* sync the latter.
1328+
*/
1329+
rcv_wnd_new = rcv_wnd_old;
13001330
win = rcv_wnd_old - ack_seq;
13011331
tp->rcv_wnd = min_t(u64, win, U32_MAX);
13021332
new_win = tp->rcv_wnd;
@@ -1320,6 +1350,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
13201350

13211351
update_wspace:
13221352
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
1353+
subflow->rcv_wnd_sent = rcv_wnd_new;
13231354
}
13241355

13251356
static void mptcp_track_rwin(struct tcp_sock *tp)

net/mptcp/protocol.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ struct mptcp_subflow_context {
509509
u64 remote_key;
510510
u64 idsn;
511511
u64 map_seq;
512+
u64 rcv_wnd_sent;
512513
u32 snd_isn;
513514
u32 token;
514515
u32 rel_write_seq;

0 commit comments

Comments
 (0)