Skip to content

Commit 98e9587

Browse files
committed
Merge branch 'mptcp-expose-more-info-and-small-improvements'
Matthieu Baerts says: ==================== mptcp: expose more info and small improvements Patches 1-3/9 track and expose some aggregated data counters at the MPTCP level: the number of retransmissions and the bytes that have been transferred. The first patch prepares the work by moving where snd_una is updated for fallback sockets while the last patch adds some tests to cover the new code. Patches 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO. This new socket option allows combining info from the MPTCP_INFO, MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. It can be necessary to have all the info in one call because the path-manager can close and re-create subflows between getsockopt() calls, fooling the accounting. The first patch introduces a unique subflow ID to easily detect when subflows are being re-created with the same 5-tuple while the last patch adds some tests to cover the new code. Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt") can reveal a bug that has been there for some time, see [1]. A fix has recently been sent to netdev for the -net tree: "mptcp: ensure listener is unhashed before updating the sk status", see [2]. There are no conflicts between the two patches but it might be better to apply this series after the one for -net and after having merged "net" into "net-next". Patch 7/9 is similar to commit 47867f0 ("selftests: mptcp: join: skip check if MIB counter not supported") recently applied in the -net tree but here it adapts the new code that is only in net-next (and it fixes a merge conflict resolution which didn't have any impact). Patches 8 and 9/9 are two simple refactorings. One consolidates the transition to TCP_CLOSE in mptcp_do_fastclose() to avoid duplicated code. The other one reduces the scope of an argument passed to the mptcp_pm_alloc_anno_list() function. 
Link: multipath-tcp/mptcp_net-next#407 [1] Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2] ==================== Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 5dfbbaa + 528cb5f commit 98e9587

10 files changed

Lines changed: 356 additions & 46 deletions

File tree

include/uapi/linux/mptcp.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ struct mptcp_info {
123123
__u8 mptcpi_local_addr_used;
124124
__u8 mptcpi_local_addr_max;
125125
__u8 mptcpi_csum_enabled;
126+
__u32 mptcpi_retransmits;
127+
__u64 mptcpi_bytes_retrans;
128+
__u64 mptcpi_bytes_sent;
129+
__u64 mptcpi_bytes_received;
130+
__u64 mptcpi_bytes_acked;
126131
};
127132

128133
/*
@@ -244,9 +249,33 @@ struct mptcp_subflow_addrs {
244249
};
245250
};
246251

252+
/* One per-subflow record: an identifier plus that subflow's addresses. */
struct mptcp_subflow_info {
253+
__u32 id; /* presumably the msk-unique subflow ID this series introduces - TODO confirm against the sockopt code */
254+
struct mptcp_subflow_addrs addrs; /* same layout as a MPTCP_SUBFLOW_ADDRS entry */
255+
};
256+
257+
/* Aggregated MPTCP-level and per-subflow information; by its name this is
 * presumably the argument of the MPTCP_FULL_INFO socket option (confirm
 * against the sockopt implementation, which is not part of this header).
 * Fields marked "set by kernel" must be zeroed by the caller.
 */
struct mptcp_full_info {
258+
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
259+
__u32 size_tcpinfo_user; /* bytes per tcp_info array entry, as sized by userspace */
260+
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
261+
__u32 size_sfinfo_user; /* bytes per subflow_info array entry, as sized by userspace */
262+
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
263+
__u32 size_arrays_user; /* max subflows that userspace is interested in;
264+
* the buffers at subflow_info/tcp_info
265+
* are respectively at least:
266+
* size_arrays_user * size_sfinfo_user
267+
* size_arrays_user * size_tcpinfo_user
268+
* bytes wide
269+
*/
270+
__aligned_u64 subflow_info; /* location of the subflow_info buffer, carried as a 64-bit value */
271+
__aligned_u64 tcp_info; /* location of the tcp_info buffer, carried as a 64-bit value */
272+
struct mptcp_info mptcp_info; /* embedded MPTCP-level info */
273+
};
274+
247275
/* MPTCP socket options */
248276
#define MPTCP_INFO 1
249277
#define MPTCP_TCPINFO 2
250278
#define MPTCP_SUBFLOW_ADDRS 3
279+
#define MPTCP_FULL_INFO 4
251280

252281
#endif /* _UAPI_MPTCP_H */

net/mptcp/options.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
10261026
return cur_seq;
10271027
}
10281028

1029+
/* Move the msk-level snd_una to @new_snd_una and add the distance covered to
 * the bytes_acked counter. No ordering check is done here: ack_update_msk()
 * only calls this when new_snd_una is after64() the old value.
 */
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
1030+
{
1031+
msk->bytes_acked += new_snd_una - msk->snd_una; /* take the delta before snd_una is overwritten below */
1032+
msk->snd_una = new_snd_una;
1033+
}
1034+
10291035
static void ack_update_msk(struct mptcp_sock *msk,
10301036
struct sock *ssk,
10311037
struct mptcp_options_received *mp_opt)
@@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
10571063
__mptcp_check_push(sk, ssk);
10581064

10591065
if (after64(new_snd_una, old_snd_una)) {
1060-
msk->snd_una = new_snd_una;
1066+
__mptcp_snd_una_update(msk, new_snd_una);
10611067
__mptcp_data_acked(sk);
10621068
}
10631069
mptcp_data_unlock(sk);
@@ -1119,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
11191125
mptcp_data_lock(subflow->conn);
11201126
if (sk_stream_memory_free(sk))
11211127
__mptcp_check_push(subflow->conn, sk);
1128+
1129+
/* on fallback we just need to ignore the msk-level snd_una, as
1130+
* this is really plain TCP
1131+
*/
1132+
__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
1133+
11221134
__mptcp_data_acked(subflow->conn);
11231135
mptcp_data_unlock(subflow->conn);
11241136
return true;

net/mptcp/pm_netlink.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -341,15 +341,15 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
341341
}
342342

343343
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
344-
const struct mptcp_pm_addr_entry *entry)
344+
const struct mptcp_addr_info *addr)
345345
{
346346
struct mptcp_pm_add_entry *add_entry = NULL;
347347
struct sock *sk = (struct sock *)msk;
348348
struct net *net = sock_net(sk);
349349

350350
lockdep_assert_held(&msk->pm.lock);
351351

352-
add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
352+
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
353353

354354
if (add_entry) {
355355
if (mptcp_pm_is_kernel(msk))
@@ -366,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
366366

367367
list_add(&add_entry->list, &msk->pm.anno_list);
368368

369-
add_entry->addr = entry->addr;
369+
add_entry->addr = *addr;
370370
add_entry->sock = msk;
371371
add_entry->retrans_times = 0;
372372

@@ -576,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
576576
return;
577577

578578
if (local) {
579-
if (mptcp_pm_alloc_anno_list(msk, local)) {
579+
if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
580580
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
581581
msk->pm.add_addr_signaled++;
582582
mptcp_pm_announce_addr(msk, &local->addr, false);

net/mptcp/pm_userspace.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
193193
lock_sock((struct sock *)msk);
194194
spin_lock_bh(&msk->pm.lock);
195195

196-
if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
196+
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
197197
msk->pm.add_addr_signaled++;
198198
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
199199
mptcp_pm_nl_addr_send_ack(msk);

net/mptcp/protocol.c

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
9696
list_add(&subflow->node, &msk->conn_list);
9797
sock_hold(ssock->sk);
9898
subflow->request_mptcp = 1;
99+
subflow->subflow_id = msk->subflow_id++;
99100

100101
/* This is the first subflow, always with id 0 */
101102
subflow->local_id_valid = 1;
@@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
377378

378379
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
379380
/* in sequence */
381+
msk->bytes_received += copy_len;
380382
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
381383
tail = skb_peek_tail(&sk->sk_receive_queue);
382384
if (tail && mptcp_try_coalesce(sk, tail, skb))
@@ -760,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
760762
MPTCP_SKB_CB(skb)->map_seq += delta;
761763
__skb_queue_tail(&sk->sk_receive_queue, skb);
762764
}
765+
msk->bytes_received += end_seq - msk->ack_seq;
763766
msk->ack_seq = end_seq;
764767
moved = true;
765768
}
@@ -845,6 +848,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
845848
if (sk->sk_socket && !ssk->sk_socket)
846849
mptcp_sock_graft(ssk, sk->sk_socket);
847850

851+
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
848852
mptcp_sockopt_sync_locked(msk, ssk);
849853
mptcp_subflow_joined(msk, ssk);
850854
return true;
@@ -1004,12 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
10041008
struct mptcp_data_frag *dtmp, *dfrag;
10051009
u64 snd_una;
10061010

1007-
/* on fallback we just need to ignore snd_una, as this is really
1008-
* plain TCP
1009-
*/
1010-
if (__mptcp_check_fallback(msk))
1011-
msk->snd_una = READ_ONCE(msk->snd_nxt);
1012-
10131011
snd_una = msk->snd_una;
10141012
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
10151013
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -1537,8 +1535,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
15371535
* that has been handed to the subflow for transmission
15381536
* and skip update in case it was old dfrag.
15391537
*/
1540-
if (likely(after64(snd_nxt_new, msk->snd_nxt)))
1538+
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
1539+
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
15411540
msk->snd_nxt = snd_nxt_new;
1541+
}
15421542
}
15431543

15441544
void mptcp_check_and_set_pending(struct sock *sk)
@@ -2596,6 +2596,7 @@ static void __mptcp_retrans(struct sock *sk)
25962596
}
25972597
if (copied) {
25982598
dfrag->already_sent = max(dfrag->already_sent, info.sent);
2599+
msk->bytes_retrans += copied;
25992600
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
26002601
info.size_goal);
26012602
WRITE_ONCE(msk->allow_infinite_fallback, false);
@@ -2654,6 +2655,7 @@ static void mptcp_do_fastclose(struct sock *sk)
26542655
struct mptcp_subflow_context *subflow, *tmp;
26552656
struct mptcp_sock *msk = mptcp_sk(sk);
26562657

2658+
inet_sk_state_store(sk, TCP_CLOSE);
26572659
mptcp_for_each_subflow_safe(msk, subflow, tmp)
26582660
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
26592661
subflow, MPTCP_CF_FASTCLOSE);
@@ -2691,10 +2693,9 @@ static void mptcp_worker(struct work_struct *work)
26912693
* even if it is orphaned and in FIN_WAIT2 state
26922694
*/
26932695
if (sock_flag(sk, SOCK_DEAD)) {
2694-
if (mptcp_should_close(sk)) {
2695-
inet_sk_state_store(sk, TCP_CLOSE);
2696+
if (mptcp_should_close(sk))
26962697
mptcp_do_fastclose(sk);
2697-
}
2698+
26982699
if (sk->sk_state == TCP_CLOSE) {
26992700
__mptcp_destroy_sock(sk);
27002701
goto unlock;
@@ -2733,6 +2734,7 @@ static int __mptcp_init_sock(struct sock *sk)
27332734
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
27342735
WRITE_ONCE(msk->allow_infinite_fallback, true);
27352736
msk->recovery = false;
2737+
msk->subflow_id = 1;
27362738

27372739
mptcp_pm_data_init(msk);
27382740

@@ -2936,7 +2938,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
29362938
void __mptcp_unaccepted_force_close(struct sock *sk)
29372939
{
29382940
sock_set_flag(sk, SOCK_DEAD);
2939-
inet_sk_state_store(sk, TCP_CLOSE);
29402941
mptcp_do_fastclose(sk);
29412942
__mptcp_destroy_sock(sk);
29422943
}
@@ -2978,7 +2979,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
29782979
/* If the msk has read data, or the caller explicitly ask it,
29792980
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
29802981
*/
2981-
inet_sk_state_store(sk, TCP_CLOSE);
29822982
mptcp_do_fastclose(sk);
29832983
timeout = 0;
29842984
} else if (mptcp_close_state(sk)) {
@@ -3108,6 +3108,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
31083108
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
31093109
mptcp_pm_data_reset(msk);
31103110
mptcp_ca_reset(sk);
3111+
msk->bytes_acked = 0;
3112+
msk->bytes_received = 0;
3113+
msk->bytes_sent = 0;
3114+
msk->bytes_retrans = 0;
31113115

31123116
WRITE_ONCE(sk->sk_shutdown, 0);
31133117
sk_error_report(sk);
@@ -3157,6 +3161,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
31573161
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
31583162
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
31593163

3164+
/* passive msk is created after the first/MPC subflow */
3165+
msk->subflow_id = 2;
3166+
31603167
sock_reset_flag(nsk, SOCK_RCU_FREE);
31613168
security_inet_csk_clone(nsk, req);
31623169

net/mptcp/protocol.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,13 @@ struct mptcp_sock {
262262
u64 local_key;
263263
u64 remote_key;
264264
u64 write_seq;
265+
u64 bytes_sent;
265266
u64 snd_nxt;
267+
u64 bytes_received;
266268
u64 ack_seq;
267269
atomic64_t rcv_wnd_sent;
268270
u64 rcv_data_fin_seq;
271+
u64 bytes_retrans;
269272
int rmem_fwd_alloc;
270273
struct sock *last_snd;
271274
int snd_burst;
@@ -274,6 +277,7 @@ struct mptcp_sock {
274277
* recovery related fields are under data_lock
275278
* protection
276279
*/
280+
u64 bytes_acked;
277281
u64 snd_una;
278282
u64 wnd_end;
279283
unsigned long timer_ival;
@@ -319,7 +323,8 @@ struct mptcp_sock {
319323
u64 rtt_us; /* last maximum rtt of subflows */
320324
} rcvq_space;
321325

322-
u32 setsockopt_seq;
326+
u32 subflow_id;
327+
u32 setsockopt_seq;
323328
char ca_name[TCP_CA_NAME_MAX];
324329
struct mptcp_sock *dl_next;
325330
};
@@ -500,6 +505,8 @@ struct mptcp_subflow_context {
500505
u8 reset_reason:4;
501506
u8 stale_count;
502507

508+
u32 subflow_id;
509+
503510
long delegated_status;
504511
unsigned long fail_tout;
505512

@@ -810,7 +817,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
810817
struct mptcp_addr_info *rem,
811818
u8 bkup);
812819
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
813-
const struct mptcp_pm_addr_entry *entry);
820+
const struct mptcp_addr_info *addr);
814821
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
815822
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
816823
struct mptcp_pm_add_entry *

0 commit comments

Comments
 (0)