Skip to content

Commit de40537

Browse files
author
Martin KaFai Lau
committed
Merge branch 'bpf: Force to MPTCP'
Geliang Tang says: ==================== As is described in the "How to use MPTCP?" section in MPTCP wiki [1]: "Your app should create sockets with IPPROTO_MPTCP as the proto: ( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be forced to create and use MPTCP sockets instead of TCP ones via the mptcpize command bundled with the mptcpd daemon." But the mptcpize (LD_PRELOAD technique) command has some limitations [2]: - it doesn't work if the application is not using libc (e.g. GoLang apps) - in some envs, it might not be easy to set env vars / change the way apps are launched, e.g. on Android - mptcpize needs to be launched with all apps that want MPTCP: we could have more control from BPF to enable MPTCP only for some apps or all the ones of a netns or a cgroup, etc. - it is not in BPF, we cannot talk about it at netdev conf. So this patchset attempts to use BPF to implement functions similar to mptcpize. The main idea is to add a hook in sys_socket() to change the protocol id from IPPROTO_TCP (or 0) to IPPROTO_MPTCP. [1] https://github.com/multipath-tcp/mptcp_net-next/wiki [2] multipath-tcp/mptcp_net-next#79 v14: - Use getsockopt(MPTCP_INFO) to verify mptcp protocol instead of using nstat command. v13: - drop "Use random netns name for mptcp" patch. v12: - update diag_* log of update_socket_protocol. - add 'ip netns show' after 'ip netns del' to check whether a test did not clean up its netns. - return libbpf_get_error() instead of -EIO for the error from open_and_load(). - Use getsockopt(SO_PROTOCOL) to verify mptcp protocol instead of using 'ss -tOni'. v11: - add comments about outputs of 'ss' and 'nstat'. - use "err = verify_mptcpify()" instead of using =+. v10: - drop "#ifdef CONFIG_BPF_JIT". - include vmlinux.h and bpf_tracing_net.h to avoid defining some macros. - drop unneeded checks for mptcp. v9: - update comment for 'update_socket_protocol'. 
v8: - drop the additional checks on the 'protocol' value after the 'update_socket_protocol()' call. v7: - add __weak and __diag_* for update_socket_protocol. v6: - add update_socket_protocol. v5: - add bpf_mptcpify helper. v4: - use lsm_cgroup/socket_create v3: - patch 8: char cmd[128]; -> char cmd[256]; v2: - Fix build selftests errors reported by CI Closes: multipath-tcp/mptcp_net-next#79 ==================== Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2 parents 053bbf9 + ddba122 commit de40537

4 files changed

Lines changed: 221 additions & 20 deletions

File tree

net/mptcp/bpf.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
1919

2020
return NULL;
2121
}
22+
23+
BTF_SET8_START(bpf_mptcp_fmodret_ids)
24+
BTF_ID_FLAGS(func, update_socket_protocol)
25+
BTF_SET8_END(bpf_mptcp_fmodret_ids)
26+
27+
static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
28+
.owner = THIS_MODULE,
29+
.set = &bpf_mptcp_fmodret_ids,
30+
};
31+
32+
static int __init bpf_mptcp_kfunc_init(void)
33+
{
34+
return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
35+
}
36+
late_initcall(bpf_mptcp_kfunc_init);

net/socket.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1657,12 +1657,36 @@ struct file *__sys_socket_file(int family, int type, int protocol)
16571657
return sock_alloc_file(sock, flags, NULL);
16581658
}
16591659

1660+
/* A hook for bpf progs to attach to and update socket protocol.
1661+
*
1662+
* A static noinline declaration here could cause the compiler to
1663+
* optimize away the function. A global noinline declaration will
1664+
* keep the definition, but may optimize away the callsite.
1665+
* Therefore, __weak is needed to ensure that the call is still
1666+
* emitted, by telling the compiler that we don't know what the
1667+
* function might eventually be.
1668+
*
1669+
* __diag_* below are needed to dismiss the missing prototype warning.
1670+
*/
1671+
1672+
__diag_push();
1673+
__diag_ignore_all("-Wmissing-prototypes",
1674+
"A fmod_ret entry point for BPF programs");
1675+
1676+
__weak noinline int update_socket_protocol(int family, int type, int protocol)
1677+
{
1678+
return protocol;
1679+
}
1680+
1681+
__diag_pop();
1682+
16601683
int __sys_socket(int family, int type, int protocol)
16611684
{
16621685
struct socket *sock;
16631686
int flags;
16641687

1665-
sock = __sys_socket_create(family, type, protocol);
1688+
sock = __sys_socket_create(family, type,
1689+
update_socket_protocol(family, type, protocol));
16661690
if (IS_ERR(sock))
16671691
return PTR_ERR(sock);
16681692

tools/testing/selftests/bpf/prog_tests/mptcp.c

Lines changed: 161 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,59 @@
22
/* Copyright (c) 2020, Tessares SA. */
33
/* Copyright (c) 2022, SUSE. */
44

5+
#include <linux/const.h>
6+
#include <netinet/in.h>
57
#include <test_progs.h>
68
#include "cgroup_helpers.h"
79
#include "network_helpers.h"
810
#include "mptcp_sock.skel.h"
11+
#include "mptcpify.skel.h"
912

1013
#define NS_TEST "mptcp_ns"
1114

15+
#ifndef IPPROTO_MPTCP
16+
#define IPPROTO_MPTCP 262
17+
#endif
18+
19+
#ifndef SOL_MPTCP
20+
#define SOL_MPTCP 284
21+
#endif
22+
#ifndef MPTCP_INFO
23+
#define MPTCP_INFO 1
24+
#endif
25+
#ifndef MPTCP_INFO_FLAG_FALLBACK
26+
#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
27+
#endif
28+
#ifndef MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED
29+
#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
30+
#endif
31+
1232
#ifndef TCP_CA_NAME_MAX
1333
#define TCP_CA_NAME_MAX 16
1434
#endif
1535

36+
struct __mptcp_info {
37+
__u8 mptcpi_subflows;
38+
__u8 mptcpi_add_addr_signal;
39+
__u8 mptcpi_add_addr_accepted;
40+
__u8 mptcpi_subflows_max;
41+
__u8 mptcpi_add_addr_signal_max;
42+
__u8 mptcpi_add_addr_accepted_max;
43+
__u32 mptcpi_flags;
44+
__u32 mptcpi_token;
45+
__u64 mptcpi_write_seq;
46+
__u64 mptcpi_snd_una;
47+
__u64 mptcpi_rcv_nxt;
48+
__u8 mptcpi_local_addr_used;
49+
__u8 mptcpi_local_addr_max;
50+
__u8 mptcpi_csum_enabled;
51+
__u32 mptcpi_retransmits;
52+
__u64 mptcpi_bytes_retrans;
53+
__u64 mptcpi_bytes_sent;
54+
__u64 mptcpi_bytes_received;
55+
__u64 mptcpi_bytes_acked;
56+
};
57+
1658
struct mptcp_storage {
1759
__u32 invoked;
1860
__u32 is_mptcp;
@@ -22,6 +64,24 @@ struct mptcp_storage {
2264
char ca_name[TCP_CA_NAME_MAX];
2365
};
2466

67+
static struct nstoken *create_netns(void)
68+
{
69+
SYS(fail, "ip netns add %s", NS_TEST);
70+
SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
71+
72+
return open_netns(NS_TEST);
73+
fail:
74+
return NULL;
75+
}
76+
77+
static void cleanup_netns(struct nstoken *nstoken)
78+
{
79+
if (nstoken)
80+
close_netns(nstoken);
81+
82+
SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST);
83+
}
84+
2585
static int verify_tsk(int map_fd, int client_fd)
2686
{
2787
int err, cfd = client_fd;
@@ -100,24 +160,14 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
100160

101161
sock_skel = mptcp_sock__open_and_load();
102162
if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
103-
return -EIO;
163+
return libbpf_get_error(sock_skel);
104164

105165
err = mptcp_sock__attach(sock_skel);
106166
if (!ASSERT_OK(err, "skel_attach"))
107167
goto out;
108168

109169
prog_fd = bpf_program__fd(sock_skel->progs._sockops);
110-
if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) {
111-
err = -EIO;
112-
goto out;
113-
}
114-
115170
map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map);
116-
if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) {
117-
err = -EIO;
118-
goto out;
119-
}
120-
121171
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
122172
if (!ASSERT_OK(err, "bpf_prog_attach"))
123173
goto out;
@@ -147,11 +197,8 @@ static void test_base(void)
147197
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
148198
return;
149199

150-
SYS(fail, "ip netns add %s", NS_TEST);
151-
SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
152-
153-
nstoken = open_netns(NS_TEST);
154-
if (!ASSERT_OK_PTR(nstoken, "open_netns"))
200+
nstoken = create_netns();
201+
if (!ASSERT_OK_PTR(nstoken, "create_netns"))
155202
goto fail;
156203

157204
/* without MPTCP */
@@ -174,16 +221,111 @@ static void test_base(void)
174221
close(server_fd);
175222

176223
fail:
177-
if (nstoken)
178-
close_netns(nstoken);
224+
cleanup_netns(nstoken);
225+
close(cgroup_fd);
226+
}
179227

180-
SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
228+
/* Write one byte to @fd and assert that exactly one byte went out. */
static void send_byte(int fd)
{
	const char payload = 0x55;
	ssize_t written;

	written = write(fd, &payload, sizeof(payload));
	ASSERT_EQ(written, 1, "send single byte");
}
234+
235+
static int verify_mptcpify(int server_fd, int client_fd)
236+
{
237+
struct __mptcp_info info;
238+
socklen_t optlen;
239+
int protocol;
240+
int err = 0;
241+
242+
optlen = sizeof(protocol);
243+
if (!ASSERT_OK(getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen),
244+
"getsockopt(SOL_PROTOCOL)"))
245+
return -1;
246+
247+
if (!ASSERT_EQ(protocol, IPPROTO_MPTCP, "protocol isn't MPTCP"))
248+
err++;
181249

250+
optlen = sizeof(info);
251+
if (!ASSERT_OK(getsockopt(client_fd, SOL_MPTCP, MPTCP_INFO, &info, &optlen),
252+
"getsockopt(MPTCP_INFO)"))
253+
return -1;
254+
255+
if (!ASSERT_GE(info.mptcpi_flags, 0, "unexpected mptcpi_flags"))
256+
err++;
257+
if (!ASSERT_FALSE(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK,
258+
"MPTCP fallback"))
259+
err++;
260+
if (!ASSERT_TRUE(info.mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED,
261+
"no remote key received"))
262+
err++;
263+
264+
return err;
265+
}
266+
267+
/*
 * Load and attach the mptcpify BPF skeleton, open a server/client pair
 * that asks for plain SOCK_STREAM sockets, push one byte through and
 * verify the connection ended up on MPTCP.
 *
 * @cgroup_fd is accepted but not used by this function.
 *
 * Returns 0 on success, a negative error when setup fails, or the
 * result of verify_mptcpify() otherwise.
 */
static int run_mptcpify(int cgroup_fd)
{
	struct mptcpify *skel;
	int srv, cli;
	int ret = 0;

	skel = mptcpify__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_load"))
		return libbpf_get_error(skel);

	ret = mptcpify__attach(skel);
	if (!ASSERT_OK(ret, "skel_attach"))
		goto destroy_skel;

	/* Neither socket requests MPTCP explicitly. */
	srv = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
	if (!ASSERT_GE(srv, 0, "start_server")) {
		ret = -EIO;
		goto destroy_skel;
	}

	cli = connect_to_fd(srv, 0);
	if (!ASSERT_GE(cli, 0, "connect to fd")) {
		ret = -EIO;
		goto close_srv;
	}

	send_byte(cli);

	ret = verify_mptcpify(srv, cli);

	close(cli);
close_srv:
	close(srv);
destroy_skel:
	mptcpify__destroy(skel);
	return ret;
}
304+
305+
/*
 * "mptcpify" subtest: join a dedicated cgroup, enter a fresh network
 * namespace and run the mptcpify scenario inside it. The namespace and
 * the cgroup fd are released on every path after the cgroup join.
 */
static void test_mptcpify(void)
{
	struct nstoken *nstoken;
	int cgroup_fd = test__join_cgroup("/mptcpify");

	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
		return;

	nstoken = create_netns();
	if (ASSERT_OK_PTR(nstoken, "create_netns"))
		ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");

	/* cleanup_netns() copes with a NULL token. */
	cleanup_netns(nstoken);
	close(cgroup_fd);
}
184324

185325
/* Test entry point: runs each MPTCP subtest that is selected. */
void test_mptcp(void)
{
	if (test__start_subtest("base")) {
		test_base();
	}

	if (test__start_subtest("mptcpify")) {
		test_mptcpify();
	}
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2023, SUSE. */
3+
4+
#include "vmlinux.h"
5+
#include <bpf/bpf_tracing.h>
6+
#include "bpf_tracing_net.h"
7+
8+
char _license[] SEC("license") = "GPL";
9+
10+
/*
 * fmod_ret program attached to update_socket_protocol().
 *
 * For IPv4/IPv6 stream sockets whose protocol is 0 or IPPROTO_TCP,
 * return IPPROTO_MPTCP; every other request keeps its protocol.
 */
SEC("fmod_ret/update_socket_protocol")
int BPF_PROG(mptcpify, int family, int type, int protocol)
{
	/* Only inet stream sockets are candidates. */
	if (family != AF_INET && family != AF_INET6)
		return protocol;

	if (type != SOCK_STREAM)
		return protocol;

	/* An explicit non-TCP protocol is left alone. */
	if (protocol && protocol != IPPROTO_TCP)
		return protocol;

	return IPPROTO_MPTCP;
}

0 commit comments

Comments
 (0)