Skip to content

Commit f54a2a1

Browse files
committed
Daniel Borkmann says: ==================== pull-request: bpf-next 2023-08-16 We've added 17 non-merge commits during the last 6 day(s) which contain a total of 20 files changed, 1179 insertions(+), 37 deletions(-). The main changes are: 1) Add a BPF hook in sys_socket() to change the protocol ID from IPPROTO_TCP to IPPROTO_MPTCP to cover migration for legacy applications, from Geliang Tang. 2) Follow-up/fallout fix from the SO_REUSEPORT + bpf_sk_assign work to fix a splat on non-fullsock sks in inet[6]_steal_sock, from Lorenz Bauer. 3) Improvements to struct_ops links to avoid forcing presence of update/validate callbacks. Also add bpf_struct_ops fields documentation, from David Vernet. 4) Ensure libbpf sets close-on-exec flag on gzopen, from Marco Vedovati. 5) Several new tcx selftest additions and bpftool link show support for tcx and xdp links, from Daniel Borkmann. 6) Fix a smatch warning on uninitialized symbol in bpf_perf_link_fill_kprobe, from Yafang Shao. 7) BPF selftest fixes e.g. misplaced break in kfunc_call test, from Yipeng Zou. 8) Small cleanup to remove unused declaration bpf_link_new_file, from Yue Haibing. 9) Small typo fix to bpftool's perf help message, from Daniel T. Lee. 
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: selftests/bpf: Add mptcpify test selftests/bpf: Fix error checks of mptcp open_and_load selftests/bpf: Add two mptcp netns helpers bpf: Add update_socket_protocol hook bpftool: Implement link show support for xdp bpftool: Implement link show support for tcx selftests/bpf: Add selftest for fill_link_info bpf: Fix uninitialized symbol in bpf_perf_link_fill_kprobe() net: Fix slab-out-of-bounds in inet[6]_steal_sock bpf: Document struct bpf_struct_ops fields bpf: Support default .validate() and .update() behavior for struct_ops links selftests/bpf: Add various more tcx test cases selftests/bpf: Clean up fmod_ret in bench_rename test script selftests/bpf: Fix repeat option when kfunc_call verification fails libbpf: Set close-on-exec flag on gzopen bpftool: fix perf help message bpf: Remove unused declaration bpf_link_new_file() ==================== Link: https://lore.kernel.org/r/20230816212840.1539-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 42b118c + de40537 commit f54a2a1

20 files changed

Lines changed: 1179 additions & 37 deletions

File tree

include/linux/bpf.h

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1550,6 +1550,53 @@ struct bpf_struct_ops_value;
15501550
struct btf_member;
15511551

15521552
#define BPF_STRUCT_OPS_MAX_NR_MEMBERS 64
1553+
/**
1554+
* struct bpf_struct_ops - A structure of callbacks allowing a subsystem to
1555+
* define a BPF_MAP_TYPE_STRUCT_OPS map type composed
1556+
* of BPF_PROG_TYPE_STRUCT_OPS progs.
1557+
* @verifier_ops: A structure of callbacks that are invoked by the verifier
1558+
* when determining whether the struct_ops progs in the
1559+
* struct_ops map are valid.
1560+
* @init: A callback that is invoked a single time, and before any other
1561+
* callback, to initialize the structure. A nonzero return value means
1562+
* the subsystem could not be initialized.
1563+
* @check_member: When defined, a callback invoked by the verifier to allow
1564+
* the subsystem to determine if an entry in the struct_ops map
1565+
* is valid. A nonzero return value means that the map is
1566+
* invalid and should be rejected by the verifier.
1567+
* @init_member: A callback that is invoked for each member of the struct_ops
1568+
* map to allow the subsystem to initialize the member. A nonzero
1569+
* value means the member could not be initialized. This callback
1570+
* is exclusive with the @type, @type_id, @value_type, and
1571+
* @value_id fields.
1572+
* @reg: A callback that is invoked when the struct_ops map has been
1573+
* initialized and is being attached to. Zero means the struct_ops map
1574+
* has been successfully registered and is live. A nonzero return value
1575+
* means the struct_ops map could not be registered.
1576+
* @unreg: A callback that is invoked when the struct_ops map should be
1577+
* unregistered.
1578+
* @update: A callback that is invoked when the live struct_ops map is being
1579+
* updated to contain new values. This callback is only invoked when
1580+
* the struct_ops map is loaded with BPF_F_LINK. If not defined, the
1581+
* struct_ops map is assumed to be one that cannot be updated.
1582+
* @validate: A callback that is invoked after all of the members have been
1583+
* initialized. This callback should perform static checks on the
1584+
* map, meaning that it should either fail or succeed
1585+
* deterministically. A struct_ops map that has been validated may
1586+
* not necessarily succeed in being registered if the call to @reg
1587+
* fails. For example, a valid struct_ops map may be loaded, but
1588+
* then fail to be registered due to there being another active
1589+
* struct_ops map on the system in the subsystem already. For this
1590+
* reason, if this callback is not defined, the check is skipped as
1591+
* the struct_ops map will have final verification performed in
1592+
* @reg.
1593+
* @type: BTF type.
1594+
* @value_type: Value type.
1595+
* @name: The name of the struct bpf_struct_ops object.
1596+
* @func_models: Func models
1597+
* @type_id: BTF type id.
1598+
* @value_id: BTF value id.
1599+
*/
15531600
struct bpf_struct_ops {
15541601
const struct bpf_verifier_ops *verifier_ops;
15551602
int (*init)(struct btf *btf);
@@ -2120,7 +2167,6 @@ void bpf_link_cleanup(struct bpf_link_primer *primer);
21202167
void bpf_link_inc(struct bpf_link *link);
21212168
void bpf_link_put(struct bpf_link *link);
21222169
int bpf_link_new_fd(struct bpf_link *link);
2123-
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
21242170
struct bpf_link *bpf_link_get_from_fd(u32 ufd);
21252171
struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
21262172

include/net/inet6_hashtables.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff,
116116
if (!sk)
117117
return NULL;
118118

119-
if (!prefetched)
119+
if (!prefetched || !sk_fullsock(sk))
120120
return sk;
121121

122122
if (sk->sk_protocol == IPPROTO_TCP) {

include/net/inet_hashtables.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff,
462462
if (!sk)
463463
return NULL;
464464

465-
if (!prefetched)
465+
if (!prefetched || !sk_fullsock(sk))
466466
return sk;
467467

468468
if (sk->sk_protocol == IPPROTO_TCP) {

kernel/bpf/bpf_struct_ops.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
509509
}
510510

511511
if (st_map->map.map_flags & BPF_F_LINK) {
512-
err = st_ops->validate(kdata);
513-
if (err)
514-
goto reset_unlock;
512+
err = 0;
513+
if (st_ops->validate) {
514+
err = st_ops->validate(kdata);
515+
if (err)
516+
goto reset_unlock;
517+
}
515518
set_memory_rox((long)st_map->image, 1);
516519
/* Let bpf_link handle registration & unregistration.
517520
*
@@ -663,9 +666,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
663666
if (attr->value_size != vt->size)
664667
return ERR_PTR(-EINVAL);
665668

666-
if (attr->map_flags & BPF_F_LINK && (!st_ops->validate || !st_ops->update))
667-
return ERR_PTR(-EOPNOTSUPP);
668-
669669
t = st_ops->type;
670670

671671
st_map_size = sizeof(*st_map) +
@@ -823,6 +823,9 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
823823
if (!bpf_struct_ops_valid_to_reg(new_map))
824824
return -EINVAL;
825825

826+
if (!st_map->st_ops->update)
827+
return -EOPNOTSUPP;
828+
826829
mutex_lock(&update_mutex);
827830

828831
old_map = rcu_dereference_protected(st_link->map, lockdep_is_held(&update_mutex));

kernel/bpf/syscall.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3378,14 +3378,13 @@ static int bpf_perf_link_fill_common(const struct perf_event *event,
33783378

33793379
if (!ulen ^ !uname)
33803380
return -EINVAL;
3381-
if (!uname)
3382-
return 0;
33833381

33843382
err = bpf_get_perf_event_info(event, &prog_id, fd_type, &buf,
33853383
probe_offset, probe_addr);
33863384
if (err)
33873385
return err;
3388-
3386+
if (!uname)
3387+
return 0;
33893388
if (buf) {
33903389
len = strlen(buf);
33913390
err = bpf_copy_to_user(uname, buf, ulen, len);

net/mptcp/bpf.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,18 @@ struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
1919

2020
return NULL;
2121
}
22+
23+
BTF_SET8_START(bpf_mptcp_fmodret_ids)
24+
BTF_ID_FLAGS(func, update_socket_protocol)
25+
BTF_SET8_END(bpf_mptcp_fmodret_ids)
26+
27+
static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
28+
.owner = THIS_MODULE,
29+
.set = &bpf_mptcp_fmodret_ids,
30+
};
31+
32+
static int __init bpf_mptcp_kfunc_init(void)
33+
{
34+
return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
35+
}
36+
late_initcall(bpf_mptcp_kfunc_init);

net/socket.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1657,12 +1657,36 @@ struct file *__sys_socket_file(int family, int type, int protocol)
16571657
return sock_alloc_file(sock, flags, NULL);
16581658
}
16591659

1660+
/* A hook for bpf progs to attach to and update socket protocol.
1661+
*
1662+
* A static noinline declaration here could cause the compiler to
1663+
* optimize away the function. A global noinline declaration will
1664+
* keep the definition, but may optimize away the callsite.
1665+
* Therefore, __weak is needed to ensure that the call is still
1666+
* emitted, by telling the compiler that we don't know what the
1667+
* function might eventually be.
1668+
*
1669+
* __diag_* below are needed to dismiss the missing prototype warning.
1670+
*/
1671+
1672+
__diag_push();
1673+
__diag_ignore_all("-Wmissing-prototypes",
1674+
"A fmod_ret entry point for BPF programs");
1675+
1676+
__weak noinline int update_socket_protocol(int family, int type, int protocol)
1677+
{
1678+
return protocol;
1679+
}
1680+
1681+
__diag_pop();
1682+
16601683
int __sys_socket(int family, int type, int protocol)
16611684
{
16621685
struct socket *sock;
16631686
int flags;
16641687

1665-
sock = __sys_socket_create(family, type, protocol);
1688+
sock = __sys_socket_create(family, type,
1689+
update_socket_protocol(family, type, protocol));
16661690
if (IS_ERR(sock))
16671691
return PTR_ERR(sock);
16681692

tools/bpf/bpftool/link.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,18 @@ static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
150150
jsonw_uint_field(wtr, "attach_type", attach_type);
151151
}
152152

153+
static void show_link_ifindex_json(__u32 ifindex, json_writer_t *wtr)
154+
{
155+
char devname[IF_NAMESIZE] = "(unknown)";
156+
157+
if (ifindex)
158+
if_indextoname(ifindex, devname);
159+
else
160+
snprintf(devname, sizeof(devname), "(detached)");
161+
jsonw_string_field(wtr, "devname", devname);
162+
jsonw_uint_field(wtr, "ifindex", ifindex);
163+
}
164+
153165
static bool is_iter_map_target(const char *target_name)
154166
{
155167
return strcmp(target_name, "bpf_map_elem") == 0 ||
@@ -433,6 +445,13 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
433445
case BPF_LINK_TYPE_NETFILTER:
434446
netfilter_dump_json(info, json_wtr);
435447
break;
448+
case BPF_LINK_TYPE_TCX:
449+
show_link_ifindex_json(info->tcx.ifindex, json_wtr);
450+
show_link_attach_type_json(info->tcx.attach_type, json_wtr);
451+
break;
452+
case BPF_LINK_TYPE_XDP:
453+
show_link_ifindex_json(info->xdp.ifindex, json_wtr);
454+
break;
436455
case BPF_LINK_TYPE_STRUCT_OPS:
437456
jsonw_uint_field(json_wtr, "map_id",
438457
info->struct_ops.map_id);
@@ -509,6 +528,22 @@ static void show_link_attach_type_plain(__u32 attach_type)
509528
printf("attach_type %u ", attach_type);
510529
}
511530

531+
static void show_link_ifindex_plain(__u32 ifindex)
532+
{
533+
char devname[IF_NAMESIZE * 2] = "(unknown)";
534+
char tmpname[IF_NAMESIZE];
535+
char *ret = NULL;
536+
537+
if (ifindex)
538+
ret = if_indextoname(ifindex, tmpname);
539+
else
540+
snprintf(devname, sizeof(devname), "(detached)");
541+
if (ret)
542+
snprintf(devname, sizeof(devname), "%s(%d)",
543+
tmpname, ifindex);
544+
printf("ifindex %s ", devname);
545+
}
546+
512547
static void show_iter_plain(struct bpf_link_info *info)
513548
{
514549
const char *target_name = u64_to_ptr(info->iter.target_name);
@@ -745,6 +780,15 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
745780
case BPF_LINK_TYPE_NETFILTER:
746781
netfilter_dump_plain(info);
747782
break;
783+
case BPF_LINK_TYPE_TCX:
784+
printf("\n\t");
785+
show_link_ifindex_plain(info->tcx.ifindex);
786+
show_link_attach_type_plain(info->tcx.attach_type);
787+
break;
788+
case BPF_LINK_TYPE_XDP:
789+
printf("\n\t");
790+
show_link_ifindex_plain(info->xdp.ifindex);
791+
break;
748792
case BPF_LINK_TYPE_KPROBE_MULTI:
749793
show_kprobe_multi_plain(info);
750794
break;

tools/bpf/bpftool/perf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ static int do_help(int argc, char **argv)
236236
{
237237
fprintf(stderr,
238238
"Usage: %1$s %2$s { show | list }\n"
239-
" %1$s %2$s help }\n"
239+
" %1$s %2$s help\n"
240240
"\n"
241241
" " HELP_SPEC_OPTIONS " }\n"
242242
"",

tools/lib/bpf/libbpf.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,9 +1978,9 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
19781978
return -ENAMETOOLONG;
19791979

19801980
/* gzopen also accepts uncompressed files. */
1981-
file = gzopen(buf, "r");
1981+
file = gzopen(buf, "re");
19821982
if (!file)
1983-
file = gzopen("/proc/config.gz", "r");
1983+
file = gzopen("/proc/config.gz", "re");
19841984

19851985
if (!file) {
19861986
pr_warn("failed to open system Kconfig\n");

0 commit comments

Comments
 (0)