Skip to content

Commit b611b77

Browse files
author
Paolo Abeni
committed
Merge tag 'nf-24-02-29' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says:

====================
Netfilter fixes for net

Patch #1 restores NFPROTO_INET with nft_compat, from Ignat Korchagin.

Patch #2 fixes an issue with bridge netfilter and broadcast/multicast packets.

There is a day 0 bug in br_netfilter when used with connection tracking.

Conntrack assumes that an nf_conn structure that is not yet added to the hash table ("unconfirmed") is only visible to the current CPU that is processing the sk_buff.

For bridge this isn't true: the sk_buff can get cloned in between, and clones can be processed in parallel on different CPUs.

This patch disables NAT and conntrack helpers for multicast packets.

Patch #3 adds a selftest to cover the br_netfilter bug.

netfilter pull request 24-02-29

* tag 'nf-24-02-29' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
  selftests: netfilter: add bridge conntrack + multicast test case
  netfilter: bridge: confirm multicast packets before passing them up the stack
  netfilter: nf_tables: allow NFPROTO_INET in nft_(match/target)_validate()
====================

Link: https://lore.kernel.org/r/20240229000135.8780-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
2 parents 51dd4ee + 6523cf5 commit b611b77

7 files changed

Lines changed: 338 additions & 1 deletion

File tree

include/linux/netfilter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ struct nf_ct_hook {
474474
const struct sk_buff *);
475475
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
476476
void (*set_closing)(struct nf_conntrack *nfct);
477+
int (*confirm)(struct sk_buff *skb);
477478
};
478479
extern const struct nf_ct_hook __rcu *nf_ct_hook;
479480

net/bridge/br_netfilter_hooks.c

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@
4343
#include <linux/sysctl.h>
4444
#endif
4545

46+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
47+
#include <net/netfilter/nf_conntrack_core.h>
48+
#endif
49+
4650
static unsigned int brnf_net_id __read_mostly;
4751

4852
struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
553557
return NF_STOLEN;
554558
}
555559

560+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
561+
/* conntrack's nf_confirm logic cannot handle cloned skbs referencing
562+
* the same nf_conn entry, which will happen for multicast (broadcast)
563+
* frames on bridges.
564+
*
565+
* Example:
566+
* macvlan0
567+
* br0
568+
* ethX ethY
569+
*
570+
* ethX (or Y) receives multicast or broadcast packet containing
571+
* an IP packet, not yet in conntrack table.
572+
*
573+
* 1. skb passes through bridge and fake-ip (br_netfilter) Prerouting.
574+
* -> skb->_nfct now references an unconfirmed entry
575+
* 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
576+
* interface.
577+
* 3. skb gets passed up the stack.
578+
* 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
579+
* and schedules a work queue to send them out on the lower devices.
580+
*
581+
* The clone skb->_nfct is not a copy, it is the same entry as the
582+
* original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
583+
* 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
584+
*
585+
* The macvlan broadcast worker and normal confirm path will race.
586+
*
587+
* This race will not happen if step 2 already confirmed a clone. In that
588+
* case later steps perform skb_clone() with skb->_nfct already confirmed (in
589+
* hash table). This works fine.
590+
*
591+
* But such confirmation won't happen when eb/ip/nftables rules dropped the
592+
* packets before they reached the nf_confirm step in postrouting.
593+
*
594+
* Work around this problem by explicit confirmation of the entry at
595+
* LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
596+
* entry.
597+
*
598+
*/
599+
static unsigned int br_nf_local_in(void *priv,
600+
struct sk_buff *skb,
601+
const struct nf_hook_state *state)
602+
{
603+
struct nf_conntrack *nfct = skb_nfct(skb);
604+
const struct nf_ct_hook *ct_hook;
605+
struct nf_conn *ct;
606+
int ret;
607+
608+
if (!nfct || skb->pkt_type == PACKET_HOST)
609+
return NF_ACCEPT;
610+
611+
ct = container_of(nfct, struct nf_conn, ct_general);
612+
if (likely(nf_ct_is_confirmed(ct)))
613+
return NF_ACCEPT;
614+
615+
WARN_ON_ONCE(skb_shared(skb));
616+
WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
617+
618+
/* We can't call nf_confirm here, it would create a dependency
619+
* on nf_conntrack module.
620+
*/
621+
ct_hook = rcu_dereference(nf_ct_hook);
622+
if (!ct_hook) {
623+
skb->_nfct = 0ul;
624+
nf_conntrack_put(nfct);
625+
return NF_ACCEPT;
626+
}
627+
628+
nf_bridge_pull_encap_header(skb);
629+
ret = ct_hook->confirm(skb);
630+
switch (ret & NF_VERDICT_MASK) {
631+
case NF_STOLEN:
632+
return NF_STOLEN;
633+
default:
634+
nf_bridge_push_encap_header(skb);
635+
break;
636+
}
637+
638+
ct = container_of(nfct, struct nf_conn, ct_general);
639+
WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
640+
641+
return ret;
642+
}
643+
#endif
556644

557645
/* PF_BRIDGE/FORWARD *************************************************/
558646
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
9641052
.hooknum = NF_BR_PRE_ROUTING,
9651053
.priority = NF_BR_PRI_BRNF,
9661054
},
1055+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
1056+
{
1057+
.hook = br_nf_local_in,
1058+
.pf = NFPROTO_BRIDGE,
1059+
.hooknum = NF_BR_LOCAL_IN,
1060+
.priority = NF_BR_PRI_LAST,
1061+
},
1062+
#endif
9671063
{
9681064
.hook = br_nf_forward,
9691065
.pf = NFPROTO_BRIDGE,

net/bridge/netfilter/nf_conntrack_bridge.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
291291
return nf_conntrack_in(skb, &bridge_state);
292292
}
293293

294+
static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
295+
const struct nf_hook_state *state)
296+
{
297+
enum ip_conntrack_info ctinfo;
298+
struct nf_conn *ct;
299+
300+
if (skb->pkt_type == PACKET_HOST)
301+
return NF_ACCEPT;
302+
303+
/* nf_conntrack_confirm() cannot handle concurrent clones,
304+
* this happens for broad/multicast frames with e.g. macvlan on top
305+
* of the bridge device.
306+
*/
307+
ct = nf_ct_get(skb, &ctinfo);
308+
if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
309+
return NF_ACCEPT;
310+
311+
/* let inet prerouting call conntrack again */
312+
skb->_nfct = 0;
313+
nf_ct_put(ct);
314+
315+
return NF_ACCEPT;
316+
}
317+
294318
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
295319
struct nf_bridge_frag_data *data)
296320
{
@@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
385409
.hooknum = NF_BR_PRE_ROUTING,
386410
.priority = NF_IP_PRI_CONNTRACK,
387411
},
412+
{
413+
.hook = nf_ct_bridge_in,
414+
.pf = NFPROTO_BRIDGE,
415+
.hooknum = NF_BR_LOCAL_IN,
416+
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
417+
},
388418
{
389419
.hook = nf_ct_bridge_post,
390420
.pf = NFPROTO_BRIDGE,

net/netfilter/nf_conntrack_core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
27562756
.get_tuple_skb = nf_conntrack_get_tuple_skb,
27572757
.attach = nf_conntrack_attach,
27582758
.set_closing = nf_conntrack_set_closing,
2759+
.confirm = __nf_conntrack_confirm,
27592760
};
27602761

27612762
void nf_conntrack_init_end(void)

net/netfilter/nft_compat.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
359359

360360
if (ctx->family != NFPROTO_IPV4 &&
361361
ctx->family != NFPROTO_IPV6 &&
362+
ctx->family != NFPROTO_INET &&
362363
ctx->family != NFPROTO_BRIDGE &&
363364
ctx->family != NFPROTO_ARP)
364365
return -EOPNOTSUPP;
365366

367+
ret = nft_chain_validate_hooks(ctx->chain,
368+
(1 << NF_INET_PRE_ROUTING) |
369+
(1 << NF_INET_LOCAL_IN) |
370+
(1 << NF_INET_FORWARD) |
371+
(1 << NF_INET_LOCAL_OUT) |
372+
(1 << NF_INET_POST_ROUTING));
373+
if (ret)
374+
return ret;
375+
366376
if (nft_is_base_chain(ctx->chain)) {
367377
const struct nft_base_chain *basechain =
368378
nft_base_chain(ctx->chain);
@@ -610,10 +620,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
610620

611621
if (ctx->family != NFPROTO_IPV4 &&
612622
ctx->family != NFPROTO_IPV6 &&
623+
ctx->family != NFPROTO_INET &&
613624
ctx->family != NFPROTO_BRIDGE &&
614625
ctx->family != NFPROTO_ARP)
615626
return -EOPNOTSUPP;
616627

628+
ret = nft_chain_validate_hooks(ctx->chain,
629+
(1 << NF_INET_PRE_ROUTING) |
630+
(1 << NF_INET_LOCAL_IN) |
631+
(1 << NF_INET_FORWARD) |
632+
(1 << NF_INET_LOCAL_OUT) |
633+
(1 << NF_INET_POST_ROUTING));
634+
if (ret)
635+
return ret;
636+
617637
if (nft_is_base_chain(ctx->chain)) {
618638
const struct nft_base_chain *basechain =
619639
nft_base_chain(ctx->chain);

tools/testing/selftests/netfilter/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
77
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
88
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
99
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
10-
conntrack_sctp_collision.sh xt_string.sh
10+
conntrack_sctp_collision.sh xt_string.sh \
11+
bridge_netfilter.sh
1112

1213
HOSTPKG_CONFIG := pkg-config
1314

0 commit comments

Comments
 (0)