Skip to content

Commit 25c800b

Browse files
committed
Merge branch 'bridge-neigh-suppression'
Ido Schimmel says:

====================
bridge: Add per-{Port, VLAN} neighbor suppression

Background
==========

In order to minimize the flooding of ARP and ND messages in the VXLAN network, EVPN includes provisions [1] that allow participating VTEPs to suppress such messages in case they know the MAC-IP binding and can reply on behalf of the remote host. In Linux, the above is implemented in the bridge driver using a per-port option called "neigh_suppress" that was added in kernel version 4.15 [2].

Motivation
==========

Some applications use ARP messages as keepalives between the application nodes in the network. This works perfectly well when two nodes are connected to the same VTEP. When a node goes down it will stop responding to ARP requests and the other node will notice it immediately. However, when the two nodes are connected to different VTEPs and neighbor suppression is enabled, the local VTEP will reply to ARP requests even after the remote node went down, until certain timers expire and the EVPN control plane decides to withdraw the MAC/IP Advertisement route for the address. Therefore, some users would like to be able to disable neighbor suppression on VLANs where such applications reside and keep it enabled on the rest.

Implementation
==============

The proposed solution is to allow user space to control neighbor suppression on a per-{Port, VLAN} basis, in a similar fashion to other per-port options that gained per-{Port, VLAN} counterparts such as "mcast_router". This allows users to benefit from the operational simplicity and scalability associated with shared VXLAN devices (i.e., external / collect-metadata mode), while still allowing for per-VLAN/VNI neighbor suppression control.

The user interface is extended with a new "neigh_vlan_suppress" bridge port option that allows user space to enable per-{Port, VLAN} neighbor suppression on the bridge port. When enabled, the existing "neigh_suppress" option has no effect and neighbor suppression is controlled using a new "neigh_suppress" VLAN option. Example usage:

 # bridge link set dev vxlan0 neigh_vlan_suppress on
 # bridge vlan add vid 10 dev vxlan0
 # bridge vlan set vid 10 dev vxlan0 neigh_suppress on

Testing
=======

Tested using existing bridge selftests. Added a dedicated selftest in the last patch.

Patchset overview
=================

Patches #1-#5 are preparations.
Patch #6 adds per-{Port, VLAN} neighbor suppression support to the bridge's data path.
Patches #7-#8 add the required netlink attributes to enable the feature.
Patch #9 adds a selftest.

iproute2 patches can be found here [3].

Changelog
=========

Since RFC [4]:

No changes.

[1] https://www.rfc-editor.org/rfc/rfc7432#section-10
[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a42317785c898c0ed46db45a33b0cc71b671bf29
[3] https://github.com/idosch/iproute2/tree/submit/neigh_suppress_v1
[4] https://lore.kernel.org/netdev/20230413095830.2182382-1-idosch@nvidia.com/
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 1cf3fe1 + 7648ac7 commit 25c800b

15 files changed

Lines changed: 936 additions & 19 deletions

File tree

include/linux/if_bridge.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ struct br_ip_list {
6060
#define BR_TX_FWD_OFFLOAD BIT(20)
6161
#define BR_PORT_LOCKED BIT(21)
6262
#define BR_PORT_MAB BIT(22)
63+
#define BR_NEIGH_VLAN_SUPPRESS BIT(23)
6364

6465
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
6566

include/uapi/linux/if_bridge.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,7 @@ enum {
525525
BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
526526
BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
527527
BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
528+
BRIDGE_VLANDB_ENTRY_NEIGH_SUPPRESS,
528529
__BRIDGE_VLANDB_ENTRY_MAX,
529530
};
530531
#define BRIDGE_VLANDB_ENTRY_MAX (__BRIDGE_VLANDB_ENTRY_MAX - 1)

include/uapi/linux/if_link.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ enum {
569569
IFLA_BRPORT_MAB,
570570
IFLA_BRPORT_MCAST_N_GROUPS,
571571
IFLA_BRPORT_MCAST_MAX_GROUPS,
572+
IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
572573
__IFLA_BRPORT_MAX
573574
};
574575
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)

net/bridge/br_arp_nd_proxy.c

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
3030
bool neigh_suppress = false;
3131

3232
list_for_each_entry(p, &br->port_list, list) {
33-
if (p->flags & BR_NEIGH_SUPPRESS) {
33+
if (p->flags & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS)) {
3434
neigh_suppress = true;
3535
break;
3636
}
@@ -158,7 +158,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
158158
return;
159159

160160
if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
161-
if (p && (p->flags & BR_NEIGH_SUPPRESS))
161+
if (br_is_neigh_suppress_enabled(p, vid))
162162
return;
163163
if (parp->ar_op != htons(ARPOP_RREQUEST) &&
164164
parp->ar_op != htons(ARPOP_RREPLY) &&
@@ -202,8 +202,8 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
202202
bool replied = false;
203203

204204
if ((p && (p->flags & BR_PROXYARP)) ||
205-
(f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
206-
BR_NEIGH_SUPPRESS)))) {
205+
(f->dst && (f->dst->flags & BR_PROXYARP_WIFI)) ||
206+
br_is_neigh_suppress_enabled(f->dst, vid)) {
207207
if (!vid)
208208
br_arp_send(br, p, skb->dev, sip, tip,
209209
sha, n->ha, sha, 0, 0);
@@ -407,7 +407,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
407407

408408
BR_INPUT_SKB_CB(skb)->proxyarp_replied = 0;
409409

410-
if (p && (p->flags & BR_NEIGH_SUPPRESS))
410+
if (br_is_neigh_suppress_enabled(p, vid))
411411
return;
412412

413413
if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
@@ -461,7 +461,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
461461
if (f) {
462462
bool replied = false;
463463

464-
if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
464+
if (br_is_neigh_suppress_enabled(f->dst, vid)) {
465465
if (vid != 0)
466466
br_nd_send(br, p, skb, n,
467467
skb->vlan_proto,
@@ -483,3 +483,24 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
483483
}
484484
}
485485
#endif
486+
487+
/* Report whether neighbor suppression applies to port @p for VLAN @vid.
 *
 * Returns false when @p is NULL. When @vid is 0, or when the port does not
 * have BR_NEIGH_VLAN_SUPPRESS set, the result is the port's BR_NEIGH_SUPPRESS
 * flag. Otherwise the port-level flag is not consulted and the result is the
 * BR_VLFLAG_NEIGH_SUPPRESS_ENABLED bit of the VLAN entry looked up for @vid
 * in the port's VLAN group; false if no such entry is found.
 *
 * NOTE(review): the VLAN group is fetched with nbp_vlan_group_rcu(), so
 * callers presumably must be inside an RCU read-side section - confirm.
 */
bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid)
488+
{
489+
if (!p)
490+
return false;
491+
492+
if (!vid)
493+
return !!(p->flags & BR_NEIGH_SUPPRESS); /* vid 0: only the per-port flag is checked */
494+
495+
if (p->flags & BR_NEIGH_VLAN_SUPPRESS) {
496+
struct net_bridge_vlan_group *vg = nbp_vlan_group_rcu(p);
497+
struct net_bridge_vlan *v;
498+
499+
v = br_vlan_find(vg, vid);
500+
if (!v)
501+
return false; /* no VLAN entry for @vid in this port's group */
502+
return !!(v->priv_flags & BR_VLFLAG_NEIGH_SUPPRESS_ENABLED);
503+
} else {
504+
return !!(p->flags & BR_NEIGH_SUPPRESS);
505+
}
506+
}

net/bridge/br_device.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
8080

8181
dest = eth_hdr(skb)->h_dest;
8282
if (is_broadcast_ether_addr(dest)) {
83-
br_flood(br, skb, BR_PKT_BROADCAST, false, true);
83+
br_flood(br, skb, BR_PKT_BROADCAST, false, true, vid);
8484
} else if (is_multicast_ether_addr(dest)) {
8585
if (unlikely(netpoll_tx_running(dev))) {
86-
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
86+
br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
8787
goto out;
8888
}
8989
if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
@@ -96,11 +96,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
9696
br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
9797
br_multicast_flood(mdst, skb, brmctx, false, true);
9898
else
99-
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
99+
br_flood(br, skb, BR_PKT_MULTICAST, false, true, vid);
100100
} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
101101
br_forward(dst->dst, skb, false, true);
102102
} else {
103-
br_flood(br, skb, BR_PKT_UNICAST, false, true);
103+
br_flood(br, skb, BR_PKT_UNICAST, false, true, vid);
104104
}
105105
out:
106106
rcu_read_unlock();

net/bridge/br_forward.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ static struct net_bridge_port *maybe_deliver(
197197

198198
/* called under rcu_read_lock */
199199
void br_flood(struct net_bridge *br, struct sk_buff *skb,
200-
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
200+
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig,
201+
u16 vid)
201202
{
202203
struct net_bridge_port *prev = NULL;
203204
struct net_bridge_port *p;
@@ -224,8 +225,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
224225
/* Do not flood to ports that enable proxy ARP */
225226
if (p->flags & BR_PROXYARP)
226227
continue;
227-
if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
228-
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
228+
if (BR_INPUT_SKB_CB(skb)->proxyarp_replied &&
229+
((p->flags & BR_PROXYARP_WIFI) ||
230+
br_is_neigh_suppress_enabled(p, vid)))
229231
continue;
230232

231233
prev = maybe_deliver(prev, p, skb, local_orig);

net/bridge/br_if.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
759759
if (mask & BR_AUTO_MASK)
760760
nbp_update_port_count(br);
761761

762-
if (mask & BR_NEIGH_SUPPRESS)
762+
if (mask & (BR_NEIGH_SUPPRESS | BR_NEIGH_VLAN_SUPPRESS))
763763
br_recalculate_neigh_suppress_enabled(br);
764764
}
765765

net/bridge/br_input.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
207207
br_forward(dst->dst, skb, local_rcv, false);
208208
} else {
209209
if (!mcast_hit)
210-
br_flood(br, skb, pkt_type, local_rcv, false);
210+
br_flood(br, skb, pkt_type, local_rcv, false, vid);
211211
else
212212
br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
213213
}

net/bridge/br_netlink.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ static inline size_t br_port_info_size(void)
189189
+ nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
190190
+ nla_total_size(1) /* IFLA_BRPORT_LOCKED */
191191
+ nla_total_size(1) /* IFLA_BRPORT_MAB */
192+
+ nla_total_size(1) /* IFLA_BRPORT_NEIGH_VLAN_SUPPRESS */
192193
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
193194
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
194195
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -278,7 +279,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
278279
!!(p->flags & BR_MRP_LOST_IN_CONT)) ||
279280
nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)) ||
280281
nla_put_u8(skb, IFLA_BRPORT_LOCKED, !!(p->flags & BR_PORT_LOCKED)) ||
281-
nla_put_u8(skb, IFLA_BRPORT_MAB, !!(p->flags & BR_PORT_MAB)))
282+
nla_put_u8(skb, IFLA_BRPORT_MAB, !!(p->flags & BR_PORT_MAB)) ||
283+
nla_put_u8(skb, IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
284+
!!(p->flags & BR_NEIGH_VLAN_SUPPRESS)))
282285
return -EMSGSIZE;
283286

284287
timerval = br_timer_value(&p->message_age_timer);
@@ -891,6 +894,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
891894
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
892895
[IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
893896
[IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
897+
[IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
894898
};
895899

896900
/* Change the state of the port and notify spanning tree */
@@ -957,6 +961,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
957961
br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
958962
br_set_port_flag(p, tb, IFLA_BRPORT_LOCKED, BR_PORT_LOCKED);
959963
br_set_port_flag(p, tb, IFLA_BRPORT_MAB, BR_PORT_MAB);
964+
br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
965+
BR_NEIGH_VLAN_SUPPRESS);
960966

961967
if ((p->flags & BR_PORT_MAB) &&
962968
(!(p->flags & BR_PORT_LOCKED) || !(p->flags & BR_LEARNING))) {

net/bridge/br_private.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ enum {
178178
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
179179
BR_VLFLAG_MCAST_ENABLED = BIT(2),
180180
BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
181+
BR_VLFLAG_NEIGH_SUPPRESS_ENABLED = BIT(4),
181182
};
182183

183184
/**
@@ -849,7 +850,8 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb,
849850
bool local_rcv, bool local_orig);
850851
int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
851852
void br_flood(struct net_bridge *br, struct sk_buff *skb,
852-
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
853+
enum br_pkt_type pkt_type, bool local_rcv, bool local_orig,
854+
u16 vid);
853855

854856
/* return true if both source port and dest port are isolated */
855857
static inline bool br_skb_isolated(const struct net_bridge_port *to,
@@ -2218,4 +2220,5 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
22182220
void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
22192221
u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
22202222
struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
2223+
bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid);
22212224
#endif

0 commit comments

Comments
 (0)