Skip to content

Commit dab4e1f

Browse files
brb authored and Martin KaFai Lau committed
bpf: Derive source IP addr via bpf_*_fib_lookup()
Extend the bpf_fib_lookup() helper by making it return the source IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set.

For example, the following snippet can be used to derive the desired source IP address:

    struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };

    ret = bpf_skb_fib_lookup(skb, &p, sizeof(p),
                             BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
        return TC_ACT_SHOT;

    /* the p.ipv4_src now contains the source address */

The inability to derive the proper source address may cause malfunctions in BPF-based dataplanes for hosts containing netdevs with more than one routable IP address or for multi-homed hosts.

For example, Cilium implements packet masquerading in BPF. If an egressing netdev to which Cilium's BPF prog is attached has multiple IP addresses, then only one [hardcoded] IP address can be used for masquerading. This breaks connectivity if any other IP address should have been selected instead, for example, when a public and a private address are attached to the same egress interface.

The change was tested with Cilium [1].

Nikolay Aleksandrov helped to figure out the IPv6 addr selection.

[1]: cilium/cilium#28283

Signed-off-by: Martynas Pumputis <m@lambda.lt>
Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
1 parent 1be84ca commit dab4e1f

5 files changed

Lines changed: 43 additions & 1 deletion

File tree

include/net/ipv6_stubs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ struct ipv6_bpf_stub {
8585
sockptr_t optval, unsigned int optlen);
8686
int (*ipv6_getsockopt)(struct sock *sk, int level, int optname,
8787
sockptr_t optval, sockptr_t optlen);
88+
int (*ipv6_dev_get_saddr)(struct net *net,
89+
const struct net_device *dst_dev,
90+
const struct in6_addr *daddr,
91+
unsigned int prefs,
92+
struct in6_addr *saddr);
8893
};
8994
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
9095

include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,6 +3264,11 @@ union bpf_attr {
32643264
* and *params*->smac will not be set as output. A common
32653265
* use case is to call **bpf_redirect_neigh**\ () after
32663266
* doing **bpf_fib_lookup**\ ().
3267+
* **BPF_FIB_LOOKUP_SRC**
3268+
* Derive and set source IP addr in *params*->ipv{4,6}_src
3269+
* for the nexthop. If the src addr cannot be derived,
3270+
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
3271+
* case, *params*->dmac and *params*->smac are not set either.
32673272
*
32683273
* *ctx* is either **struct xdp_md** for XDP programs or
32693274
* **struct sk_buff** tc cls_act programs.
@@ -6964,6 +6969,7 @@ enum {
69646969
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
69656970
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
69666971
BPF_FIB_LOOKUP_TBID = (1U << 3),
6972+
BPF_FIB_LOOKUP_SRC = (1U << 4),
69676973
};
69686974

69696975
enum {
@@ -6976,6 +6982,7 @@ enum {
69766982
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
69776983
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
69786984
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
6985+
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
69796986
};
69806987

69816988
struct bpf_fib_lookup {
@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
70107017
__u32 rt_metric;
70117018
};
70127019

7020+
/* input: source address to consider for lookup
7021+
* output: source address result from lookup
7022+
*/
70137023
union {
70147024
__be32 ipv4_src;
70157025
__u32 ipv6_src[4]; /* in6_addr; network order */

net/core/filter.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5850,6 +5850,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
58505850
params->rt_metric = res.fi->fib_priority;
58515851
params->ifindex = dev->ifindex;
58525852

5853+
if (flags & BPF_FIB_LOOKUP_SRC)
5854+
params->ipv4_src = fib_result_prefsrc(net, &res);
5855+
58535856
/* xdp and cls_bpf programs are run in RCU-bh so
58545857
* rcu_read_lock_bh is not needed here
58555858
*/
@@ -5992,6 +5995,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
59925995
params->rt_metric = res.f6i->fib6_metric;
59935996
params->ifindex = dev->ifindex;
59945997

5998+
if (flags & BPF_FIB_LOOKUP_SRC) {
5999+
if (res.f6i->fib6_prefsrc.plen) {
6000+
*src = res.f6i->fib6_prefsrc.addr;
6001+
} else {
6002+
err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
6003+
&fl6.daddr, 0,
6004+
src);
6005+
if (err)
6006+
return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
6007+
}
6008+
}
6009+
59956010
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
59966011
goto set_fwd_params;
59976012

@@ -6010,7 +6025,8 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
60106025
#endif
60116026

60126027
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
6013-
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID)
6028+
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
6029+
BPF_FIB_LOOKUP_SRC)
60146030

60156031
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
60166032
struct bpf_fib_lookup *, params, int, plen, u32, flags)

net/ipv6/af_inet6.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,6 +1061,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
10611061
.udp6_lib_lookup = __udp6_lib_lookup,
10621062
.ipv6_setsockopt = do_ipv6_setsockopt,
10631063
.ipv6_getsockopt = do_ipv6_getsockopt,
1064+
.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
10641065
};
10651066

10661067
static int __init inet6_init(void)

tools/include/uapi/linux/bpf.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,6 +3264,11 @@ union bpf_attr {
32643264
* and *params*->smac will not be set as output. A common
32653265
* use case is to call **bpf_redirect_neigh**\ () after
32663266
* doing **bpf_fib_lookup**\ ().
3267+
* **BPF_FIB_LOOKUP_SRC**
3268+
* Derive and set source IP addr in *params*->ipv{4,6}_src
3269+
* for the nexthop. If the src addr cannot be derived,
3270+
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
3271+
* case, *params*->dmac and *params*->smac are not set either.
32673272
*
32683273
* *ctx* is either **struct xdp_md** for XDP programs or
32693274
* **struct sk_buff** tc cls_act programs.
@@ -6964,6 +6969,7 @@ enum {
69646969
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
69656970
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
69666971
BPF_FIB_LOOKUP_TBID = (1U << 3),
6972+
BPF_FIB_LOOKUP_SRC = (1U << 4),
69676973
};
69686974

69696975
enum {
@@ -6976,6 +6982,7 @@ enum {
69766982
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
69776983
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
69786984
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
6985+
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
69796986
};
69806987

69816988
struct bpf_fib_lookup {
@@ -7010,6 +7017,9 @@ struct bpf_fib_lookup {
70107017
__u32 rt_metric;
70117018
};
70127019

7020+
/* input: source address to consider for lookup
7021+
* output: source address result from lookup
7022+
*/
70137023
union {
70147024
__be32 ipv4_src;
70157025
__u32 ipv6_src[4]; /* in6_addr; network order */

0 commit comments

Comments
 (0)