Skip to content

Commit d30301b

Browse files
LorenzoBianconi authored and ummakynes committed
netfilter: flowtable: Add IPIP tx sw acceleration
Introduce sw acceleration for tx path of IPIP tunnels relying on the netfilter flowtable infrastructure. This patch introduces basic infrastructure to accelerate other tunnel types (e.g. IP6IP6).

IPIP sw tx acceleration can be tested running the following scenario where the traffic is forwarded between two NICs (eth0 and eth1) and an IPIP tunnel is used to access a remote site (using eth1 as the underlay device):

ETH0 -- TUN0 <==> ETH1 -- [IP network] -- TUN1 (192.168.100.2)

$ip addr show
6: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:00:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet 192.168.0.2/24 scope global eth0
       valid_lft forever preferred_lft forever
7: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
    link/ether 00:11:22:33:11:55 brd ff:ff:ff:ff:ff:ff
    inet 192.168.1.1/24 scope global eth1
       valid_lft forever preferred_lft forever
8: tun0@NONE: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1480 qdisc noqueue state UNKNOWN group default qlen 1000
    link/ipip 192.168.1.1 peer 192.168.1.2
    inet 192.168.100.1/24 scope global tun0
       valid_lft forever preferred_lft forever

$ip route show
default via 192.168.100.2 dev tun0
192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.2
192.168.1.0/24 dev eth1 proto kernel scope link src 192.168.1.1
192.168.100.0/24 dev tun0 proto kernel scope link src 192.168.100.1

$nft list ruleset
table inet filter {
    flowtable ft {
        hook ingress priority filter
        devices = { eth0, eth1 }
    }

    chain forward {
        type filter hook forward priority filter; policy accept;
        meta l4proto { tcp, udp } flow add @ft
    }
}

Reproducing the scenario described above using veths I got the following results:
- TCP stream transmitted into the IPIP tunnel:
  - net-next: (baseline) ~ 85Gbps
  - net-next + IPIP flowtable support: ~102Gbps

Co-developed-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent ab427db commit d30301b

2 files changed

Lines changed: 106 additions & 4 deletions

File tree

net/netfilter/nf_flow_table_ip.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,9 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
437437
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
438438

439439
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
440+
if (flow->tuplehash[!dir].tuple.tun_num)
441+
mtu -= sizeof(*iph);
442+
440443
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
441444
return 0;
442445

@@ -508,6 +511,62 @@ static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
508511
return 0;
509512
}
510513

514+
/*
 * Prepend an outer IPv4 header to @skb for a flow whose tuple carries
 * tunnel information.
 *
 * @net:      namespace handed to __ip_select_ident() for the outer header
 * @skb:      packet to encapsulate; its current network header is read as
 *            the inner IPv4 header
 * @tuple:    flow tuple supplying the cached route (dst_cache) and the
 *            tunnel addresses/protocol (tun)
 * @ip_daddr: out parameter, overwritten with the outer destination address
 *
 * Returns 0 on success, otherwise the error returned by
 * iptunnel_handle_offloads() or skb_cow_head(). *ip_daddr is written only
 * on the success path.
 */
static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
515+
struct flow_offload_tuple *tuple,
516+
__be32 *ip_daddr)
517+
{
518+
struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
519+
struct rtable *rt = dst_rtable(tuple->dst_cache);
520+
/* Snapshot the inner header fields that the outer header reuses below. */
u8 tos = iph->tos, ttl = iph->ttl;
521+
__be16 frag_off = iph->frag_off;
522+
u32 headroom = sizeof(*iph);
523+
int err;
524+
525+
err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4);
526+
if (err)
527+
return err;
528+
529+
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
530+
/*
 * Required headroom: the new IP header plus LL_RESERVED_SPACE() of the
 * route's device and the route's dst.header_len.
 */
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
531+
err = skb_cow_head(skb, headroom);
532+
if (err)
533+
return err;
534+
535+
skb_scrub_packet(skb, true);
536+
skb_clear_hash_if_not_l4(skb);
537+
538+
/* Push down and install the IP header. */
539+
skb_push(skb, sizeof(*iph));
540+
skb_reset_network_header(skb);
541+
542+
/* Re-fetch: the network header now refers to the freshly pushed area. */
iph = ip_hdr(skb);
543+
iph->version = 4;
544+
iph->ihl = sizeof(*iph) >> 2;
545+
/* Inner frag_off is copied unless ip_mtu_locked() is true for the route. */
iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : frag_off;
546+
iph->protocol = tuple->tun.l3_proto;
547+
iph->tos = tos;
548+
/*
 * NOTE(review): outer daddr is taken from tun.src and outer saddr from
 * tun.dst. The caller visible in this diff passes the opposite
 * direction's tuple, which presumably explains the swap - confirm.
 */
iph->daddr = tuple->tun.src_v4.s_addr;
549+
iph->saddr = tuple->tun.dst_v4.s_addr;
550+
iph->ttl = ttl;
551+
/* skb->len is read after skb_push(), so it includes the outer header. */
iph->tot_len = htons(skb->len);
552+
/* Pass gso_segs as the segment count, or 1 when gso_segs is zero. */
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
553+
ip_send_check(iph);
554+
555+
/* Report the outer destination back to the caller. */
*ip_daddr = tuple->tun.src_v4.s_addr;
556+
557+
return 0;
558+
}
559+
560+
/*
 * Apply IPv4 tunnel encapsulation when the tuple has a tunnel recorded.
 *
 * If tuple->tun_num is non-zero, delegates to nf_flow_tunnel_ipip_push()
 * with the same arguments and returns its result. Otherwise returns 0 and
 * leaves @skb and *@ip_daddr untouched.
 */
static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
561+
struct flow_offload_tuple *tuple,
562+
__be32 *ip_daddr)
563+
{
564+
if (tuple->tun_num)
565+
return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);
566+
567+
return 0;
568+
}
569+
511570
static int nf_flow_encap_push(struct sk_buff *skb,
512571
struct flow_offload_tuple *tuple)
513572
{
@@ -572,6 +631,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
572631
other_tuple = &flow->tuplehash[!dir].tuple;
573632
ip_daddr = other_tuple->src_v4.s_addr;
574633

634+
if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
635+
return NF_DROP;
636+
575637
if (nf_flow_encap_push(skb, other_tuple) < 0)
576638
return NF_DROP;
577639

net/netfilter/nf_flow_table_path.c

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,46 @@ static bool nft_flowtable_find_dev(const struct net_device *dev,
197197
return found;
198198
}
199199

200-
static void nft_dev_forward_path(struct nf_flow_route *route,
200+
/*
 * Replace route->tuple[dir].dst with a route looked up for the tunnel
 * addresses in @tun.
 *
 * @pkt:   packet info; supplies the protocol family, netns, input device,
 *         skb mark and the header fields used in the lookup key
 * @tun:   tunnel source/destination addresses used as the lookup key
 * @route: route whose tuple[dir].dst is replaced on success
 * @dir:   index into route->tuple[] selecting the entry to update
 *
 * Returns 0 after installing the new dst and releasing the previous one,
 * or -ENOENT when the lookup yields no dst; in that case @route is left
 * unchanged.
 */
static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
201+
struct flow_offload_tunnel *tun,
202+
struct nf_flow_route *route,
203+
enum ip_conntrack_dir dir)
204+
{
205+
/* Remember the current dst so it can be released after the swap. */
struct dst_entry *cur_dst = route->tuple[dir].dst;
206+
struct dst_entry *tun_dst = NULL;
207+
struct flowi fl = {};
208+
209+
/*
 * Build the lookup key for the packet's family. There is no default
 * case: for any other family fl stays zero-initialized and nf_route()
 * below is still called.
 */
switch (nft_pf(pkt)) {
210+
case NFPROTO_IPV4:
211+
fl.u.ip4.daddr = tun->dst_v4.s_addr;
212+
fl.u.ip4.saddr = tun->src_v4.s_addr;
213+
fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
214+
fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
215+
fl.u.ip4.flowi4_mark = pkt->skb->mark;
216+
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
217+
break;
218+
case NFPROTO_IPV6:
219+
fl.u.ip6.daddr = tun->dst_v6;
220+
fl.u.ip6.saddr = tun->src_v6;
221+
fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
222+
fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
223+
fl.u.ip6.flowi6_mark = pkt->skb->mark;
224+
fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
225+
break;
226+
}
227+
228+
/*
 * The return value of nf_route() is not inspected; a still-NULL tun_dst
 * is what signals failure here.
 */
nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
229+
if (!tun_dst)
230+
return -ENOENT;
231+
232+
/* Install the new dst first, then drop the reference to the old one. */
route->tuple[dir].dst = tun_dst;
233+
dst_release(cur_dst);
234+
235+
return 0;
236+
}
237+
238+
static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
239+
struct nf_flow_route *route,
201240
const struct nf_conn *ct,
202241
enum ip_conntrack_dir dir,
203242
struct nft_flowtable *ft)
@@ -220,7 +259,8 @@ static void nft_dev_forward_path(struct nf_flow_route *route,
220259
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
221260
}
222261

223-
if (info.num_tuns) {
262+
if (info.num_tuns &&
263+
!nft_flow_tunnel_update_route(pkt, &info.tun, route, dir)) {
224264
route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
225265
route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
226266
route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
@@ -281,9 +321,9 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
281321
nft_default_forward_path(route, other_dst, !dir);
282322

283323
if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
284-
nft_dev_forward_path(route, ct, dir, ft);
324+
nft_dev_forward_path(pkt, route, ct, dir, ft);
285325
if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
286-
nft_dev_forward_path(route, ct, !dir, ft);
326+
nft_dev_forward_path(pkt, route, ct, !dir, ft);
287327

288328
return 0;
289329
}

0 commit comments

Comments
 (0)