Skip to content

Commit 172bf00

Browse files
committed
xfrm: Support GRO for IPv4 ESP in UDP encapsulation
This patch enables the GRO codepath for IPv4 ESP-in-UDP encapsulated packets. Decapsulation happens at L2 and saves a full round through the stack for each packet. This is also needed to support HW offload for ESP-in-UDP encapsulation. Enabling this would improve performance for the ESP-in-UDP datapath, i.e. IPsec with NAT in between. By default, GRO for ESP-in-UDP is disabled for UDP sockets. To enable this feature for an ESP socket, the following two options need to be set: 1. Enable ESP-in-UDP (this is already set by an IKE daemon): int type = UDP_ENCAP_ESPINUDP; setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)); 2. Enable GRO for the ESP-in-UDP socket: type = true; setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type)); Enabling ESP-in-UDP has the side effect of preventing the Linux stack from seeing ESP packets at L3 (when ESP OFFLOAD is disabled), as packets are immediately decapsulated from UDP and decrypted. This change may affect nftables rules that match on ESP packets at L3. Also, tcpdump won't see the ESP packet. Developers/admins are advised to review and adapt any nftables rules accordingly before enabling this feature to prevent potential rule breakage. Also, tcpdump will not see ESP packets from an ESP-in-UDP flow when this is enabled. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> Co-developed-by: Antony Antony <antony.antony@secunet.com> Signed-off-by: Antony Antony <antony.antony@secunet.com> Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
1 parent b439475 commit 172bf00

5 files changed

Lines changed: 98 additions & 20 deletions

File tree

include/net/gro.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct napi_gro_cb {
4141
/* Number of segments aggregated. */
4242
u16 count;
4343

44-
/* Used in ipv6_gro_receive() and foo-over-udp */
44+
/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
4545
u16 proto;
4646

4747
/* Used in napi_gro_cb::free */

include/net/xfrm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,8 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
17101710
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
17111711
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
17121712
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
1713+
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
1714+
struct sk_buff *skb);
17131715
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
17141716
int optlen);
17151717
#else

net/ipv4/esp4_offload.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
3333
int offset = skb_gro_offset(skb);
3434
struct xfrm_offload *xo;
3535
struct xfrm_state *x;
36+
int encap_type = 0;
3637
__be32 seq;
3738
__be32 spi;
3839

@@ -70,14 +71,17 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
7071

7172
xo->flags |= XFRM_GRO;
7273

74+
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
75+
encap_type = UDP_ENCAP_ESPINUDP;
76+
7377
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
7478
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
7579
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
7680
XFRM_SPI_SKB_CB(skb)->seq = seq;
7781

7882
/* We don't need to handle errors from xfrm_input, it does all
7983
* the error handling and frees the resources on error. */
80-
xfrm_input(skb, IPPROTO_ESP, spi, 0);
84+
xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
8185

8286
return ERR_PTR(-EINPROGRESS);
8387
out_reset:

net/ipv4/udp.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk)
26252625
}
26262626
}
26272627

2628+
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
2629+
struct sock *sk)
2630+
{
2631+
#ifdef CONFIG_XFRM
2632+
if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
2633+
if (family == AF_INET)
2634+
WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
2635+
}
2636+
#endif
2637+
}
2638+
26282639
/*
26292640
* Socket option code for UDP
26302641
*/
@@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
26742685
case 0:
26752686
#ifdef CONFIG_XFRM
26762687
case UDP_ENCAP_ESPINUDP:
2688+
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
2689+
fallthrough;
26772690
case UDP_ENCAP_ESPINUDP_NON_IKE:
26782691
#if IS_ENABLED(CONFIG_IPV6)
26792692
if (sk->sk_family == AF_INET6)
@@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
27162729
udp_tunnel_encap_enable(sk);
27172730
udp_assign_bit(GRO_ENABLED, sk, valbool);
27182731
udp_assign_bit(ACCEPT_L4, sk, valbool);
2732+
set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
27192733
break;
27202734

27212735
/*

net/ipv4/xfrm4_input.c

Lines changed: 76 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <linux/netfilter_ipv4.h>
1818
#include <net/ip.h>
1919
#include <net/xfrm.h>
20+
#include <net/protocol.h>
21+
#include <net/gro.h>
2022

2123
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
2224
struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
7274
return 0;
7375
}
7476

75-
/* If it's a keepalive packet, then just eat it.
76-
* If it's an encapsulated packet, then pass it to the
77-
* IPsec xfrm input.
78-
* Returns 0 if skb passed to xfrm or was dropped.
79-
* Returns >0 if skb should be passed to UDP.
80-
* Returns <0 if skb should be resubmitted (-ret is protocol)
81-
*/
82-
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
77+
static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
8378
{
8479
struct udp_sock *up = udp_sk(sk);
8580
struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
110105
case UDP_ENCAP_ESPINUDP:
111106
/* Check if this is a keepalive packet. If so, eat it. */
112107
if (len == 1 && udpdata[0] == 0xff) {
113-
goto drop;
108+
return -EINVAL;
114109
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
115110
/* ESP Packet without Non-ESP header */
116111
len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
121116
case UDP_ENCAP_ESPINUDP_NON_IKE:
122117
/* Check if this is a keepalive packet. If so, eat it. */
123118
if (len == 1 && udpdata[0] == 0xff) {
124-
goto drop;
119+
return -EINVAL;
125120
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
126121
udpdata32[0] == 0 && udpdata32[1] == 0) {
127122

@@ -139,33 +134,96 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
139134
* protocol to ESP, and then call into the transform receiver.
140135
*/
141136
if (skb_unclone(skb, GFP_ATOMIC))
142-
goto drop;
137+
return -EINVAL;
143138

144139
/* Now we can update and verify the packet length... */
145140
iph = ip_hdr(skb);
146141
iphlen = iph->ihl << 2;
147142
iph->tot_len = htons(ntohs(iph->tot_len) - len);
148143
if (skb->len < iphlen + len) {
149144
/* packet is too small!?! */
150-
goto drop;
145+
return -EINVAL;
151146
}
152147

153148
/* pull the data buffer up to the ESP header and set the
154149
* transport header to point to ESP. Keep UDP on the stack
155150
* for later.
156151
*/
157-
__skb_pull(skb, len);
158-
skb_reset_transport_header(skb);
152+
if (pull) {
153+
__skb_pull(skb, len);
154+
skb_reset_transport_header(skb);
155+
} else {
156+
skb_set_transport_header(skb, len);
157+
}
159158

160159
/* process ESP */
161-
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
162-
163-
drop:
164-
kfree_skb(skb);
165160
return 0;
166161
}
167162
EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
168163

164+
/* If it's a keepalive packet, then just eat it.
165+
* If it's an encapsulated packet, then pass it to the
166+
* IPsec xfrm input.
167+
* Returns 0 if skb passed to xfrm or was dropped.
168+
* Returns >0 if skb should be passed to UDP.
169+
* Returns <0 if skb should be resubmitted (-ret is protocol)
170+
*/
171+
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
172+
{
173+
int ret;
174+
175+
ret = __xfrm4_udp_encap_rcv(sk, skb, true);
176+
if (!ret)
177+
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
178+
udp_sk(sk)->encap_type);
179+
180+
if (ret < 0) {
181+
kfree_skb(skb);
182+
return 0;
183+
}
184+
185+
return ret;
186+
}
187+
188+
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
189+
struct sk_buff *skb)
190+
{
191+
int offset = skb_gro_offset(skb);
192+
const struct net_offload *ops;
193+
struct sk_buff *pp = NULL;
194+
int ret;
195+
196+
offset = offset - sizeof(struct udphdr);
197+
198+
if (!pskb_pull(skb, offset))
199+
return NULL;
200+
201+
rcu_read_lock();
202+
ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
203+
if (!ops || !ops->callbacks.gro_receive)
204+
goto out;
205+
206+
ret = __xfrm4_udp_encap_rcv(sk, skb, false);
207+
if (ret)
208+
goto out;
209+
210+
skb_push(skb, offset);
211+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
212+
213+
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
214+
rcu_read_unlock();
215+
216+
return pp;
217+
218+
out:
219+
rcu_read_unlock();
220+
skb_push(skb, offset);
221+
NAPI_GRO_CB(skb)->same_flow = 0;
222+
NAPI_GRO_CB(skb)->flush = 1;
223+
224+
return NULL;
225+
}
226+
169227
int xfrm4_rcv(struct sk_buff *skb)
170228
{
171229
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);

0 commit comments

Comments
 (0)