Skip to content

Commit 1fdad81

Browse files
committed
Merge branch 'big-tcp-without-hbh-in-ipv6'
Alice Mikityanska says: ==================== BIG TCP without HBH in IPv6 Resubmitting after the grace period. This series is part 1 of "BIG TCP for UDP tunnels". Due to the number of patches, I'm splitting it into two logical parts: * Remove hop-by-hop header for BIG TCP IPv6 to align with BIG TCP IPv4. * Fix up things that prevent BIG TCP from working with UDP tunnels. The current BIG TCP IPv6 code inserts a hop-by-hop extension header with 32-bit length of the packet. When the packet is encapsulated, and either the outer or the inner protocol is IPv6, or both are IPv6, there will be 1 or 2 HBH headers that need to be dealt with. The issues that arise: 1. The drivers don't strip it, and they'd all need to know the structure of each tunnel protocol in order to strip it correctly, also taking into account all combinations of IPv4/IPv6 inner/outer protocols. 2. Even if (1) is implemented, it would be an additional performance penalty per aggregated packet. 3. The skb_gso_validate_network_len check is skipped in ip6_finish_output_gso when IP6SKB_FAKEJUMBO is set, but it seems that it would make sense to do the actual validation, just taking into account the length of the HBH header. When the support for tunnels is added, it becomes trickier, because there may be one or two HBH headers, depending on whether it's IPv6 in IPv6 or not. At the same time, having an HBH header to store the 32-bit length is not strictly necessary, as BIG TCP IPv4 doesn't do anything like this and just restores the length from skb->len. The same thing can be done for BIG TCP IPv6. Removing HBH from BIG TCP would allow simplifying the implementation significantly, and align it with BIG TCP IPv4, which has been a long-standing goal. ==================== Link: https://patch.msgid.link/20260205133925.526371-1-alice.kernel@fastmail.im Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 5826eec + 35f66ce commit 1fdad81

22 files changed

Lines changed: 61 additions & 282 deletions

File tree

drivers/net/ethernet/broadcom/bnge/bnge_txrx.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1463,9 +1463,6 @@ netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev)
14631463
return NETDEV_TX_BUSY;
14641464
}
14651465

1466-
if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
1467-
goto tx_free;
1468-
14691466
last_frag = skb_shinfo(skb)->nr_frags;
14701467

14711468
txbd = &txr->tx_desc_ring[TX_RING(bn, prod)][TX_IDX(prod)];

drivers/net/ethernet/broadcom/bnxt/bnxt.c

Lines changed: 0 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -517,9 +517,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
517517
return NETDEV_TX_BUSY;
518518
}
519519

520-
if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
521-
goto tx_free;
522-
523520
length = skb->len;
524521
len = skb_headlen(skb);
525522
last_frag = skb_shinfo(skb)->nr_frags;
@@ -13881,7 +13878,6 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
1388113878
u8 **nextp)
1388213879
{
1388313880
struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
13884-
struct hop_jumbo_hdr *jhdr;
1388513881
int hdr_count = 0;
1388613882
u8 *nexthdr;
1388713883
int start;
@@ -13910,24 +13906,7 @@ static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
1391013906
if (hdrlen > 64)
1391113907
return false;
1391213908

13913-
/* The ext header may be a hop-by-hop header inserted for
13914-
* big TCP purposes. This will be removed before sending
13915-
* from NIC, so do not count it.
13916-
*/
13917-
if (*nexthdr == NEXTHDR_HOP) {
13918-
if (likely(skb->len <= GRO_LEGACY_MAX_SIZE))
13919-
goto increment_hdr;
13920-
13921-
jhdr = (struct hop_jumbo_hdr *)hp;
13922-
if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
13923-
jhdr->nexthdr != IPPROTO_TCP)
13924-
goto increment_hdr;
13925-
13926-
goto next_hdr;
13927-
}
13928-
increment_hdr:
1392913909
hdr_count++;
13930-
next_hdr:
1393113910
nexthdr = &hp->nexthdr;
1393213911
start += hdrlen;
1393313912
}

drivers/net/ethernet/google/gve/gve_tx_dqo.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -963,9 +963,6 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
963963
int num_buffer_descs;
964964
int total_num_descs;
965965

966-
if (skb_is_gso(skb) && unlikely(ipv6_hopopt_jumbo_remove(skb)))
967-
goto drop;
968-
969966
if (tx->dqo.qpl) {
970967
/* We do not need to verify the number of buffers used per
971968
* packet or per segment in case of TSO as with 2K size buffers

drivers/net/ethernet/intel/ice/ice_txrx.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2156,9 +2156,6 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
21562156

21572157
ice_trace(xmit_frame_ring, tx_ring, skb);
21582158

2159-
if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
2160-
goto out_drop;
2161-
21622159
count = ice_xmit_desc_count(skb);
21632160
if (ice_chk_linearize(skb, count)) {
21642161
if (__skb_linearize(skb))

drivers/net/ethernet/mellanox/mlx4/en_tx.c

Lines changed: 8 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -636,28 +636,20 @@ static int get_real_size(const struct sk_buff *skb,
636636
struct net_device *dev,
637637
int *lso_header_size,
638638
bool *inline_ok,
639-
void **pfrag,
640-
int *hopbyhop)
639+
void **pfrag)
641640
{
642641
struct mlx4_en_priv *priv = netdev_priv(dev);
643642
int real_size;
644643

645644
if (shinfo->gso_size) {
646645
*inline_ok = false;
647-
*hopbyhop = 0;
648646
if (skb->encapsulation) {
649647
*lso_header_size = skb_inner_tcp_all_headers(skb);
650648
} else {
651-
/* Detects large IPV6 TCP packets and prepares for removal of
652-
* HBH header that has been pushed by ip6_xmit(),
653-
* mainly so that tcpdump can dissect them.
654-
*/
655-
if (ipv6_has_hopopt_jumbo(skb))
656-
*hopbyhop = sizeof(struct hop_jumbo_hdr);
657649
*lso_header_size = skb_tcp_all_headers(skb);
658650
}
659651
real_size = CTRL_SIZE + shinfo->nr_frags * DS_SIZE +
660-
ALIGN(*lso_header_size - *hopbyhop + 4, DS_SIZE);
652+
ALIGN(*lso_header_size + 4, DS_SIZE);
661653
if (unlikely(*lso_header_size != skb_headlen(skb))) {
662654
/* We add a segment for the skb linear buffer only if
663655
* it contains data */
@@ -884,7 +876,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
884876
int desc_size;
885877
int real_size;
886878
u32 index, bf_index;
887-
struct ipv6hdr *h6;
888879
__be32 op_own;
889880
int lso_header_size;
890881
void *fragptr = NULL;
@@ -893,7 +884,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
893884
bool stop_queue;
894885
bool inline_ok;
895886
u8 data_offset;
896-
int hopbyhop;
897887
bool bf_ok;
898888

899889
tx_ind = skb_get_queue_mapping(skb);
@@ -903,7 +893,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
903893
goto tx_drop;
904894

905895
real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
906-
&inline_ok, &fragptr, &hopbyhop);
896+
&inline_ok, &fragptr);
907897
if (unlikely(!real_size))
908898
goto tx_drop_count;
909899

@@ -956,7 +946,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
956946
data = &tx_desc->data;
957947
data_offset = offsetof(struct mlx4_en_tx_desc, data);
958948
} else {
959-
int lso_align = ALIGN(lso_header_size - hopbyhop + 4, DS_SIZE);
949+
int lso_align = ALIGN(lso_header_size + 4, DS_SIZE);
960950

961951
data = (void *)&tx_desc->lso + lso_align;
962952
data_offset = offsetof(struct mlx4_en_tx_desc, lso) + lso_align;
@@ -1021,31 +1011,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
10211011
((ring->prod & ring->size) ?
10221012
cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
10231013

1024-
lso_header_size -= hopbyhop;
10251014
/* Fill in the LSO prefix */
10261015
tx_desc->lso.mss_hdr_size = cpu_to_be32(
10271016
shinfo->gso_size << 16 | lso_header_size);
10281017

1018+
/* Copy headers;
1019+
* note that we already verified that it is linear
1020+
*/
1021+
memcpy(tx_desc->lso.header, skb->data, lso_header_size);
10291022

1030-
if (unlikely(hopbyhop)) {
1031-
/* remove the HBH header.
1032-
* Layout: [Ethernet header][IPv6 header][HBH][TCP header]
1033-
*/
1034-
memcpy(tx_desc->lso.header, skb->data, ETH_HLEN + sizeof(*h6));
1035-
h6 = (struct ipv6hdr *)((char *)tx_desc->lso.header + ETH_HLEN);
1036-
h6->nexthdr = IPPROTO_TCP;
1037-
/* Copy the TCP header after the IPv6 one */
1038-
memcpy(h6 + 1,
1039-
skb->data + ETH_HLEN + sizeof(*h6) +
1040-
sizeof(struct hop_jumbo_hdr),
1041-
tcp_hdrlen(skb));
1042-
/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
1043-
} else {
1044-
/* Copy headers;
1045-
* note that we already verified that it is linear
1046-
*/
1047-
memcpy(tx_desc->lso.header, skb->data, lso_header_size);
1048-
}
10491023
ring->tso_packets++;
10501024

10511025
i = shinfo->gso_segs;

drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

Lines changed: 12 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -152,12 +152,11 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
152152
* to inline later in the transmit descriptor
153153
*/
154154
static inline u16
155-
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
155+
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
156156
{
157157
struct mlx5e_sq_stats *stats = sq->stats;
158158
u16 ihs;
159159

160-
*hopbyhop = 0;
161160
if (skb->encapsulation) {
162161
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
163162
ihs = skb_inner_transport_offset(skb) +
@@ -167,17 +166,12 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
167166
stats->tso_inner_packets++;
168167
stats->tso_inner_bytes += skb->len - ihs;
169168
} else {
170-
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
169+
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
171170
ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
172-
} else {
171+
else
173172
ihs = skb_tcp_all_headers(skb);
174-
if (ipv6_has_hopopt_jumbo(skb)) {
175-
*hopbyhop = sizeof(struct hop_jumbo_hdr);
176-
ihs -= sizeof(struct hop_jumbo_hdr);
177-
}
178-
}
179173
stats->tso_packets++;
180-
stats->tso_bytes += skb->len - ihs - *hopbyhop;
174+
stats->tso_bytes += skb->len - ihs;
181175
}
182176

183177
return ihs;
@@ -239,7 +233,6 @@ struct mlx5e_tx_attr {
239233
__be16 mss;
240234
u16 insz;
241235
u8 opcode;
242-
u8 hopbyhop;
243236
};
244237

245238
struct mlx5e_tx_wqe_attr {
@@ -275,16 +268,14 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
275268
struct mlx5e_sq_stats *stats = sq->stats;
276269

277270
if (skb_is_gso(skb)) {
278-
int hopbyhop;
279-
u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);
271+
u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb);
280272

281273
*attr = (struct mlx5e_tx_attr) {
282274
.opcode = MLX5_OPCODE_LSO,
283275
.mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
284276
.ihs = ihs,
285277
.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
286-
.headlen = skb_headlen(skb) - ihs - hopbyhop,
287-
.hopbyhop = hopbyhop,
278+
.headlen = skb_headlen(skb) - ihs,
288279
};
289280

290281
stats->packets += skb_shinfo(skb)->gso_segs;
@@ -439,7 +430,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
439430
struct mlx5_wqe_data_seg *dseg;
440431
struct mlx5e_tx_wqe_info *wi;
441432
u16 ihs = attr->ihs;
442-
struct ipv6hdr *h6;
443433
struct mlx5e_sq_stats *stats = sq->stats;
444434
int num_dma;
445435

@@ -456,28 +446,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
456446
if (ihs) {
457447
u8 *start = eseg->inline_hdr.start;
458448

459-
if (unlikely(attr->hopbyhop)) {
460-
/* remove the HBH header.
461-
* Layout: [Ethernet header][IPv6 header][HBH][TCP header]
462-
*/
463-
if (skb_vlan_tag_present(skb)) {
464-
mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
465-
ihs += VLAN_HLEN;
466-
h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
467-
} else {
468-
unsafe_memcpy(start, skb->data,
469-
ETH_HLEN + sizeof(*h6),
470-
MLX5_UNSAFE_MEMCPY_DISCLAIMER);
471-
h6 = (struct ipv6hdr *)(start + ETH_HLEN);
472-
}
473-
h6->nexthdr = IPPROTO_TCP;
474-
/* Copy the TCP header after the IPv6 one */
475-
memcpy(h6 + 1,
476-
skb->data + ETH_HLEN + sizeof(*h6) +
477-
sizeof(struct hop_jumbo_hdr),
478-
tcp_hdrlen(skb));
479-
/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
480-
} else if (skb_vlan_tag_present(skb)) {
449+
if (skb_vlan_tag_present(skb)) {
481450
mlx5e_insert_vlan(start, skb, ihs);
482451
ihs += VLAN_HLEN;
483452
stats->added_vlan_packets++;
@@ -491,7 +460,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
491460
}
492461

493462
dseg += wqe_attr->ds_cnt_ids;
494-
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
463+
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
495464
attr->headlen, dseg);
496465
if (unlikely(num_dma < 0))
497466
goto err_drop;
@@ -1019,34 +988,14 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
1019988
eseg->mss = attr.mss;
1020989

1021990
if (attr.ihs) {
1022-
if (unlikely(attr.hopbyhop)) {
1023-
struct ipv6hdr *h6;
1024-
1025-
/* remove the HBH header.
1026-
* Layout: [Ethernet header][IPv6 header][HBH][TCP header]
1027-
*/
1028-
unsafe_memcpy(eseg->inline_hdr.start, skb->data,
1029-
ETH_HLEN + sizeof(*h6),
1030-
MLX5_UNSAFE_MEMCPY_DISCLAIMER);
1031-
h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
1032-
h6->nexthdr = IPPROTO_TCP;
1033-
/* Copy the TCP header after the IPv6 one */
1034-
unsafe_memcpy(h6 + 1,
1035-
skb->data + ETH_HLEN + sizeof(*h6) +
1036-
sizeof(struct hop_jumbo_hdr),
1037-
tcp_hdrlen(skb),
1038-
MLX5_UNSAFE_MEMCPY_DISCLAIMER);
1039-
/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
1040-
} else {
1041-
unsafe_memcpy(eseg->inline_hdr.start, skb->data,
1042-
attr.ihs,
1043-
MLX5_UNSAFE_MEMCPY_DISCLAIMER);
1044-
}
991+
unsafe_memcpy(eseg->inline_hdr.start, skb->data,
992+
attr.ihs,
993+
MLX5_UNSAFE_MEMCPY_DISCLAIMER);
1045994
eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
1046995
dseg += wqe_attr.ds_cnt_inl;
1047996
}
1048997

1049-
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
998+
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
1050999
attr.headlen, dseg);
10511000
if (unlikely(num_dma < 0))
10521001
goto err_drop;

drivers/net/ethernet/microsoft/mana/mana_en.c

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -355,9 +355,6 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
355355
if (skb_cow_head(skb, MANA_HEADROOM))
356356
goto tx_drop_count;
357357

358-
if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
359-
goto tx_drop_count;
360-
361358
txq = &apc->tx_qp[txq_idx].txq;
362359
gdma_sq = txq->gdma_sq;
363360
cq = &apc->tx_qp[txq_idx].tx_cq;

include/linux/ipv6.h

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,28 @@ static inline unsigned int ipv6_transport_len(const struct sk_buff *skb)
126126
skb_network_header_len(skb);
127127
}
128128

129+
static inline unsigned int
130+
ipv6_payload_len(const struct sk_buff *skb, const struct ipv6hdr *ip6)
131+
{
132+
u32 len = ntohs(ip6->payload_len);
133+
134+
return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ?
135+
len :
136+
skb->len - skb_network_offset(skb) - sizeof(struct ipv6hdr);
137+
}
138+
139+
static inline unsigned int skb_ipv6_payload_len(const struct sk_buff *skb)
140+
{
141+
return ipv6_payload_len(skb, ipv6_hdr(skb));
142+
}
143+
144+
#define IPV6_MAXPLEN 65535
145+
146+
static inline void ipv6_set_payload_len(struct ipv6hdr *ip6, unsigned int len)
147+
{
148+
ip6->payload_len = len <= IPV6_MAXPLEN ? htons(len) : 0;
149+
}
150+
129151
/*
130152
This structure contains results of exthdrs parsing
131153
as offsets from skb->nh.
@@ -155,7 +177,6 @@ struct inet6_skb_parm {
155177
#define IP6SKB_L3SLAVE 64
156178
#define IP6SKB_JUMBOGRAM 128
157179
#define IP6SKB_SEG6 256
158-
#define IP6SKB_FAKEJUMBO 512
159180
#define IP6SKB_MULTIPATH 1024
160181
#define IP6SKB_MCROUTE 2048
161182
};

0 commit comments

Comments (0)