Skip to content

Commit ad2047c

Browse files
mfijalko and Alexei Starovoitov
authored and committed
ice: work on pre-XDP prog frag count
Fix an OOM panic in XDP_DRV mode when an XDP program shrinks a multi-buffer packet by 4k bytes and then redirects it to an AF_XDP socket.

Since support for handling multi-buffer frames was added to XDP, use of the bpf_xdp_adjust_tail() helper within an XDP program can free the page that a given fragment occupies and, in turn, decrease the fragment count within the skb_shared_info that is embedded in the xdp_buff struct. In the current ice driver codebase, this can become problematic when the page recycling logic decides not to reuse the page. In such a case, __page_frag_cache_drain() is used with an ice_rx_buf::pagecnt_bias that was not adjusted after the refcount of the page was changed by the XDP prog, which in turn does not drain the refcount to 0, and the page is never freed.

To address this, let us store the count of frags before the XDP program was executed on the Rx ring struct. This will be used to compare with the current frag count from the skb_shared_info embedded in the xdp_buff. A smaller value in the latter indicates that the XDP prog freed frag(s). Then, for the given delta, decrement pagecnt_bias for the XDP_DROP verdict.

While at it, let us also handle the EOP frag within ice_set_rx_bufs_act() to make our life easier, so that all of the adjustments that need to be applied against freed frags are performed in a single place.

Fixes: 2fba7dc ("ice: Add support for XDP multi-buffer on Rx side")
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/r/20240124191602.566724-5-maciej.fijalkowski@intel.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent c511471 commit ad2047c

3 files changed

Lines changed: 32 additions & 14 deletions

File tree

drivers/net/ethernet/intel/ice/ice_txrx.c

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -603,9 +603,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
603603
ret = ICE_XDP_CONSUMED;
604604
}
605605
exit:
606-
rx_buf->act = ret;
607-
if (unlikely(xdp_buff_has_frags(xdp)))
608-
ice_set_rx_bufs_act(xdp, rx_ring, ret);
606+
ice_set_rx_bufs_act(xdp, rx_ring, ret);
609607
}
610608

611609
/**
@@ -893,14 +891,17 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
893891
}
894892

895893
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
896-
if (unlikely(xdp_buff_has_frags(xdp)))
897-
ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
894+
ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
898895
return -ENOMEM;
899896
}
900897

901898
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
902899
rx_buf->page_offset, size);
903900
sinfo->xdp_frags_size += size;
901+
/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
902+
* can pop off frags but driver has to handle it on its own
903+
*/
904+
rx_ring->nr_frags = sinfo->nr_frags;
904905

905906
if (page_is_pfmemalloc(rx_buf->page))
906907
xdp_buff_set_frag_pfmemalloc(xdp);
@@ -1251,6 +1252,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
12511252

12521253
xdp->data = NULL;
12531254
rx_ring->first_desc = ntc;
1255+
rx_ring->nr_frags = 0;
12541256
continue;
12551257
construct_skb:
12561258
if (likely(ice_ring_uses_build_skb(rx_ring)))
@@ -1266,10 +1268,12 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
12661268
ICE_XDP_CONSUMED);
12671269
xdp->data = NULL;
12681270
rx_ring->first_desc = ntc;
1271+
rx_ring->nr_frags = 0;
12691272
break;
12701273
}
12711274
xdp->data = NULL;
12721275
rx_ring->first_desc = ntc;
1276+
rx_ring->nr_frags = 0;
12731277

12741278
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
12751279
if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,

drivers/net/ethernet/intel/ice/ice_txrx.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -358,6 +358,7 @@ struct ice_rx_ring {
358358
struct ice_tx_ring *xdp_ring;
359359
struct ice_rx_ring *next; /* pointer to next ring in q_vector */
360360
struct xsk_buff_pool *xsk_pool;
361+
u32 nr_frags;
361362
dma_addr_t dma; /* physical address of ring */
362363
u16 rx_buf_len;
363364
u8 dcb_tc; /* Traffic class of ring */

drivers/net/ethernet/intel/ice/ice_txrx_lib.h

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12,26 +12,39 @@
1212
* act: action to store onto Rx buffers related to XDP buffer parts
1313
*
1414
* Set action that should be taken before putting Rx buffer from first frag
15-
* to one before last. Last one is handled by caller of this function as it
16-
* is the EOP frag that is currently being processed. This function is
17-
* supposed to be called only when XDP buffer contains frags.
15+
* to the last.
1816
*/
1917
static inline void
2018
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
2119
const unsigned int act)
2220
{
23-
const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
24-
u32 first = rx_ring->first_desc;
25-
u32 nr_frags = sinfo->nr_frags;
21+
u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
22+
u32 nr_frags = rx_ring->nr_frags + 1;
23+
u32 idx = rx_ring->first_desc;
2624
u32 cnt = rx_ring->count;
2725
struct ice_rx_buf *buf;
2826

2927
for (int i = 0; i < nr_frags; i++) {
30-
buf = &rx_ring->rx_buf[first];
28+
buf = &rx_ring->rx_buf[idx];
3129
buf->act = act;
3230

33-
if (++first == cnt)
34-
first = 0;
31+
if (++idx == cnt)
32+
idx = 0;
33+
}
34+
35+
/* adjust pagecnt_bias on frags freed by XDP prog */
36+
if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
37+
u32 delta = rx_ring->nr_frags - sinfo_frags;
38+
39+
while (delta) {
40+
if (idx == 0)
41+
idx = cnt - 1;
42+
else
43+
idx--;
44+
buf = &rx_ring->rx_buf[idx];
45+
buf->pagecnt_bias--;
46+
delta--;
47+
}
3548
}
3649
}
3750

0 commit comments

Comments
 (0)