Skip to content

Commit 5ff3d3d

Browse files
Wei Fang (NXP) authored and Paolo Abeni
committed
net: fec: add fec_enet_rx_queue_xdp() for XDP path
Currently, the processing of XDP path packets and protocol stack packets is mixed together in fec_enet_rx_queue(), which makes the logic somewhat confusing and debugging more difficult. Furthermore, some of the logic is not needed by both paths. For example, the kernel path does not need to call xdp_init_buff(), and the XDP path does not support swap_buffer() (because fec_enet_bpf() returns "-EOPNOTSUPP" on those platforms which need swap_buffer()), and so on. This prevents XDP from achieving its maximum performance. Therefore, XDP path packet processing has been separated from fec_enet_rx_queue() by adding the fec_enet_rx_queue_xdp() function, to simplify the XDP path logic and improve XDP performance. The XDP performance on the i.MX93 platform was compared before and after applying this patch. Detailed results are as follows, and we can see the performance has been improved. Env: i.MX93, packet size 64 bytes including FCS, only a single core and RX BD ring are used to receive packets, flow-control is off. Before the patch is applied: xdp-bench tx eth0 Summary 396,868 rx/s 0 err,drop/s Summary 396,024 rx/s 0 err,drop/s xdp-bench drop eth0 Summary 684,781 rx/s 0 err/s Summary 675,746 rx/s 0 err/s xdp-bench pass eth0 Summary 208,552 rx/s 0 err,drop/s Summary 208,654 rx/s 0 err,drop/s xdp-bench redirect eth0 eth0 eth0->eth0 311,210 rx/s 0 err,drop/s 311,208 xmit/s eth0->eth0 310,808 rx/s 0 err,drop/s 310,809 xmit/s After the patch is applied: xdp-bench tx eth0 Summary 409,975 rx/s 0 err,drop/s Summary 411,073 rx/s 0 err,drop/s xdp-bench drop eth0 Summary 700,681 rx/s 0 err/s Summary 698,102 rx/s 0 err/s xdp-bench pass eth0 Summary 211,356 rx/s 0 err,drop/s Summary 210,629 rx/s 0 err,drop/s xdp-bench redirect eth0 eth0 eth0->eth0 320,351 rx/s 0 err,drop/s 320,348 xmit/s eth0->eth0 318,988 rx/s 0 err,drop/s 318,988 xmit/s Signed-off-by: Wei Fang <wei.fang@nxp.com> Link: https://patch.msgid.link/20260205085742.2685134-7-wei.fang@nxp.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent bc609f9 commit 5ff3d3d

1 file changed

Lines changed: 170 additions & 86 deletions

File tree

drivers/net/ethernet/freescale/fec_main.c

Lines changed: 170 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,71 +1666,6 @@ static int fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
16661666
return 0;
16671667
}
16681668

1669-
static u32
1670-
fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
1671-
struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int cpu)
1672-
{
1673-
unsigned int sync, len = xdp->data_end - xdp->data;
1674-
u32 ret = FEC_ENET_XDP_PASS;
1675-
struct page *page;
1676-
int err;
1677-
u32 act;
1678-
1679-
act = bpf_prog_run_xdp(prog, xdp);
1680-
1681-
/* Due xdp_adjust_tail and xdp_adjust_head: DMA sync for_device cover
1682-
* max len CPU touch
1683-
*/
1684-
sync = xdp->data_end - xdp->data;
1685-
sync = max(sync, len);
1686-
1687-
switch (act) {
1688-
case XDP_PASS:
1689-
rxq->stats[RX_XDP_PASS]++;
1690-
ret = FEC_ENET_XDP_PASS;
1691-
break;
1692-
1693-
case XDP_REDIRECT:
1694-
rxq->stats[RX_XDP_REDIRECT]++;
1695-
err = xdp_do_redirect(fep->netdev, xdp, prog);
1696-
if (unlikely(err))
1697-
goto xdp_err;
1698-
1699-
ret = FEC_ENET_XDP_REDIR;
1700-
break;
1701-
1702-
case XDP_TX:
1703-
rxq->stats[RX_XDP_TX]++;
1704-
err = fec_enet_xdp_tx_xmit(fep, cpu, xdp, sync);
1705-
if (unlikely(err)) {
1706-
rxq->stats[RX_XDP_TX_ERRORS]++;
1707-
goto xdp_err;
1708-
}
1709-
1710-
ret = FEC_ENET_XDP_TX;
1711-
break;
1712-
1713-
default:
1714-
bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
1715-
fallthrough;
1716-
1717-
case XDP_ABORTED:
1718-
fallthrough; /* handle aborts by dropping packet */
1719-
1720-
case XDP_DROP:
1721-
rxq->stats[RX_XDP_DROP]++;
1722-
xdp_err:
1723-
ret = FEC_ENET_XDP_CONSUMED;
1724-
page = virt_to_head_page(xdp->data);
1725-
page_pool_put_page(rxq->page_pool, page, sync, true);
1726-
if (act != XDP_DROP)
1727-
trace_xdp_exception(fep->netdev, prog, act);
1728-
break;
1729-
}
1730-
1731-
return ret;
1732-
}
1733-
17341669
static void fec_enet_rx_vlan(const struct net_device *ndev, struct sk_buff *skb)
17351670
{
17361671
if (ndev->features & NETIF_F_HW_VLAN_CTAG_RX) {
@@ -1843,19 +1778,14 @@ static struct sk_buff *fec_build_skb(struct fec_enet_private *fep,
18431778
static int fec_enet_rx_queue(struct fec_enet_private *fep,
18441779
u16 queue, int budget)
18451780
{
1846-
u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
18471781
struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
1848-
struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
18491782
bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
1850-
u32 ret, xdp_result = FEC_ENET_XDP_PASS;
18511783
struct net_device *ndev = fep->netdev;
18521784
struct bufdesc *bdp = rxq->bd.cur;
18531785
u32 sub_len = 4 + fep->rx_shift;
1854-
int cpu = smp_processor_id();
18551786
int pkt_received = 0;
18561787
u16 status, pkt_len;
18571788
struct sk_buff *skb;
1858-
struct xdp_buff xdp;
18591789
struct page *page;
18601790
dma_addr_t dma;
18611791
int index;
@@ -1871,8 +1801,6 @@ static int fec_enet_rx_queue(struct fec_enet_private *fep,
18711801
/* First, grab all of the stats for the incoming packet.
18721802
* These get messed up if we get called due to a busy condition.
18731803
*/
1874-
xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
1875-
18761804
while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
18771805

18781806
if (pkt_received >= budget)
@@ -1903,17 +1831,6 @@ static int fec_enet_rx_queue(struct fec_enet_private *fep,
19031831
DMA_FROM_DEVICE);
19041832
prefetch(page_address(page));
19051833

1906-
if (xdp_prog) {
1907-
xdp_buff_clear_frags_flag(&xdp);
1908-
/* subtract 16bit shift and FCS */
1909-
xdp_prepare_buff(&xdp, page_address(page),
1910-
data_start, pkt_len - sub_len, false);
1911-
ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, cpu);
1912-
xdp_result |= ret;
1913-
if (ret != FEC_ENET_XDP_PASS)
1914-
goto rx_processing_done;
1915-
}
1916-
19171834
if (unlikely(need_swap)) {
19181835
u8 *data;
19191836

@@ -1962,7 +1879,168 @@ static int fec_enet_rx_queue(struct fec_enet_private *fep,
19621879
}
19631880
rxq->bd.cur = bdp;
19641881

1965-
if (xdp_result & FEC_ENET_XDP_REDIR)
1882+
return pkt_received;
1883+
}
1884+
1885+
static void fec_xdp_drop(struct fec_enet_priv_rx_q *rxq,
1886+
struct xdp_buff *xdp, u32 sync)
1887+
{
1888+
struct page *page = virt_to_head_page(xdp->data);
1889+
1890+
page_pool_put_page(rxq->page_pool, page, sync, true);
1891+
}
1892+
1893+
static int fec_enet_rx_queue_xdp(struct fec_enet_private *fep, int queue,
1894+
int budget, struct bpf_prog *prog)
1895+
{
1896+
u32 data_start = FEC_ENET_XDP_HEADROOM + fep->rx_shift;
1897+
struct fec_enet_priv_rx_q *rxq = fep->rx_queue[queue];
1898+
struct net_device *ndev = fep->netdev;
1899+
struct bufdesc *bdp = rxq->bd.cur;
1900+
u32 sub_len = 4 + fep->rx_shift;
1901+
int cpu = smp_processor_id();
1902+
int pkt_received = 0;
1903+
struct sk_buff *skb;
1904+
u16 status, pkt_len;
1905+
struct xdp_buff xdp;
1906+
struct page *page;
1907+
u32 xdp_res = 0;
1908+
dma_addr_t dma;
1909+
int index, err;
1910+
u32 act, sync;
1911+
1912+
#if defined(CONFIG_COLDFIRE) && !defined(CONFIG_COLDFIRE_COHERENT_DMA)
1913+
/*
1914+
* Hacky flush of all caches instead of using the DMA API for the TSO
1915+
* headers.
1916+
*/
1917+
flush_cache_all();
1918+
#endif
1919+
1920+
xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq);
1921+
1922+
while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {
1923+
if (pkt_received >= budget)
1924+
break;
1925+
pkt_received++;
1926+
1927+
writel(FEC_ENET_RXF_GET(queue), fep->hwp + FEC_IEVENT);
1928+
1929+
/* Check for errors. */
1930+
status ^= BD_ENET_RX_LAST;
1931+
if (unlikely(fec_rx_error_check(ndev, status)))
1932+
goto rx_processing_done;
1933+
1934+
/* Process the incoming frame. */
1935+
ndev->stats.rx_packets++;
1936+
pkt_len = fec16_to_cpu(bdp->cbd_datlen);
1937+
ndev->stats.rx_bytes += pkt_len - fep->rx_shift;
1938+
1939+
index = fec_enet_get_bd_index(bdp, &rxq->bd);
1940+
page = rxq->rx_buf[index];
1941+
dma = fec32_to_cpu(bdp->cbd_bufaddr);
1942+
1943+
if (fec_enet_update_cbd(rxq, bdp, index)) {
1944+
ndev->stats.rx_dropped++;
1945+
goto rx_processing_done;
1946+
}
1947+
1948+
dma_sync_single_for_cpu(&fep->pdev->dev, dma, pkt_len,
1949+
DMA_FROM_DEVICE);
1950+
prefetch(page_address(page));
1951+
1952+
xdp_buff_clear_frags_flag(&xdp);
1953+
/* subtract 16bit shift and FCS */
1954+
pkt_len -= sub_len;
1955+
xdp_prepare_buff(&xdp, page_address(page), data_start,
1956+
pkt_len, false);
1957+
1958+
act = bpf_prog_run_xdp(prog, &xdp);
1959+
/* Due xdp_adjust_tail and xdp_adjust_head: DMA sync
1960+
* for_device cover max len CPU touch.
1961+
*/
1962+
sync = xdp.data_end - xdp.data;
1963+
sync = max(sync, pkt_len);
1964+
1965+
switch (act) {
1966+
case XDP_PASS:
1967+
rxq->stats[RX_XDP_PASS]++;
1968+
/* The packet length includes FCS, but we don't want to
1969+
* include that when passing upstream as it messes up
1970+
* bridging applications.
1971+
*/
1972+
skb = fec_build_skb(fep, rxq, bdp, page, pkt_len);
1973+
if (!skb)
1974+
trace_xdp_exception(ndev, prog, XDP_PASS);
1975+
else
1976+
napi_gro_receive(&fep->napi, skb);
1977+
1978+
break;
1979+
case XDP_REDIRECT:
1980+
rxq->stats[RX_XDP_REDIRECT]++;
1981+
err = xdp_do_redirect(ndev, &xdp, prog);
1982+
if (unlikely(err)) {
1983+
fec_xdp_drop(rxq, &xdp, sync);
1984+
trace_xdp_exception(ndev, prog, XDP_REDIRECT);
1985+
} else {
1986+
xdp_res |= FEC_ENET_XDP_REDIR;
1987+
}
1988+
break;
1989+
case XDP_TX:
1990+
rxq->stats[RX_XDP_TX]++;
1991+
err = fec_enet_xdp_tx_xmit(fep, cpu, &xdp, sync);
1992+
if (unlikely(err)) {
1993+
rxq->stats[RX_XDP_TX_ERRORS]++;
1994+
fec_xdp_drop(rxq, &xdp, sync);
1995+
trace_xdp_exception(ndev, prog, XDP_TX);
1996+
}
1997+
break;
1998+
default:
1999+
bpf_warn_invalid_xdp_action(ndev, prog, act);
2000+
fallthrough;
2001+
case XDP_ABORTED:
2002+
trace_xdp_exception(ndev, prog, act);
2003+
/* handle aborts by dropping packet */
2004+
fallthrough;
2005+
case XDP_DROP:
2006+
rxq->stats[RX_XDP_DROP]++;
2007+
fec_xdp_drop(rxq, &xdp, sync);
2008+
break;
2009+
}
2010+
2011+
rx_processing_done:
2012+
/* Clear the status flags for this buffer */
2013+
status &= ~BD_ENET_RX_STATS;
2014+
/* Mark the buffer empty */
2015+
status |= BD_ENET_RX_EMPTY;
2016+
2017+
if (fep->bufdesc_ex) {
2018+
struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
2019+
2020+
ebdp->cbd_esc = cpu_to_fec32(BD_ENET_RX_INT);
2021+
ebdp->cbd_prot = 0;
2022+
ebdp->cbd_bdu = 0;
2023+
}
2024+
2025+
/* Make sure the updates to rest of the descriptor are
2026+
* performed before transferring ownership.
2027+
*/
2028+
dma_wmb();
2029+
bdp->cbd_sc = cpu_to_fec16(status);
2030+
2031+
/* Update BD pointer to next entry */
2032+
bdp = fec_enet_get_nextdesc(bdp, &rxq->bd);
2033+
2034+
/* Doing this here will keep the FEC running while we process
2035+
* incoming frames. On a heavily loaded network, we should be
2036+
* able to keep up at the expense of system resources.
2037+
*/
2038+
writel(0, rxq->bd.reg_desc_active);
2039+
}
2040+
2041+
rxq->bd.cur = bdp;
2042+
2043+
if (xdp_res & FEC_ENET_XDP_REDIR)
19662044
xdp_do_flush();
19672045

19682046
return pkt_received;
@@ -1971,11 +2049,17 @@ static int fec_enet_rx_queue(struct fec_enet_private *fep,
19712049
static int fec_enet_rx(struct net_device *ndev, int budget)
19722050
{
19732051
struct fec_enet_private *fep = netdev_priv(ndev);
2052+
struct bpf_prog *prog = READ_ONCE(fep->xdp_prog);
19742053
int i, done = 0;
19752054

19762055
/* Make sure that AVB queues are processed first. */
1977-
for (i = fep->num_rx_queues - 1; i >= 0; i--)
1978-
done += fec_enet_rx_queue(fep, i, budget - done);
2056+
for (i = fep->num_rx_queues - 1; i >= 0; i--) {
2057+
if (prog)
2058+
done += fec_enet_rx_queue_xdp(fep, i, budget - done,
2059+
prog);
2060+
else
2061+
done += fec_enet_rx_queue(fep, i, budget - done);
2062+
}
19792063

19802064
return done;
19812065
}

0 commit comments

Comments
 (0)