Skip to content

Commit 0da1dba

Browse files
Tariq Toukan authored and kuba-moo committed
net/mlx5e: XSK, Fix unintended ICOSQ change
XSK wakeup must use the async ICOSQ (with proper locking), as it is not guaranteed to run on the same CPU as the channel.

The commit that converted the NAPI trigger path to use the sync ICOSQ incorrectly applied the same change to XSK, causing XSK wakeups to use the sync ICOSQ as well. Revert XSK flows to use the async ICOSQ.

XDP program attach/detach triggers channel reopen, while XSK pool enable/disable can happen on-the-fly via NDOs without reopening channels. As a result, xsk_pool state cannot be reliably used at mlx5e_open_channel() time to decide whether an async ICOSQ is needed.

Update the async_icosq_needed logic to depend on the presence of an XDP program rather than the xsk_pool, ensuring the async ICOSQ is available when XSK wakeups are enabled.

This fixes multiple issues:

1. Illegal synchronize_rcu() in an RCU read-side critical section via mlx5e_xsk_wakeup() -> mlx5e_trigger_napi_icosq() -> synchronize_net(). The stack holds RCU read-lock in xsk_poll().

2. Hitting a NULL pointer dereference in mlx5e_xsk_wakeup():

[] BUG: kernel NULL pointer dereference, address: 0000000000000240
[] #PF: supervisor read access in kernel mode
[] #PF: error_code(0x0000) - not-present page
[] PGD 0 P4D 0
[] Oops: Oops: 0000 [#1] SMP
[] CPU: 0 UID: 0 PID: 2255 Comm: qemu-system-x86 Not tainted 6.19.0-rc5+ #229 PREEMPT(none)
[] Hardware name: [...]
[] RIP: 0010:mlx5e_xsk_wakeup+0x53/0x90 [mlx5_core]

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Closes: https://lore.kernel.org/all/20260123223916.361295-1-daniel@iogearbox.net/
Fixes: 56aca3e ("net/mlx5e: Use regular ICOSQ for triggering NAPI")
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Acked-by: Alice Mikityanska <alice.kernel@fastmail.im>
Link: https://patch.msgid.link/20260217074525.1761454-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent b1216f1 commit 0da1dba

4 files changed

Lines changed: 21 additions & 10 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,7 @@ int mlx5e_open_locked(struct net_device *netdev);
11031103
int mlx5e_close_locked(struct net_device *netdev);
11041104

11051105
void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c);
1106+
void mlx5e_trigger_napi_async_icosq(struct mlx5e_channel *c);
11061107
void mlx5e_trigger_napi_sched(struct napi_struct *napi);
11071108

11081109
int mlx5e_open_channels(struct mlx5e_priv *priv,

drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
127127
goto err_remove_pool;
128128

129129
mlx5e_activate_xsk(c);
130-
mlx5e_trigger_napi_icosq(c);
130+
mlx5e_trigger_napi_async_icosq(c);
131131

132132
/* Don't wait for WQEs, because the newer xdpsock sample doesn't provide
133133
* any Fill Ring entries at the setup stage.
@@ -179,7 +179,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
179179
c = priv->channels.c[ix];
180180

181181
mlx5e_activate_rq(&c->rq);
182-
mlx5e_trigger_napi_icosq(c);
182+
mlx5e_trigger_napi_async_icosq(c);
183183
mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT);
184184

185185
mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false);

drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
3434
&c->async_icosq->state))
3535
return 0;
3636

37-
mlx5e_trigger_napi_icosq(c);
37+
mlx5e_trigger_napi_async_icosq(c);
3838
}
3939

4040
return 0;

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2744,16 +2744,26 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
27442744

27452745
void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
27462746
{
2747+
struct mlx5e_icosq *sq = &c->icosq;
27472748
bool locked;
27482749

2749-
if (!test_and_set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state))
2750-
synchronize_net();
2750+
set_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &sq->state);
2751+
synchronize_net();
27512752

2752-
locked = mlx5e_icosq_sync_lock(&c->icosq);
2753-
mlx5e_trigger_irq(&c->icosq);
2754-
mlx5e_icosq_sync_unlock(&c->icosq, locked);
2753+
locked = mlx5e_icosq_sync_lock(sq);
2754+
mlx5e_trigger_irq(sq);
2755+
mlx5e_icosq_sync_unlock(sq, locked);
27552756

2756-
clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &c->icosq.state);
2757+
clear_bit(MLX5E_SQ_STATE_LOCK_NEEDED, &sq->state);
2758+
}
2759+
2760+
void mlx5e_trigger_napi_async_icosq(struct mlx5e_channel *c)
2761+
{
2762+
struct mlx5e_icosq *sq = c->async_icosq;
2763+
2764+
spin_lock_bh(&sq->lock);
2765+
mlx5e_trigger_irq(sq);
2766+
spin_unlock_bh(&sq->lock);
27572767
}
27582768

27592769
void mlx5e_trigger_napi_sched(struct napi_struct *napi)
@@ -2836,7 +2846,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
28362846
netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix);
28372847
netif_napi_set_irq_locked(&c->napi, irq);
28382848

2839-
async_icosq_needed = !!xsk_pool || priv->ktls_rx_was_enabled;
2849+
async_icosq_needed = !!params->xdp_prog || priv->ktls_rx_was_enabled;
28402850
err = mlx5e_open_queues(c, params, cparam, async_icosq_needed);
28412851
if (unlikely(err))
28422852
goto err_napi_del;

0 commit comments

Comments
 (0)