Skip to content

Commit 83ac030

Browse files
Cosmin Ratiu authored and kuba-moo committed
net/mlx5e: Fix deadlocks between devlink and netdev instance locks
In the mentioned "Fixes" commit, various work tasks triggering devlink health reporter recovery were switched to use netdev_trylock to protect against concurrent tear down of the channels being recovered. But this had the side effect of introducing potential deadlocks because of incorrect lock ordering. The correct lock order is described by the init flow: probe_one -> mlx5_init_one (acquires devlink lock) -> mlx5_init_one_devl_locked -> mlx5_register_device -> mlx5_rescan_drivers_locked -...-> mlx5e_probe -> _mlx5e_probe -> register_netdev (acquires rtnl lock) -> register_netdevice (acquires netdev lock) => devlink lock -> rtnl lock -> netdev lock. But in the current recovery flow, the order is wrong: mlx5e_tx_err_cqe_work (acquires netdev lock) -> mlx5e_reporter_tx_err_cqe -> mlx5e_health_report -> devlink_health_report (acquires devlink lock => boom!) -> devlink_health_reporter_recover -> mlx5e_tx_reporter_recover -> mlx5e_tx_reporter_recover_from_ctx -> mlx5e_tx_reporter_err_cqe_recover The same pattern exists in: mlx5e_reporter_rx_timeout mlx5e_reporter_tx_ptpsq_unhealthy mlx5e_reporter_tx_timeout Fix these by moving the netdev_trylock calls from the work handlers lower in the call stack, in the respective recovery functions, where they are actually necessary. Fixes: 8f7b003 ("net/mlx5e: Convert mlx5 netdevs to instance locking") Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com> Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Reviewed-by: Jacob Keller <Jacob.e.keller@intel.com> Link: https://patch.msgid.link/20260218072904.1764634-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 9854b24 commit 83ac030

4 files changed

Lines changed: 61 additions & 58 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -457,22 +457,8 @@ static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work)
457457
{
458458
struct mlx5e_ptpsq *ptpsq =
459459
container_of(work, struct mlx5e_ptpsq, report_unhealthy_work);
460-
struct mlx5e_txqsq *sq = &ptpsq->txqsq;
461-
462-
/* Recovering the PTP SQ means re-enabling NAPI, which requires the
463-
* netdev instance lock. However, SQ closing has to wait for this work
464-
* task to finish while also holding the same lock. So either get the
465-
* lock or find that the SQ is no longer enabled and thus this work is
466-
* not relevant anymore.
467-
*/
468-
while (!netdev_trylock(sq->netdev)) {
469-
if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
470-
return;
471-
msleep(20);
472-
}
473460

474461
mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq);
475-
netdev_unlock(sq->netdev);
476462
}
477463

478464
static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn,

drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// SPDX-License-Identifier: GPL-2.0
22
// Copyright (c) 2019 Mellanox Technologies.
33

4+
#include <net/netdev_lock.h>
5+
46
#include "health.h"
57
#include "params.h"
68
#include "txrx.h"
@@ -177,6 +179,16 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx)
177179
rq = ctx;
178180
priv = rq->priv;
179181

182+
/* Acquire netdev instance lock to synchronize with channel close and
183+
* reopen flows. Either successfully obtain the lock, or detect that
184+
* channels are closing for another reason, making this work no longer
185+
* necessary.
186+
*/
187+
while (!netdev_trylock(rq->netdev)) {
188+
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
189+
return 0;
190+
msleep(20);
191+
}
180192
mutex_lock(&priv->state_lock);
181193

182194
eq = rq->cq.mcq.eq;
@@ -186,6 +198,7 @@ static int mlx5e_rx_reporter_timeout_recover(void *ctx)
186198
clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
187199

188200
mutex_unlock(&priv->state_lock);
201+
netdev_unlock(rq->netdev);
189202

190203
return err;
191204
}

drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
/* SPDX-License-Identifier: GPL-2.0 */
22
/* Copyright (c) 2019 Mellanox Technologies. */
33

4+
#include <net/netdev_lock.h>
5+
46
#include "health.h"
57
#include "en/ptp.h"
68
#include "en/devlink.h"
@@ -79,6 +81,18 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
7981
if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
8082
return 0;
8183

84+
/* Recovering queues means re-enabling NAPI, which requires the netdev
85+
* instance lock. However, SQ closing flows have to wait for work tasks
86+
* to finish while also holding the netdev instance lock. So either get
87+
* the lock or find that the SQ is no longer enabled and thus this work
88+
* is not relevant anymore.
89+
*/
90+
while (!netdev_trylock(dev)) {
91+
if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
92+
return 0;
93+
msleep(20);
94+
}
95+
8296
err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
8397
if (err) {
8498
netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
@@ -114,9 +128,11 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
114128
else
115129
mlx5e_trigger_napi_sched(sq->cq.napi);
116130

131+
netdev_unlock(dev);
117132
return 0;
118133
out:
119134
clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
135+
netdev_unlock(dev);
120136
return err;
121137
}
122138

@@ -137,26 +153,41 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx)
137153
sq = to_ctx->sq;
138154
eq = sq->cq.mcq.eq;
139155
priv = sq->priv;
156+
157+
/* Recovering the TX queues implies re-enabling NAPI, which requires
158+
* the netdev instance lock.
159+
* However, channel closing flows have to wait for this work to finish
160+
* while holding the same lock. So either get the lock or find that
161+
* channels are being closed for other reason and this work is not
162+
* relevant anymore.
163+
*/
164+
while (!netdev_trylock(sq->netdev)) {
165+
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
166+
return 0;
167+
msleep(20);
168+
}
169+
140170
err = mlx5e_health_channel_eq_recover(sq->netdev, eq, sq->cq.ch_stats);
141171
if (!err) {
142172
to_ctx->status = 0; /* this sq recovered */
143-
return err;
173+
goto out;
144174
}
145175

146176
mutex_lock(&priv->state_lock);
147177
err = mlx5e_safe_reopen_channels(priv);
148178
mutex_unlock(&priv->state_lock);
149179
if (!err) {
150180
to_ctx->status = 1; /* all channels recovered */
151-
return err;
181+
goto out;
152182
}
153183

154184
to_ctx->status = err;
155185
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
156186
netdev_err(priv->netdev,
157187
"mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n",
158188
err);
159-
189+
out:
190+
netdev_unlock(sq->netdev);
160191
return err;
161192
}
162193

@@ -173,10 +204,22 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
173204
return 0;
174205

175206
priv = ptpsq->txqsq.priv;
207+
netdev = priv->netdev;
208+
209+
/* Recovering the PTP SQ means re-enabling NAPI, which requires the
210+
* netdev instance lock. However, SQ closing has to wait for this work
211+
* task to finish while also holding the same lock. So either get the
212+
* lock or find that the SQ is no longer enabled and thus this work is
213+
* not relevant anymore.
214+
*/
215+
while (!netdev_trylock(netdev)) {
216+
if (!test_bit(MLX5E_SQ_STATE_ENABLED, &ptpsq->txqsq.state))
217+
return 0;
218+
msleep(20);
219+
}
176220

177221
mutex_lock(&priv->state_lock);
178222
chs = &priv->channels;
179-
netdev = priv->netdev;
180223

181224
carrier_ok = netif_carrier_ok(netdev);
182225
netif_carrier_off(netdev);
@@ -193,6 +236,7 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
193236
netif_carrier_on(netdev);
194237

195238
mutex_unlock(&priv->state_lock);
239+
netdev_unlock(netdev);
196240

197241
return err;
198242
}

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -631,19 +631,7 @@ static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
631631
struct mlx5e_rq,
632632
rx_timeout_work);
633633

634-
/* Acquire netdev instance lock to synchronize with channel close and
635-
* reopen flows. Either successfully obtain the lock, or detect that
636-
* channels are closing for another reason, making this work no longer
637-
* necessary.
638-
*/
639-
while (!netdev_trylock(rq->netdev)) {
640-
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
641-
return;
642-
msleep(20);
643-
}
644-
645634
mlx5e_reporter_rx_timeout(rq);
646-
netdev_unlock(rq->netdev);
647635
}
648636

649637
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
@@ -1952,20 +1940,7 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
19521940
struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
19531941
recover_work);
19541942

1955-
/* Recovering queues means re-enabling NAPI, which requires the netdev
1956-
* instance lock. However, SQ closing flows have to wait for work tasks
1957-
* to finish while also holding the netdev instance lock. So either get
1958-
* the lock or find that the SQ is no longer enabled and thus this work
1959-
* is not relevant anymore.
1960-
*/
1961-
while (!netdev_trylock(sq->netdev)) {
1962-
if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))
1963-
return;
1964-
msleep(20);
1965-
}
1966-
19671943
mlx5e_reporter_tx_err_cqe(sq);
1968-
netdev_unlock(sq->netdev);
19691944
}
19701945

19711946
static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
@@ -5115,19 +5090,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
51155090
struct net_device *netdev = priv->netdev;
51165091
int i;
51175092

5118-
/* Recovering the TX queues implies re-enabling NAPI, which requires
5119-
* the netdev instance lock.
5120-
* However, channel closing flows have to wait for this work to finish
5121-
* while holding the same lock. So either get the lock or find that
5122-
* channels are being closed for other reason and this work is not
5123-
* relevant anymore.
5124-
*/
5125-
while (!netdev_trylock(netdev)) {
5126-
if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state))
5127-
return;
5128-
msleep(20);
5129-
}
5130-
51315093
for (i = 0; i < netdev->real_num_tx_queues; i++) {
51325094
struct netdev_queue *dev_queue =
51335095
netdev_get_tx_queue(netdev, i);
@@ -5140,8 +5102,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
51405102
/* break if tried to reopened channels */
51415103
break;
51425104
}
5143-
5144-
netdev_unlock(netdev);
51455105
}
51465106

51475107
static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)

0 commit comments

Comments
 (0)