Skip to content

Commit ae3cb71

Browse files
gal-pressman authored and kuba-moo committed
net/mlx5e: Fix misidentification of ASO CQE during poll loop
The ASO completion poll loop uses usleep_range(), which can sleep much longer than requested due to scheduler latency. Under load, we witnessed a 20ms+ delay until the process was rescheduled, causing the jiffies-based timeout to expire while the thread was sleeping.

The original do-while loop structure (poll, sleep, check timeout) would exit without a final poll when waking after the timeout, missing a CQE that arrived during the sleep.

Instead of the open-coded while loop, use the kernel's read_poll_timeout(), which always performs an additional check after the sleep expiration and is less error-prone.

Note: read_poll_timeout() doesn't accept a sleep range; by passing 10 as sleep_us, the sleep range effectively changes from 2-10 to 3-10 usecs.

Fixes: 739cfa3 ("net/mlx5: Make ASO poll CQ usable in atomic context")
Fixes: 7e3fce8 ("net/mlx5e: Overcome slow response for first macsec ASO WQE")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <Jacob.e.keller@intel.com>
Link: https://patch.msgid.link/20260218072904.1764634-3-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
1 parent 47bf2e8 commit ae3cb71

2 files changed

Lines changed: 6 additions & 14 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
22
// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33

4+
#include <linux/iopoll.h>
45
#include <linux/math64.h>
56
#include "lib/aso.h"
67
#include "en/tc/post_act.h"
@@ -115,7 +116,6 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
115116
struct mlx5e_flow_meters *flow_meters;
116117
u8 cir_man, cir_exp, cbs_man, cbs_exp;
117118
struct mlx5_aso_wqe *aso_wqe;
118-
unsigned long expires;
119119
struct mlx5_aso *aso;
120120
u64 rate, burst;
121121
u8 ds_cnt;
@@ -187,12 +187,8 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev,
187187
mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl);
188188

189189
/* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */
190-
expires = jiffies + msecs_to_jiffies(10);
191-
do {
192-
err = mlx5_aso_poll_cq(aso, true);
193-
if (err)
194-
usleep_range(2, 10);
195-
} while (err && time_is_after_jiffies(expires));
190+
read_poll_timeout(mlx5_aso_poll_cq, err, !err, 10, 10 * USEC_PER_MSEC,
191+
false, aso, true);
196192
mutex_unlock(&flow_meters->aso_lock);
197193

198194
return err;

drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <linux/mlx5/mlx5_ifc.h>
66
#include <linux/xarray.h>
77
#include <linux/if_vlan.h>
8+
#include <linux/iopoll.h>
89

910
#include "en.h"
1011
#include "lib/aso.h"
@@ -1397,7 +1398,6 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac
13971398
struct mlx5e_macsec_aso *aso;
13981399
struct mlx5_aso_wqe *aso_wqe;
13991400
struct mlx5_aso *maso;
1400-
unsigned long expires;
14011401
int err;
14021402

14031403
aso = &macsec->aso;
@@ -1411,12 +1411,8 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac
14111411
macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL);
14121412

14131413
mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl);
1414-
expires = jiffies + msecs_to_jiffies(10);
1415-
do {
1416-
err = mlx5_aso_poll_cq(maso, false);
1417-
if (err)
1418-
usleep_range(2, 10);
1419-
} while (err && time_is_after_jiffies(expires));
1414+
read_poll_timeout(mlx5_aso_poll_cq, err, !err, 10, 10 * USEC_PER_MSEC,
1415+
false, maso, false);
14201416

14211417
if (err)
14221418
goto err_out;

0 commit comments

Comments
 (0)