Skip to content

Commit e5eba42

Browse files
Akiva Goldberger authored and Paolo Abeni committed
mlx5: Fix default values in create CQ
Currently, CQs without a completion function are assigned the mlx5_add_cq_to_tasklet function by default. This is problematic since only user CQs created through the mlx5_ib driver are intended to use this function.

Additionally, all CQs that will use doorbells instead of polling for completions must call mlx5_cq_arm. However, the default CQ creation flow leaves a valid value in the CQ's arm_db field, allowing FW to send interrupts to polling-only CQs in certain corner cases.

These two factors would allow a polling-only kernel CQ to be triggered by an EQ interrupt and call a completion function intended only for user CQs, causing a null pointer exception.

Some areas in the driver have prevented this issue with one-off fixes but did not address the root cause.

This patch fixes the described issue by adding defaults to the create CQ flow. It adds a default dummy completion function to protect against null pointer exceptions, and it sets an invalid command sequence number by default in kernel CQs to prevent the FW from sending an interrupt to the CQ until it is armed. User CQs are responsible for their own initialization values.

Callers of mlx5_core_create_cq are responsible for changing the completion function and arming the CQ per their needs.

Fixes: cdd04f4 ("net/mlx5: Add support to create SQ and CQ for ASO")
Signed-off-by: Akiva Goldberger <agoldberger@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Leon Romanovsky <leon@kernel.org>
Link: https://patch.msgid.link/1762681743-1084694-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
1 parent ed6b563 commit e5eba42

8 files changed

Lines changed: 44 additions & 48 deletions

File tree

drivers/infiniband/hw/mlx5/cq.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,15 +1020,18 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
10201020
if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
10211021
MLX5_SET(cqc, cqc, oi, 1);
10221022

1023+
if (udata) {
1024+
cq->mcq.comp = mlx5_add_cq_to_tasklet;
1025+
cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
1026+
} else {
1027+
cq->mcq.comp = mlx5_ib_cq_comp;
1028+
}
1029+
10231030
err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
10241031
if (err)
10251032
goto err_cqb;
10261033

10271034
mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
1028-
if (udata)
1029-
cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp;
1030-
else
1031-
cq->mcq.comp = mlx5_ib_cq_comp;
10321035
cq->mcq.event = mlx5_ib_cq_event;
10331036

10341037
INIT_LIST_HEAD(&cq->wc_list);

drivers/net/ethernet/mellanox/mlx5/core/cq.c

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ void mlx5_cq_tasklet_cb(struct tasklet_struct *t)
6666
tasklet_schedule(&ctx->task);
6767
}
6868

69-
static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
70-
struct mlx5_eqe *eqe)
69+
void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
70+
struct mlx5_eqe *eqe)
7171
{
7272
unsigned long flags;
7373
struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv;
@@ -95,7 +95,15 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
9595
if (schedule_tasklet)
9696
tasklet_schedule(&tasklet_ctx->task);
9797
}
98+
EXPORT_SYMBOL(mlx5_add_cq_to_tasklet);
9899

100+
static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
101+
{
102+
mlx5_core_err(cq->eq->core.dev,
103+
"CQ default completion callback, CQ #%u\n", cq->cqn);
104+
}
105+
106+
#define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28)
99107
/* Callers must verify outbox status in case of err */
100108
int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
101109
u32 *in, int inlen, u32 *out, int outlen)
@@ -121,10 +129,19 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
121129
cq->arm_sn = 0;
122130
cq->eq = eq;
123131
cq->uid = MLX5_GET(create_cq_in, in, uid);
132+
133+
/* Kernel CQs must set the arm_db address prior to calling
134+
* this function, allowing for the proper value to be
135+
* initialized. User CQs are responsible for their own
136+
* initialization since they do not use the arm_db field.
137+
*/
138+
if (cq->arm_db)
139+
*cq->arm_db = MLX5_CQ_INIT_CMD_SN;
140+
124141
refcount_set(&cq->refcount, 1);
125142
init_completion(&cq->free);
126143
if (!cq->comp)
127-
cq->comp = mlx5_add_cq_to_tasklet;
144+
cq->comp = mlx5_core_cq_dummy_cb;
128145
/* assuming CQ will be deleted before the EQ */
129146
cq->tasklet_ctx.priv = &eq->tasklet_ctx;
130147
INIT_LIST_HEAD(&cq->tasklet_ctx.list);

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2219,7 +2219,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
22192219
mcq->set_ci_db = cq->wq_ctrl.db.db;
22202220
mcq->arm_db = cq->wq_ctrl.db.db + 1;
22212221
*mcq->set_ci_db = 0;
2222-
*mcq->arm_db = 0;
22232222
mcq->vector = param->eq_ix;
22242223
mcq->comp = mlx5e_completion_event;
22252224
mcq->event = mlx5e_cq_error_event;

drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,13 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
421421
__be64 *pas;
422422
u32 i;
423423

424+
conn->cq.mcq.cqe_sz = 64;
425+
conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
426+
conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
427+
*conn->cq.mcq.set_ci_db = 0;
428+
conn->cq.mcq.vector = 0;
429+
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
430+
424431
cq_size = roundup_pow_of_two(cq_size);
425432
MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size));
426433

@@ -468,15 +475,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
468475
if (err)
469476
goto err_cqwq;
470477

471-
conn->cq.mcq.cqe_sz = 64;
472-
conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db;
473-
conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1;
474-
*conn->cq.mcq.set_ci_db = 0;
475-
*conn->cq.mcq.arm_db = 0;
476-
conn->cq.mcq.vector = 0;
477-
conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete;
478478
tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet);
479-
480479
mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn);
481480

482481
goto out;

drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -873,12 +873,6 @@ static int hws_send_ring_open_sq(struct mlx5hws_context *ctx,
873873
return err;
874874
}
875875

876-
static void hws_cq_complete(struct mlx5_core_cq *mcq,
877-
struct mlx5_eqe *eqe)
878-
{
879-
pr_err("CQ completion CQ: #%u\n", mcq->cqn);
880-
}
881-
882876
static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
883877
int numa_node,
884878
struct mlx5hws_send_engine *queue,
@@ -901,7 +895,6 @@ static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev,
901895
mcq->cqe_sz = 64;
902896
mcq->set_ci_db = cq->wq_ctrl.db.db;
903897
mcq->arm_db = cq->wq_ctrl.db.db + 1;
904-
mcq->comp = hws_cq_complete;
905898

906899
for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
907900
cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,12 +1049,6 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
10491049
return 0;
10501050
}
10511051

1052-
static void dr_cq_complete(struct mlx5_core_cq *mcq,
1053-
struct mlx5_eqe *eqe)
1054-
{
1055-
pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1056-
}
1057-
10581052
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
10591053
struct mlx5_uars_page *uar,
10601054
size_t ncqe)
@@ -1089,6 +1083,13 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
10891083
cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
10901084
}
10911085

1086+
cq->mcq.cqe_sz = 64;
1087+
cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
1088+
cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
1089+
*cq->mcq.set_ci_db = 0;
1090+
cq->mcq.vector = 0;
1091+
cq->mdev = mdev;
1092+
10921093
inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
10931094
sizeof(u64) * cq->wq_ctrl.buf.npages;
10941095
in = kvzalloc(inlen, GFP_KERNEL);
@@ -1112,27 +1113,12 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
11121113
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
11131114
mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
11141115

1115-
cq->mcq.comp = dr_cq_complete;
1116-
11171116
err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
11181117
kvfree(in);
11191118

11201119
if (err)
11211120
goto err_cqwq;
11221121

1123-
cq->mcq.cqe_sz = 64;
1124-
cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
1125-
cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
1126-
*cq->mcq.set_ci_db = 0;
1127-
1128-
/* set no-zero value, in order to avoid the HW to run db-recovery on
1129-
* CQ that used in polling mode.
1130-
*/
1131-
*cq->mcq.arm_db = cpu_to_be32(2 << 28);
1132-
1133-
cq->mcq.vector = 0;
1134-
cq->mdev = mdev;
1135-
11361122
return cq;
11371123

11381124
err_cqwq:

drivers/vdpa/mlx5/net/mlx5_vnet.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
573573
vcq->mcq.set_ci_db = vcq->db.db;
574574
vcq->mcq.arm_db = vcq->db.db + 1;
575575
vcq->mcq.cqe_sz = 64;
576+
vcq->mcq.comp = mlx5_vdpa_cq_comp;
577+
vcq->cqe = num_ent;
576578

577579
err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
578580
if (err)
@@ -612,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
612614
if (err)
613615
goto err_vec;
614616

615-
vcq->mcq.comp = mlx5_vdpa_cq_comp;
616-
vcq->cqe = num_ent;
617-
vcq->mcq.set_ci_db = vcq->db.db;
618-
vcq->mcq.arm_db = vcq->db.db + 1;
619617
mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
620618
kfree(in);
621619
return 0;

include/linux/mlx5/cq.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq)
183183
complete(&cq->free);
184184
}
185185

186+
void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe);
186187
int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
187188
u32 *in, int inlen, u32 *out, int outlen);
188189
int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,

0 commit comments

Comments
 (0)