Skip to content

Commit 323275a

Browse files
wenglianfa authored and rleon committed
RDMA/hns: Fix cpu stuck caused by printings during reset
During reset, cmd to destroy resources such as qp, cq, and mr may fail, and error logs will be printed. When a large number of resources are destroyed, there will be lots of printings, and it may lead to a cpu stuck.

Delete some unnecessary printings and replace other printing functions in these paths with the ratelimited version.

Fixes: 9a44353 ("IB/hns: Add driver files for hns RoCE driver")
Fixes: c7bcb13 ("RDMA/hns: Add SRQ support for hip08 kernel mode")
Fixes: 70f9252 ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT")
Fixes: 926a01d ("RDMA/hns: Add QP operations support for hip08 SoC")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241024124000.2931869-6-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent d81fb65 commit 323275a

5 files changed

Lines changed: 41 additions & 48 deletions

File tree

drivers/infiniband/hw/hns/hns_roce_cq.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
179179
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC,
180180
hr_cq->cqn);
181181
if (ret)
182-
dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
183-
hr_cq->cqn);
182+
dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n",
183+
ret, hr_cq->cqn);
184184

185185
xa_erase_irq(&cq_table->array, hr_cq->cqn);
186186

drivers/infiniband/hw/hns/hns_roce_hem.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -672,8 +672,8 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
672672

673673
ret = hr_dev->hw->clear_hem(hr_dev, table, obj, HEM_HOP_STEP_DIRECT);
674674
if (ret)
675-
dev_warn(dev, "failed to clear HEM base address, ret = %d.\n",
676-
ret);
675+
dev_warn_ratelimited(dev, "failed to clear HEM base address, ret = %d.\n",
676+
ret);
677677

678678
hns_roce_free_hem(hr_dev, table->hem[i]);
679679
table->hem[i] = NULL;

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 33 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -373,19 +373,12 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
373373
static int check_send_valid(struct hns_roce_dev *hr_dev,
374374
struct hns_roce_qp *hr_qp)
375375
{
376-
struct ib_device *ibdev = &hr_dev->ib_dev;
377-
378376
if (unlikely(hr_qp->state == IB_QPS_RESET ||
379377
hr_qp->state == IB_QPS_INIT ||
380-
hr_qp->state == IB_QPS_RTR)) {
381-
ibdev_err(ibdev, "failed to post WQE, QP state %u!\n",
382-
hr_qp->state);
378+
hr_qp->state == IB_QPS_RTR))
383379
return -EINVAL;
384-
} else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) {
385-
ibdev_err(ibdev, "failed to post WQE, dev state %d!\n",
386-
hr_dev->state);
380+
else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
387381
return -EIO;
388-
}
389382

390383
return 0;
391384
}
@@ -2775,8 +2768,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
27752768
ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
27762769
IB_QPS_INIT, NULL);
27772770
if (ret) {
2778-
ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
2779-
ret);
2771+
ibdev_err_ratelimited(ibdev, "failed to modify qp to init, ret = %d.\n",
2772+
ret);
27802773
return ret;
27812774
}
27822775

@@ -3421,8 +3414,8 @@ static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp)
34213414

34223415
ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr);
34233416
if (ret) {
3424-
ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n",
3425-
ret);
3417+
ibdev_err_ratelimited(ibdev, "failed to post wqe for free mr, ret = %d.\n",
3418+
ret);
34263419
return ret;
34273420
}
34283421

@@ -3461,9 +3454,9 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
34613454

34623455
ret = free_mr_post_send_lp_wqe(hr_qp);
34633456
if (ret) {
3464-
ibdev_err(ibdev,
3465-
"failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
3466-
hr_qp->qpn, ret);
3457+
ibdev_err_ratelimited(ibdev,
3458+
"failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n",
3459+
hr_qp->qpn, ret);
34673460
break;
34683461
}
34693462

@@ -3474,16 +3467,16 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
34743467
while (cqe_cnt) {
34753468
npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
34763469
if (npolled < 0) {
3477-
ibdev_err(ibdev,
3478-
"failed to poll cqe for free mr, remain %d cqe.\n",
3479-
cqe_cnt);
3470+
ibdev_err_ratelimited(ibdev,
3471+
"failed to poll cqe for free mr, remain %d cqe.\n",
3472+
cqe_cnt);
34803473
goto out;
34813474
}
34823475

34833476
if (time_after(jiffies, end)) {
3484-
ibdev_err(ibdev,
3485-
"failed to poll cqe for free mr and timeout, remain %d cqe.\n",
3486-
cqe_cnt);
3477+
ibdev_err_ratelimited(ibdev,
3478+
"failed to poll cqe for free mr and timeout, remain %d cqe.\n",
3479+
cqe_cnt);
34873480
goto out;
34883481
}
34893482
cqe_cnt -= npolled;
@@ -5061,10 +5054,8 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
50615054
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
50625055
int ret = 0;
50635056

5064-
if (!check_qp_state(cur_state, new_state)) {
5065-
ibdev_err(&hr_dev->ib_dev, "Illegal state for QP!\n");
5057+
if (!check_qp_state(cur_state, new_state))
50665058
return -EINVAL;
5067-
}
50685059

50695060
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
50705061
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
@@ -5325,7 +5316,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
53255316
/* SW pass context to HW */
53265317
ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp);
53275318
if (ret) {
5328-
ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret);
5319+
ibdev_err_ratelimited(ibdev, "failed to modify QP, ret = %d.\n", ret);
53295320
goto out;
53305321
}
53315322

@@ -5463,15 +5454,17 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
54635454

54645455
ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context);
54655456
if (ret) {
5466-
ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret);
5457+
ibdev_err_ratelimited(ibdev,
5458+
"failed to query QPC, ret = %d.\n",
5459+
ret);
54675460
ret = -EINVAL;
54685461
goto out;
54695462
}
54705463

54715464
state = hr_reg_read(&context, QPC_QP_ST);
54725465
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
54735466
if (tmp_qp_state == -1) {
5474-
ibdev_err(ibdev, "Illegal ib_qp_state\n");
5467+
ibdev_err_ratelimited(ibdev, "Illegal ib_qp_state\n");
54755468
ret = -EINVAL;
54765469
goto out;
54775470
}
@@ -5564,9 +5557,9 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
55645557
ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0,
55655558
hr_qp->state, IB_QPS_RESET, udata);
55665559
if (ret)
5567-
ibdev_err(ibdev,
5568-
"failed to modify QP to RST, ret = %d.\n",
5569-
ret);
5560+
ibdev_err_ratelimited(ibdev,
5561+
"failed to modify QP to RST, ret = %d.\n",
5562+
ret);
55705563
}
55715564

55725565
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL;
@@ -5609,9 +5602,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
56095602

56105603
ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata);
56115604
if (ret)
5612-
ibdev_err(&hr_dev->ib_dev,
5613-
"failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
5614-
hr_qp->qpn, ret);
5605+
ibdev_err_ratelimited(&hr_dev->ib_dev,
5606+
"failed to destroy QP, QPN = 0x%06lx, ret = %d.\n",
5607+
hr_qp->qpn, ret);
56155608

56165609
hns_roce_qp_destroy(hr_dev, hr_qp, udata);
56175610

@@ -5905,9 +5898,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
59055898
HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn);
59065899
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
59075900
if (ret)
5908-
ibdev_err(&hr_dev->ib_dev,
5909-
"failed to process cmd when modifying CQ, ret = %d.\n",
5910-
ret);
5901+
ibdev_err_ratelimited(&hr_dev->ib_dev,
5902+
"failed to process cmd when modifying CQ, ret = %d.\n",
5903+
ret);
59115904

59125905
err_out:
59135906
if (ret)
@@ -5931,9 +5924,9 @@ static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn,
59315924
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma,
59325925
HNS_ROCE_CMD_QUERY_CQC, cqn);
59335926
if (ret) {
5934-
ibdev_err(&hr_dev->ib_dev,
5935-
"failed to process cmd when querying CQ, ret = %d.\n",
5936-
ret);
5927+
ibdev_err_ratelimited(&hr_dev->ib_dev,
5928+
"failed to process cmd when querying CQ, ret = %d.\n",
5929+
ret);
59375930
goto err_mailbox;
59385931
}
59395932

drivers/infiniband/hw/hns/hns_roce_mr.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -138,8 +138,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr
138138
key_to_hw_index(mr->key) &
139139
(hr_dev->caps.num_mtpts - 1));
140140
if (ret)
141-
ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
142-
ret);
141+
ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
142+
ret);
143143
}
144144

145145
free_mr_pbl(hr_dev, mr);

drivers/infiniband/hw/hns/hns_roce_srq.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,8 +151,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
151151
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ,
152152
srq->srqn);
153153
if (ret)
154-
dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
155-
ret, srq->srqn);
154+
dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
155+
ret, srq->srqn);
156156

157157
xa_erase_irq(&srq_table->xa, srq->srqn);
158158

0 commit comments

Comments
 (0)