Skip to content

Commit 42f1d09

Browse files
shirazsaleem authored and rleon committed
RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices
Enhance the CQE error and flush handling specific to GEN3 devices. Unlike GEN1/2 devices, which depend on software to generate completions in error, GEN3 devices leverage firmware to generate CQEs in error for all WQEs posted after a QP moves to an error state.

Key changes include:
- Updating the CQ poll logic to properly advance the CQ head in the event of a flush CQE.
- Updating the flush logic for GEN3 to pass the error WQE index for the SQ on an AE, to flush out unprocessed WQEs in error.
- Isolating the decoding of AE to flush codes into a separate routine, irdma_ae_to_qp_err_code. This routine can now be leveraged to flush error CQEs on an AE and when an error CQE is received for an SRQ.

Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
Link: https://patch.msgid.link/20250827152545.2056-16-tatyana.e.nikolova@intel.com
Tested-by: Jacob Moroni <jmoroni@google.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent a24a29c commit 42f1d09

7 files changed

Lines changed: 277 additions & 188 deletions

File tree

drivers/infiniband/hw/irdma/ctrl.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2674,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
26742674
info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
26752675
info->ae_src) : 0;
26762676
set_64bit_val(wqe, 8, temp);
2677+
if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
2678+
set_64bit_val(wqe, 40,
2679+
FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx));
2680+
set_64bit_val(wqe, 48,
2681+
FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx));
2682+
}
26772683

26782684
hdr = qp->qp_uk.qp_id |
26792685
FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
@@ -2682,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
26822688
FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
26832689
FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
26842690
FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
2691+
if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
2692+
hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) |
2693+
FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid);
26852694
dma_wmb(); /* make sure WQE is written before valid bit is set */
26862695

26872696
set_64bit_val(wqe, 24, hdr);

drivers/infiniband/hw/irdma/defs.h

Lines changed: 4 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -301,107 +301,6 @@ enum irdma_cqp_op_type {
301301
#define IRDMA_CQP_OP_GATHER_STATS 0x2e
302302
#define IRDMA_CQP_OP_UP_MAP 0x2f
303303

304-
/* Async Events codes */
305-
#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102
306-
#define IRDMA_AE_AMP_INVALID_STAG 0x0103
307-
#define IRDMA_AE_AMP_BAD_QP 0x0104
308-
#define IRDMA_AE_AMP_BAD_PD 0x0105
309-
#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106
310-
#define IRDMA_AE_AMP_BAD_STAG_INDEX 0x0107
311-
#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108
312-
#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109
313-
#define IRDMA_AE_AMP_TO_WRAP 0x010a
314-
#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c
315-
#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d
316-
#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS 0x010e
317-
#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH 0x0110
318-
#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
319-
#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112
320-
#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113
321-
#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
322-
#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
323-
#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
324-
#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
325-
#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
326-
#define IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS 0x0119
327-
#define IRDMA_AE_AMP_MWBIND_TO_INVALID_PARENT 0x011a
328-
#define IRDMA_AE_AMP_MWBIND_BIND_DISABLED 0x011b
329-
#define IRDMA_AE_PRIV_OPERATION_DENIED 0x011c
330-
#define IRDMA_AE_AMP_INVALIDATE_TYPE1_MW 0x011d
331-
#define IRDMA_AE_AMP_MWBIND_ZERO_BASED_TYPE1_MW 0x011e
332-
#define IRDMA_AE_AMP_FASTREG_INVALID_PBL_HPS_CFG 0x011f
333-
#define IRDMA_AE_AMP_MWBIND_WRONG_TYPE 0x0120
334-
#define IRDMA_AE_AMP_FASTREG_PBLE_MISMATCH 0x0121
335-
#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG 0x0132
336-
#define IRDMA_AE_UDA_XMIT_BAD_PD 0x0133
337-
#define IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT 0x0134
338-
#define IRDMA_AE_UDA_L4LEN_INVALID 0x0135
339-
#define IRDMA_AE_BAD_CLOSE 0x0201
340-
#define IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE 0x0202
341-
#define IRDMA_AE_CQ_OPERATION_ERROR 0x0203
342-
#define IRDMA_AE_RDMA_READ_WHILE_ORD_ZERO 0x0205
343-
#define IRDMA_AE_STAG_ZERO_INVALID 0x0206
344-
#define IRDMA_AE_IB_RREQ_AND_Q1_FULL 0x0207
345-
#define IRDMA_AE_IB_INVALID_REQUEST 0x0208
346-
#define IRDMA_AE_SRQ_LIMIT 0x0209
347-
#define IRDMA_AE_WQE_UNEXPECTED_OPCODE 0x020a
348-
#define IRDMA_AE_WQE_INVALID_PARAMETER 0x020b
349-
#define IRDMA_AE_WQE_INVALID_FRAG_DATA 0x020c
350-
#define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d
351-
#define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e
352-
#define IRDMA_AE_SRQ_CATASTROPHIC_ERROR 0x020f
353-
#define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220
354-
#define IRDMA_AE_ATOMIC_ALIGNMENT 0x0221
355-
#define IRDMA_AE_ATOMIC_MASK 0x0222
356-
#define IRDMA_AE_INVALID_REQUEST 0x0223
357-
#define IRDMA_AE_PCIE_ATOMIC_DISABLE 0x0224
358-
#define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301
359-
#define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303
360-
#define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304
361-
#define IRDMA_AE_DDP_UBE_INVALID_MO 0x0305
362-
#define IRDMA_AE_DDP_UBE_INVALID_MSN_NO_BUFFER_AVAILABLE 0x0306
363-
#define IRDMA_AE_DDP_UBE_INVALID_QN 0x0307
364-
#define IRDMA_AE_DDP_NO_L_BIT 0x0308
365-
#define IRDMA_AE_RDMAP_ROE_INVALID_RDMAP_VERSION 0x0311
366-
#define IRDMA_AE_RDMAP_ROE_UNEXPECTED_OPCODE 0x0312
367-
#define IRDMA_AE_ROE_INVALID_RDMA_READ_REQUEST 0x0313
368-
#define IRDMA_AE_ROE_INVALID_RDMA_WRITE_OR_READ_RESP 0x0314
369-
#define IRDMA_AE_ROCE_RSP_LENGTH_ERROR 0x0316
370-
#define IRDMA_AE_ROCE_EMPTY_MCG 0x0380
371-
#define IRDMA_AE_ROCE_BAD_MC_IP_ADDR 0x0381
372-
#define IRDMA_AE_ROCE_BAD_MC_QPID 0x0382
373-
#define IRDMA_AE_MCG_QP_PROTOCOL_MISMATCH 0x0383
374-
#define IRDMA_AE_INVALID_ARP_ENTRY 0x0401
375-
#define IRDMA_AE_INVALID_TCP_OPTION_RCVD 0x0402
376-
#define IRDMA_AE_STALE_ARP_ENTRY 0x0403
377-
#define IRDMA_AE_INVALID_AH_ENTRY 0x0406
378-
#define IRDMA_AE_LLP_CLOSE_COMPLETE 0x0501
379-
#define IRDMA_AE_LLP_CONNECTION_RESET 0x0502
380-
#define IRDMA_AE_LLP_FIN_RECEIVED 0x0503
381-
#define IRDMA_AE_LLP_RECEIVED_MARKER_AND_LENGTH_FIELDS_DONT_MATCH 0x0504
382-
#define IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR 0x0505
383-
#define IRDMA_AE_LLP_SEGMENT_TOO_SMALL 0x0507
384-
#define IRDMA_AE_LLP_SYN_RECEIVED 0x0508
385-
#define IRDMA_AE_LLP_TERMINATE_RECEIVED 0x0509
386-
#define IRDMA_AE_LLP_TOO_MANY_RETRIES 0x050a
387-
#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
388-
#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
389-
#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
390-
#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
391-
#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
392-
#define IRDMA_AE_RESET_SENT 0x0601
393-
#define IRDMA_AE_TERMINATE_SENT 0x0602
394-
#define IRDMA_AE_RESET_NOT_SENT 0x0603
395-
#define IRDMA_AE_LCE_QP_CATASTROPHIC 0x0700
396-
#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
397-
#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
398-
#define IRDMA_AE_REMOTE_QP_CATASTROPHIC 0x0703
399-
#define IRDMA_AE_LOCAL_QP_CATASTROPHIC 0x0704
400-
#define IRDMA_AE_RCE_QP_CATASTROPHIC 0x0705
401-
#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
402-
#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901
403-
#define IRDMA_AE_ADAPTER_CATASTROPHIC 0x0B0B
404-
405304
#define FLD_LS_64(dev, val, field) \
406305
(((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
407306
#define FLD_RS_64(dev, val, field) \
@@ -771,6 +670,10 @@ enum irdma_cqp_op_type {
771670
#define IRDMA_CQPSQ_FWQE_USERFLCODE BIT_ULL(60)
772671
#define IRDMA_CQPSQ_FWQE_FLUSHSQ BIT_ULL(61)
773672
#define IRDMA_CQPSQ_FWQE_FLUSHRQ BIT_ULL(62)
673+
#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID BIT_ULL(42)
674+
#define IRDMA_CQPSQ_FWQE_ERR_SQ_IDX GENMASK_ULL(49, 32)
675+
#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID BIT_ULL(43)
676+
#define IRDMA_CQPSQ_FWQE_ERR_RQ_IDX GENMASK_ULL(46, 32)
774677
#define IRDMA_CQPSQ_MAPT_PORT GENMASK_ULL(15, 0)
775678
#define IRDMA_CQPSQ_MAPT_ADDPORT BIT_ULL(62)
776679
#define IRDMA_CQPSQ_UPESD_SDCMD GENMASK_ULL(31, 0)

drivers/infiniband/hw/irdma/hw.c

Lines changed: 27 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -135,76 +135,24 @@ static void irdma_process_ceq(struct irdma_pci_f *rf, struct irdma_ceq *ceq)
135135
static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
136136
struct irdma_aeqe_info *info)
137137
{
138+
struct qp_err_code qp_err;
139+
138140
qp->sq_flush_code = info->sq;
139141
qp->rq_flush_code = info->rq;
140-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
141-
142-
switch (info->ae_id) {
143-
case IRDMA_AE_AMP_BOUNDS_VIOLATION:
144-
case IRDMA_AE_AMP_INVALID_STAG:
145-
case IRDMA_AE_AMP_RIGHTS_VIOLATION:
146-
case IRDMA_AE_AMP_UNALLOCATED_STAG:
147-
case IRDMA_AE_AMP_BAD_PD:
148-
case IRDMA_AE_AMP_BAD_QP:
149-
case IRDMA_AE_AMP_BAD_STAG_KEY:
150-
case IRDMA_AE_AMP_BAD_STAG_INDEX:
151-
case IRDMA_AE_AMP_TO_WRAP:
152-
case IRDMA_AE_PRIV_OPERATION_DENIED:
153-
qp->flush_code = FLUSH_PROT_ERR;
154-
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
155-
break;
156-
case IRDMA_AE_UDA_XMIT_BAD_PD:
157-
case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
158-
qp->flush_code = FLUSH_LOC_QP_OP_ERR;
159-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
160-
break;
161-
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
162-
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT:
163-
case IRDMA_AE_UDA_L4LEN_INVALID:
164-
case IRDMA_AE_DDP_UBE_INVALID_MO:
165-
case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
166-
qp->flush_code = FLUSH_LOC_LEN_ERR;
167-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
168-
break;
169-
case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS:
170-
case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
171-
qp->flush_code = FLUSH_REM_ACCESS_ERR;
172-
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
173-
break;
174-
case IRDMA_AE_LLP_SEGMENT_TOO_SMALL:
175-
case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR:
176-
case IRDMA_AE_ROCE_RSP_LENGTH_ERROR:
177-
case IRDMA_AE_IB_REMOTE_OP_ERROR:
178-
qp->flush_code = FLUSH_REM_OP_ERR;
179-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
180-
break;
181-
case IRDMA_AE_LCE_QP_CATASTROPHIC:
182-
qp->flush_code = FLUSH_FATAL_ERR;
183-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
184-
break;
185-
case IRDMA_AE_IB_RREQ_AND_Q1_FULL:
186-
qp->flush_code = FLUSH_GENERAL_ERR;
187-
break;
188-
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
189-
qp->flush_code = FLUSH_RETRY_EXC_ERR;
190-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
191-
break;
192-
case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
193-
case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
194-
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
195-
case IRDMA_AE_AMP_MWBIND_VALID_STAG:
196-
qp->flush_code = FLUSH_MW_BIND_ERR;
197-
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
198-
break;
199-
case IRDMA_AE_IB_INVALID_REQUEST:
200-
qp->flush_code = FLUSH_REM_INV_REQ_ERR;
201-
qp->event_type = IRDMA_QP_EVENT_REQ_ERR;
202-
break;
203-
default:
204-
qp->flush_code = FLUSH_GENERAL_ERR;
205-
qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC;
206-
break;
142+
if (qp->qp_uk.uk_attrs->hw_rev >= IRDMA_GEN_3) {
143+
if (info->sq) {
144+
qp->err_sq_idx_valid = true;
145+
qp->err_sq_idx = info->wqe_idx;
146+
}
147+
if (info->rq) {
148+
qp->err_rq_idx_valid = true;
149+
qp->err_rq_idx = info->wqe_idx;
150+
}
207151
}
152+
153+
qp_err = irdma_ae_to_qp_err_code(info->ae_id);
154+
qp->flush_code = qp_err.flush_code;
155+
qp->event_type = qp_err.event_type;
208156
}
209157

210158
/**
@@ -320,7 +268,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
320268
if (info->ae_id != IRDMA_AE_QP_SUSPEND_COMPLETE)
321269
iwqp->last_aeq = info->ae_id;
322270
spin_unlock_irqrestore(&iwqp->lock, flags);
323-
ctx_info = &iwqp->ctx_info;
324271
} else if (info->srq) {
325272
if (info->ae_id != IRDMA_AE_SRQ_LIMIT)
326273
continue;
@@ -466,9 +413,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
466413
default:
467414
ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d, ae_src=%d\n",
468415
info->ae_id, info->qp, info->qp_cq_id, info->ae_src);
469-
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
470-
ctx_info->roce_info->err_rq_idx_valid = info->rq;
471-
if (info->rq) {
416+
ctx_info = &iwqp->ctx_info;
417+
if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) {
418+
ctx_info->roce_info->err_rq_idx_valid =
419+
ctx_info->srq_valid ? false : info->err_rq_idx_valid;
420+
if (ctx_info->roce_info->err_rq_idx_valid) {
472421
ctx_info->roce_info->err_rq_idx = info->wqe_idx;
473422
irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va,
474423
ctx_info);
@@ -2832,7 +2781,9 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
28322781
struct irdma_pci_f *rf = iwqp->iwdev->rf;
28332782
u8 flush_code = iwqp->sc_qp.flush_code;
28342783

2835-
if (!(flush_mask & IRDMA_FLUSH_SQ) && !(flush_mask & IRDMA_FLUSH_RQ))
2784+
if ((!(flush_mask & IRDMA_FLUSH_SQ) &&
2785+
!(flush_mask & IRDMA_FLUSH_RQ)) ||
2786+
((flush_mask & IRDMA_REFLUSH) && rf->rdma_ver >= IRDMA_GEN_3))
28362787
return;
28372788

28382789
/* Set flush info fields*/
@@ -2845,6 +2796,10 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
28452796
info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
28462797
info.rq_minor_code = FLUSH_GENERAL_ERR;
28472798
info.userflushcode = true;
2799+
info.err_sq_idx_valid = iwqp->sc_qp.err_sq_idx_valid;
2800+
info.err_sq_idx = iwqp->sc_qp.err_sq_idx;
2801+
info.err_rq_idx_valid = iwqp->sc_qp.err_rq_idx_valid;
2802+
info.err_rq_idx = iwqp->sc_qp.err_rq_idx;
28482803

28492804
if (flush_mask & IRDMA_REFLUSH) {
28502805
if (info.sq)

drivers/infiniband/hw/irdma/type.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,6 @@ enum irdma_term_mpa_errors {
9797
MPA_REQ_RSP = 0x04,
9898
};
9999

100-
enum irdma_qp_event_type {
101-
IRDMA_QP_EVENT_CATASTROPHIC,
102-
IRDMA_QP_EVENT_ACCESS_ERR,
103-
IRDMA_QP_EVENT_REQ_ERR,
104-
};
105-
106100
enum irdma_hw_stats_index {
107101
/* gen1 - 32-bit */
108102
IRDMA_HW_STAT_INDEX_IP4RXDISCARD = 0,
@@ -565,6 +559,10 @@ struct irdma_sc_qp {
565559
bool virtual_map:1;
566560
bool flush_sq:1;
567561
bool flush_rq:1;
562+
bool err_sq_idx_valid:1;
563+
bool err_rq_idx_valid:1;
564+
u32 err_sq_idx;
565+
u32 err_rq_idx;
568566
bool sq_flush_code:1;
569567
bool rq_flush_code:1;
570568
u32 pkt_limit;
@@ -1289,6 +1287,8 @@ struct irdma_cqp_manage_push_page_info {
12891287
};
12901288

12911289
struct irdma_qp_flush_info {
1290+
u32 err_sq_idx;
1291+
u32 err_rq_idx;
12921292
u16 sq_minor_code;
12931293
u16 sq_major_code;
12941294
u16 rq_minor_code;
@@ -1299,6 +1299,8 @@ struct irdma_qp_flush_info {
12991299
bool rq:1;
13001300
bool userflushcode:1;
13011301
bool generate_ae:1;
1302+
bool err_sq_idx_valid:1;
1303+
bool err_rq_idx_valid:1;
13021304
};
13031305

13041306
struct irdma_gen_ae_info {

drivers/infiniband/hw/irdma/uk.c

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,6 +1148,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
11481148
__le64 *cqe;
11491149
struct irdma_qp_uk *qp;
11501150
struct irdma_srq_uk *srq;
1151+
struct qp_err_code qp_err;
11511152
u8 is_srq;
11521153
struct irdma_ring *pring = NULL;
11531154
u32 wqe_idx;
@@ -1233,16 +1234,35 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
12331234
if (info->error) {
12341235
info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
12351236
info->minor_err = FIELD_GET(IRDMA_CQ_MINERR, qword3);
1236-
if (info->major_err == IRDMA_FLUSH_MAJOR_ERR) {
1237-
info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
1237+
switch (info->major_err) {
1238+
case IRDMA_SRQFLUSH_RSVD_MAJOR_ERR:
1239+
qp_err = irdma_ae_to_qp_err_code(info->minor_err);
1240+
info->minor_err = qp_err.flush_code;
1241+
fallthrough;
1242+
case IRDMA_FLUSH_MAJOR_ERR:
12381243
/* Set the min error to standard flush error code for remaining cqes */
12391244
if (info->minor_err != FLUSH_GENERAL_ERR) {
12401245
qword3 &= ~IRDMA_CQ_MINERR;
12411246
qword3 |= FIELD_PREP(IRDMA_CQ_MINERR, FLUSH_GENERAL_ERR);
12421247
set_64bit_val(cqe, 24, qword3);
12431248
}
1244-
} else {
1245-
info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
1249+
info->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
1250+
break;
1251+
default:
1252+
#define IRDMA_CIE_SIGNATURE 0xE
1253+
#define IRDMA_CQMAJERR_HIGH_NIBBLE GENMASK(15, 12)
1254+
if (info->q_type == IRDMA_CQE_QTYPE_SQ &&
1255+
qp->qp_type == IRDMA_QP_TYPE_ROCE_UD &&
1256+
FIELD_GET(IRDMA_CQMAJERR_HIGH_NIBBLE, info->major_err)
1257+
== IRDMA_CIE_SIGNATURE) {
1258+
info->error = 0;
1259+
info->major_err = 0;
1260+
info->minor_err = 0;
1261+
info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
1262+
} else {
1263+
info->comp_status = IRDMA_COMPL_STATUS_UNKNOWN;
1264+
}
1265+
break;
12461266
}
12471267
} else {
12481268
info->comp_status = IRDMA_COMPL_STATUS_SUCCESS;
@@ -1251,7 +1271,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
12511271
get_64bit_val(cqe, 0, &qword0);
12521272
get_64bit_val(cqe, 16, &qword2);
12531273

1254-
info->tcp_seq_num_rtt = (u32)FIELD_GET(IRDMACQ_TCPSEQNUMRTT, qword0);
12551274
info->qp_id = (u32)FIELD_GET(IRDMACQ_QPID, qword2);
12561275
info->ud_src_qpn = (u32)FIELD_GET(IRDMACQ_UDSRCQPN, qword2);
12571276

@@ -1377,9 +1396,15 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
13771396
ret_code = 0;
13781397

13791398
exit:
1380-
if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED)
1399+
if (!ret_code && info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) {
13811400
if (pring && IRDMA_RING_MORE_WORK(*pring))
1382-
move_cq_head = false;
1401+
/* Park CQ head during a flush to generate additional CQEs
1402+
* from SW for all unprocessed WQEs. For GEN3 and beyond
1403+
* FW will generate/flush these CQEs so move to the next CQE
1404+
*/
1405+
move_cq_head = qp->uk_attrs->hw_rev <= IRDMA_GEN_2 ?
1406+
false : true;
1407+
}
13831408

13841409
if (move_cq_head) {
13851410
IRDMA_RING_MOVE_HEAD_NOCHECK(cq->cq_ring);

0 commit comments

Comments
 (0)