Skip to content

Commit c7db0ab

Browse files
krzyczrleon
authored and committed
RDMA/irdma: Add GEN3 CQP support with deferred completions
GEN3 introduces asynchronous handling of Control QP (CQP) operations to minimize head-of-line blocking. Create the CQP using the updated GEN3- specific descriptor fields and implement the necessary support for this deferred completion mechanism. Signed-off-by: Krzysztof Czurylo <krzysztof.czurylo@intel.com> Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com> Link: https://patch.msgid.link/20250827152545.2056-5-tatyana.e.nikolova@intel.com Tested-by: Jacob Moroni <jmoroni@google.com> Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent 7d5a7cc commit c7db0ab

7 files changed

Lines changed: 438 additions & 15 deletions

File tree

drivers/infiniband/hw/irdma/ctrl.c

Lines changed: 250 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2741,6 +2741,89 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val,
27412741
*error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val);
27422742
}
27432743

2744+
/**
2745+
* irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list
2746+
* @dev: sc device struct
2747+
* @info: AE entry info
2748+
* @first: true if this is the first call to this handler for given AEQE
2749+
* @scratch: (out) scratch entry pointer
2750+
* @sw_def_info: (in/out) SW ticket value for this AE
2751+
*
2752+
* In case of AE_DEF_CMPL event, this function should be called in a loop
2753+
* until it returns NULL-ptr via scratch.
2754+
* For each call, it looks for a matching CQP request on pending list,
2755+
* removes it from the list and returns the pointer to the associated scratch
2756+
* entry.
2757+
* If this is the first call to this function for given AEQE, sw_def_info
2758+
* value is not used to find matching requests. Instead, it is populated
2759+
* with the value from the first matching cqp_request on the list.
2760+
* For subsequent calls, ooo_op->sw_def_info need to match the value passed
2761+
* by a caller.
2762+
*
2763+
* Return: scratch entry pointer for cqp_request to be released or NULL
2764+
* if no matching request is found.
2765+
*/
2766+
void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
2767+
struct irdma_aeqe_info *info,
2768+
bool first, u64 *scratch,
2769+
u32 *sw_def_info)
2770+
{
2771+
struct irdma_ooo_cqp_op *ooo_op;
2772+
unsigned long flags;
2773+
2774+
*scratch = 0;
2775+
2776+
spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags);
2777+
list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
2778+
if (ooo_op->deferred &&
2779+
((first && ooo_op->def_info == info->def_info) ||
2780+
(!first && ooo_op->sw_def_info == *sw_def_info))) {
2781+
*sw_def_info = ooo_op->sw_def_info;
2782+
*scratch = ooo_op->scratch;
2783+
2784+
list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
2785+
atomic64_inc(&dev->cqp->completed_ops);
2786+
2787+
break;
2788+
}
2789+
}
2790+
spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags);
2791+
2792+
if (first && !*scratch)
2793+
ibdev_dbg(to_ibdev(dev),
2794+
"AEQ: deferred completion with unknown ticket: def_info 0x%x\n",
2795+
info->def_info);
2796+
}
2797+
2798+
/**
2799+
* irdma_sc_cqp_cleanup_handler - remove requests from pending list
2800+
* @dev: sc device struct
2801+
*
2802+
* This function should be called in a loop from irdma_cleanup_pending_cqp_op.
2803+
* For each call, it returns first CQP request on pending list, removes it
2804+
* from the list and returns the pointer to the associated scratch entry.
2805+
*
2806+
* Return: scratch entry pointer for cqp_request to be released or NULL
2807+
* if pending list is empty.
2808+
*/
2809+
u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev)
2810+
{
2811+
struct irdma_ooo_cqp_op *ooo_op;
2812+
u64 scratch = 0;
2813+
2814+
list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
2815+
scratch = ooo_op->scratch;
2816+
2817+
list_del(&ooo_op->list_entry);
2818+
list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail);
2819+
atomic64_inc(&dev->cqp->completed_ops);
2820+
2821+
break;
2822+
}
2823+
2824+
return scratch;
2825+
}
2826+
27442827
/**
27452828
* irdma_cqp_poll_registers - poll cqp registers
27462829
* @cqp: struct for cqp hw
@@ -3126,6 +3209,8 @@ void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq)
31263209
int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
31273210
struct irdma_cqp_init_info *info)
31283211
{
3212+
struct irdma_ooo_cqp_op *ooo_op;
3213+
u32 num_ooo_ops;
31293214
u8 hw_sq_size;
31303215

31313216
if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
@@ -3156,17 +3241,43 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
31563241
cqp->rocev2_rto_policy = info->rocev2_rto_policy;
31573242
cqp->protocol_used = info->protocol_used;
31583243
memcpy(&cqp->dcqcn_params, &info->dcqcn_params, sizeof(cqp->dcqcn_params));
3244+
if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
3245+
cqp->ooisc_blksize = info->ooisc_blksize;
3246+
cqp->rrsp_blksize = info->rrsp_blksize;
3247+
cqp->q1_blksize = info->q1_blksize;
3248+
cqp->xmit_blksize = info->xmit_blksize;
3249+
cqp->blksizes_valid = info->blksizes_valid;
3250+
cqp->ts_shift = info->ts_shift;
3251+
cqp->ts_override = info->ts_override;
3252+
cqp->en_fine_grained_timers = info->en_fine_grained_timers;
3253+
cqp->pe_en_vf_cnt = info->pe_en_vf_cnt;
3254+
cqp->ooo_op_array = info->ooo_op_array;
3255+
/* initialize the OOO lists */
3256+
INIT_LIST_HEAD(&cqp->ooo_avail);
3257+
INIT_LIST_HEAD(&cqp->ooo_pnd);
3258+
if (cqp->ooo_op_array) {
3259+
/* Populate avail list entries */
3260+
for (num_ooo_ops = 0, ooo_op = info->ooo_op_array;
3261+
num_ooo_ops < cqp->sq_size;
3262+
num_ooo_ops++, ooo_op++)
3263+
list_add(&ooo_op->list_entry, &cqp->ooo_avail);
3264+
}
3265+
}
31593266
info->dev->cqp = cqp;
31603267

31613268
IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
3269+
cqp->last_def_cmpl_ticket = 0;
3270+
cqp->sw_def_cmpl_ticket = 0;
31623271
cqp->requested_ops = 0;
31633272
atomic64_set(&cqp->completed_ops, 0);
31643273
/* for the cqp commands backlog. */
31653274
INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
31663275

31673276
writel(0, cqp->dev->hw_regs[IRDMA_CQPTAIL]);
3168-
writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
3169-
writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
3277+
if (cqp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) {
3278+
writel(0, cqp->dev->hw_regs[IRDMA_CQPDB]);
3279+
writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
3280+
}
31703281

31713282
ibdev_dbg(to_ibdev(cqp->dev),
31723283
"WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
@@ -3198,6 +3309,7 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
31983309
return -ENOMEM;
31993310

32003311
spin_lock_init(&cqp->dev->cqp_lock);
3312+
spin_lock_init(&cqp->ooo_list_lock);
32013313

32023314
temp = FIELD_PREP(IRDMA_CQPHC_SQSIZE, cqp->hw_sq_size) |
32033315
FIELD_PREP(IRDMA_CQPHC_SVER, cqp->struct_ver) |
@@ -3209,12 +3321,29 @@ int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err)
32093321
FIELD_PREP(IRDMA_CQPHC_PROTOCOL_USED,
32103322
cqp->protocol_used);
32113323
}
3324+
if (hw_rev >= IRDMA_GEN_3)
3325+
temp |= FIELD_PREP(IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS,
3326+
cqp->en_fine_grained_timers);
32123327

32133328
set_64bit_val(cqp->host_ctx, 0, temp);
32143329
set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
32153330

32163331
temp = FIELD_PREP(IRDMA_CQPHC_ENABLED_VFS, cqp->ena_vf_count) |
32173332
FIELD_PREP(IRDMA_CQPHC_HMC_PROFILE, cqp->hmc_profile);
3333+
3334+
if (hw_rev >= IRDMA_GEN_3)
3335+
temp |= FIELD_PREP(IRDMA_CQPHC_OOISC_BLKSIZE,
3336+
cqp->ooisc_blksize) |
3337+
FIELD_PREP(IRDMA_CQPHC_RRSP_BLKSIZE,
3338+
cqp->rrsp_blksize) |
3339+
FIELD_PREP(IRDMA_CQPHC_Q1_BLKSIZE, cqp->q1_blksize) |
3340+
FIELD_PREP(IRDMA_CQPHC_XMIT_BLKSIZE,
3341+
cqp->xmit_blksize) |
3342+
FIELD_PREP(IRDMA_CQPHC_BLKSIZES_VALID,
3343+
cqp->blksizes_valid) |
3344+
FIELD_PREP(IRDMA_CQPHC_TIMESTAMP_OVERRIDE,
3345+
cqp->ts_override) |
3346+
FIELD_PREP(IRDMA_CQPHC_TS_SHIFT, cqp->ts_shift);
32183347
set_64bit_val(cqp->host_ctx, 16, temp);
32193348
set_64bit_val(cqp->host_ctx, 24, (uintptr_t)cqp);
32203349
temp = FIELD_PREP(IRDMA_CQPHC_HW_MAJVER, cqp->hw_maj_ver) |
@@ -3375,6 +3504,87 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
33753504
writel(ccq->cq_uk.cq_id, ccq->dev->cq_arm_db);
33763505
}
33773506

3507+
/**
3508+
* irdma_sc_process_def_cmpl - process deferred or pending completion
3509+
* @cqp: CQP sc struct
3510+
* @info: CQP CQE info
3511+
* @wqe_idx: CQP WQE descriptor index
3512+
* @def_info: deferred op ticket value or out-of-order completion id
3513+
* @def_cmpl: true for deferred completion, false for pending (RCA)
3514+
*/
3515+
static void irdma_sc_process_def_cmpl(struct irdma_sc_cqp *cqp,
3516+
struct irdma_ccq_cqe_info *info,
3517+
u32 wqe_idx, u32 def_info, bool def_cmpl)
3518+
{
3519+
struct irdma_ooo_cqp_op *ooo_op;
3520+
unsigned long flags;
3521+
3522+
/* Deferred and out-of-order completions share the same list of pending
3523+
* completions. Since the list can be also accessed from AE handler,
3524+
* it must be protected by a lock.
3525+
*/
3526+
spin_lock_irqsave(&cqp->ooo_list_lock, flags);
3527+
3528+
/* For deferred completions bump up SW completion ticket value. */
3529+
if (def_cmpl) {
3530+
cqp->last_def_cmpl_ticket = def_info;
3531+
cqp->sw_def_cmpl_ticket++;
3532+
}
3533+
if (!list_empty(&cqp->ooo_avail)) {
3534+
ooo_op = (struct irdma_ooo_cqp_op *)
3535+
list_entry(cqp->ooo_avail.next,
3536+
struct irdma_ooo_cqp_op, list_entry);
3537+
3538+
list_del(&ooo_op->list_entry);
3539+
ooo_op->scratch = info->scratch;
3540+
ooo_op->def_info = def_info;
3541+
ooo_op->sw_def_info = cqp->sw_def_cmpl_ticket;
3542+
ooo_op->deferred = def_cmpl;
3543+
ooo_op->wqe_idx = wqe_idx;
3544+
/* Pending completions must be chronologically ordered,
3545+
* so adding at the end of list.
3546+
*/
3547+
list_add_tail(&ooo_op->list_entry, &cqp->ooo_pnd);
3548+
}
3549+
spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
3550+
3551+
info->pending = true;
3552+
}
3553+
3554+
/**
3555+
* irdma_sc_process_ooo_cmpl - process out-of-order (final) completion
3556+
* @cqp: CQP sc struct
3557+
* @info: CQP CQE info
3558+
* @def_info: out-of-order completion id
3559+
*/
3560+
static void irdma_sc_process_ooo_cmpl(struct irdma_sc_cqp *cqp,
3561+
struct irdma_ccq_cqe_info *info,
3562+
u32 def_info)
3563+
{
3564+
struct irdma_ooo_cqp_op *ooo_op_tmp;
3565+
struct irdma_ooo_cqp_op *ooo_op;
3566+
unsigned long flags;
3567+
3568+
info->scratch = 0;
3569+
3570+
spin_lock_irqsave(&cqp->ooo_list_lock, flags);
3571+
list_for_each_entry_safe(ooo_op, ooo_op_tmp, &cqp->ooo_pnd,
3572+
list_entry) {
3573+
if (!ooo_op->deferred && ooo_op->def_info == def_info) {
3574+
list_del(&ooo_op->list_entry);
3575+
info->scratch = ooo_op->scratch;
3576+
list_add(&ooo_op->list_entry, &cqp->ooo_avail);
3577+
break;
3578+
}
3579+
}
3580+
spin_unlock_irqrestore(&cqp->ooo_list_lock, flags);
3581+
3582+
if (!info->scratch)
3583+
ibdev_dbg(to_ibdev(cqp->dev),
3584+
"CQP: DEBUG_FW_OOO out-of-order completion with unknown def_info = 0x%x\n",
3585+
def_info);
3586+
}
3587+
33783588
/**
33793589
* irdma_sc_ccq_get_cqe_info - get ccq's cq entry
33803590
* @ccq: ccq sc struct
@@ -3383,13 +3593,18 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq)
33833593
int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
33843594
struct irdma_ccq_cqe_info *info)
33853595
{
3596+
u32 def_info;
3597+
bool def_cmpl = false;
3598+
bool pend_cmpl = false;
3599+
bool ooo_final_cmpl = false;
33863600
u64 qp_ctx, temp, temp1;
33873601
__le64 *cqe;
33883602
struct irdma_sc_cqp *cqp;
33893603
u32 wqe_idx;
33903604
u32 error;
33913605
u8 polarity;
33923606
int ret_code = 0;
3607+
unsigned long flags;
33933608

33943609
if (ccq->cq_uk.avoid_mem_cflct)
33953610
cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk);
@@ -3421,6 +3636,25 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
34213636

34223637
get_64bit_val(cqe, 16, &temp1);
34233638
info->op_ret_val = (u32)FIELD_GET(IRDMA_CCQ_OPRETVAL, temp1);
3639+
if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
3640+
def_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
3641+
info->min_err_code == IRDMA_CQPSQ_MIN_DEF_CMPL;
3642+
def_info = (u32)FIELD_GET(IRDMA_CCQ_DEFINFO, temp1);
3643+
3644+
pend_cmpl = info->maj_err_code == IRDMA_CQPSQ_MAJ_NO_ERROR &&
3645+
info->min_err_code == IRDMA_CQPSQ_MIN_OOO_CMPL;
3646+
3647+
ooo_final_cmpl = (bool)FIELD_GET(IRDMA_OOO_CMPL, temp);
3648+
3649+
if (def_cmpl || pend_cmpl || ooo_final_cmpl) {
3650+
if (ooo_final_cmpl)
3651+
irdma_sc_process_ooo_cmpl(cqp, info, def_info);
3652+
else
3653+
irdma_sc_process_def_cmpl(cqp, info, wqe_idx,
3654+
def_info, def_cmpl);
3655+
}
3656+
}
3657+
34243658
get_64bit_val(cqp->sq_base[wqe_idx].elem, 24, &temp1);
34253659
info->op_code = (u8)FIELD_GET(IRDMA_CQPSQ_OPCODE, temp1);
34263660
info->cqp = cqp;
@@ -3437,7 +3671,16 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
34373671

34383672
dma_wmb(); /* make sure shadow area is updated before moving tail */
34393673

3440-
IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
3674+
spin_lock_irqsave(&cqp->dev->cqp_lock, flags);
3675+
if (!ooo_final_cmpl)
3676+
IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
3677+
spin_unlock_irqrestore(&cqp->dev->cqp_lock, flags);
3678+
3679+
/* Do not increment completed_ops counter on pending or deferred
3680+
* completions.
3681+
*/
3682+
if (pend_cmpl || def_cmpl)
3683+
return ret_code;
34413684
atomic64_inc(&cqp->completed_ops);
34423685

34433686
return ret_code;
@@ -4123,6 +4366,10 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
41234366
info->compl_ctx = compl_ctx << 1;
41244367
ae_src = IRDMA_AE_SOURCE_RSVD;
41254368
break;
4369+
case IRDMA_AE_CQP_DEFERRED_COMPLETE:
4370+
info->def_info = info->wqe_idx;
4371+
ae_src = IRDMA_AE_SOURCE_RSVD;
4372+
break;
41264373
case IRDMA_AE_ROCE_EMPTY_MCG:
41274374
case IRDMA_AE_ROCE_BAD_MC_IP_ADDR:
41284375
case IRDMA_AE_ROCE_BAD_MC_QPID:

drivers/infiniband/hw/irdma/defs.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ enum irdma_cqp_op_type {
367367
#define IRDMA_AE_LCE_FUNCTION_CATASTROPHIC 0x0701
368368
#define IRDMA_AE_LCE_CQ_CATASTROPHIC 0x0702
369369
#define IRDMA_AE_QP_SUSPEND_COMPLETE 0x0900
370+
#define IRDMA_AE_CQP_DEFERRED_COMPLETE 0x0901
370371

371372
#define FLD_LS_64(dev, val, field) \
372373
(((u64)(val) << (dev)->hw_shifts[field ## _S]) & (dev)->hw_masks[field ## _M])
@@ -464,6 +465,16 @@ enum irdma_cqp_op_type {
464465
#define IRDMA_CQPHC_SVER GENMASK_ULL(31, 24)
465466
#define IRDMA_CQPHC_SQBASE GENMASK_ULL(63, 9)
466467

468+
#define IRDMA_CQPHC_TIMESTAMP_OVERRIDE BIT_ULL(5)
469+
#define IRDMA_CQPHC_TS_SHIFT GENMASK_ULL(12, 8)
470+
#define IRDMA_CQPHC_EN_FINE_GRAINED_TIMERS BIT_ULL(0)
471+
472+
#define IRDMA_CQPHC_OOISC_BLKSIZE GENMASK_ULL(63, 60)
473+
#define IRDMA_CQPHC_RRSP_BLKSIZE GENMASK_ULL(59, 56)
474+
#define IRDMA_CQPHC_Q1_BLKSIZE GENMASK_ULL(55, 52)
475+
#define IRDMA_CQPHC_XMIT_BLKSIZE GENMASK_ULL(51, 48)
476+
#define IRDMA_CQPHC_BLKSIZES_VALID BIT_ULL(4)
477+
467478
#define IRDMA_CQPHC_QPCTX GENMASK_ULL(63, 0)
468479
#define IRDMA_QP_DBSA_HW_SQ_TAIL GENMASK_ULL(14, 0)
469480
#define IRDMA_CQ_DBSA_CQEIDX GENMASK_ULL(19, 0)
@@ -477,6 +488,8 @@ enum irdma_cqp_op_type {
477488

478489
#define IRDMA_CCQ_OPRETVAL GENMASK_ULL(31, 0)
479490

491+
#define IRDMA_CCQ_DEFINFO GENMASK_ULL(63, 32)
492+
480493
#define IRDMA_CQ_MINERR GENMASK_ULL(15, 0)
481494
#define IRDMA_CQ_MAJERR GENMASK_ULL(31, 16)
482495
#define IRDMA_CQ_WQEIDX GENMASK_ULL(46, 32)
@@ -708,6 +721,8 @@ enum irdma_cqp_op_type {
708721

709722
#define IRDMA_CQPSQ_MIN_STAG_INVALID 0x0001
710723
#define IRDMA_CQPSQ_MIN_SUSPEND_PND 0x0005
724+
#define IRDMA_CQPSQ_MIN_DEF_CMPL 0x0006
725+
#define IRDMA_CQPSQ_MIN_OOO_CMPL 0x0007
711726

712727
#define IRDMA_CQPSQ_MAJ_NO_ERROR 0x0000
713728
#define IRDMA_CQPSQ_MAJ_OBJCACHE_ERROR 0xF000

0 commit comments

Comments
 (0)