Skip to content

Commit 4ad8181

Browse files
l00436852 authored and jgunthorpe committed
RDMA/hns: Fix RNR retransmission issue for HIP08
Due to the discrete nature of the HIP08 timer unit, a requester might finish the timeout period sooner, in elapsed real time, than its responder does, even when both sides share the identical RNR timeout length carried in the RNR NAK packet and the responder does start timing before the requester. Furthermore, if a 'providential' resent packet arrives before the responder's timeout period has expired, the responder is entitled, under the IB protocol, to drop the packet silently.

To address this problem, our team made good use of certain hardware facts:

1) The timing resolution for the transmission arrangements is 1 microsecond, e.g. if the cq_period field is set to 3, the hardware interprets it as 3 microseconds.
2) A QPC field informs the hardware how many timing units (ticks) constitute a full microsecond; by default this is 1000.
3) It takes 14ns for the processor to handle a packet in the buffer, so an RNR timeout length of 10ns ensures that our processing mechanism is disabled during the entire timeout period and the packet won't be dropped silently.

To achieve (3), we permanently set the QPC field mentioned in (2) to zero, which nominally indicates that every time tick is equivalent to a microsecond in wall-clock time; now, an RNR timeout period with a face value of 10 only lasts 10 ticks, which is 10ns in wall-clock time.

It's worth noting that we adapt the driver by magnifying certain configuration parameters (cq_period, eq_period and ack_timeout) by 1000, given that the user assumes the configuration timing unit to be microseconds. Also, this particular improvisation is only deployed on HIP08 since other hardware has already solved this issue.

Fixes: cfc85f3 ("RDMA/hns: Add profile support for hip08 driver")
Link: https://lore.kernel.org/r/20211209140655.49493-1-liangwenpeng@huawei.com
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent 2585cf9 commit 4ad8181

2 files changed

Lines changed: 65 additions & 7 deletions

File tree

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1594,11 +1594,17 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
15941594
{
15951595
struct hns_roce_cmq_desc desc;
15961596
struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
1597+
u32 clock_cycles_of_1us;
15971598

15981599
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_GLOBAL_PARAM,
15991600
false);
16001601

1601-
hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, 0x3e8);
1602+
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
1603+
clock_cycles_of_1us = HNS_ROCE_1NS_CFG;
1604+
else
1605+
clock_cycles_of_1us = HNS_ROCE_1US_CFG;
1606+
1607+
hr_reg_write(req, CFG_GLOBAL_PARAM_1US_CYCLES, clock_cycles_of_1us);
16021608
hr_reg_write(req, CFG_GLOBAL_PARAM_UDP_PORT, ROCE_V2_UDP_DPORT);
16031609

16041610
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -4802,6 +4808,30 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
48024808
return ret;
48034809
}
48044810

4811+
/*
 * Validate a requested local ACK timeout against the limit for this device
 * revision and, on HIP08, adjust it in place.
 *
 * @hr_dev:  device whose pci_dev->revision selects the limit
 * @timeout: in/out; the requested value, rewritten only on HIP08
 *
 * Returns true when the value is acceptable (or when the revision is below
 * HIP08, for which no check is performed), false after logging a warning
 * when the value exceeds the revision's maximum.
 */
static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout)
4812+
{
4813+
#define QP_ACK_TIMEOUT_MAX_HIP08 20 /* largest value accepted on HIP08 */
4814+
#define QP_ACK_TIMEOUT_OFFSET 10 /* added to the value on HIP08 only */
4815+
#define QP_ACK_TIMEOUT_MAX 31 /* largest value accepted on revisions above HIP08 */
4816+
4817+
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
4818+
if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) {
4819+
ibdev_warn(&hr_dev->ib_dev,
4820+
"Local ACK timeout shall be 0 to 20.\n");
4821+
return false;
4822+
}
4823+
*timeout += QP_ACK_TIMEOUT_OFFSET; /* NOTE(review): presumably scales an exponent-encoded timeout by 2^10 (~1000) to match the shortened HIP08 tick - confirm */
4824+
} else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) {
4825+
if (*timeout > QP_ACK_TIMEOUT_MAX) {
4826+
ibdev_warn(&hr_dev->ib_dev,
4827+
"Local ACK timeout shall be 0 to 31.\n");
4828+
return false;
4829+
}
4830+
}
4831+
4832+
return true; /* also reached unchanged for revisions below HIP08 */
4833+
}
4834+
48054835
static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
48064836
const struct ib_qp_attr *attr,
48074837
int attr_mask,
@@ -4811,6 +4841,7 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
48114841
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
48124842
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
48134843
int ret = 0;
4844+
u8 timeout;
48144845

48154846
if (attr_mask & IB_QP_AV) {
48164847
ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context,
@@ -4820,12 +4851,10 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
48204851
}
48214852

48224853
if (attr_mask & IB_QP_TIMEOUT) {
4823-
if (attr->timeout < 31) {
4824-
hr_reg_write(context, QPC_AT, attr->timeout);
4854+
timeout = attr->timeout;
4855+
if (check_qp_timeout_cfg_range(hr_dev, &timeout)) {
4856+
hr_reg_write(context, QPC_AT, timeout);
48254857
hr_reg_clear(qpc_mask, QPC_AT);
4826-
} else {
4827-
ibdev_warn(&hr_dev->ib_dev,
4828-
"Local ACK timeout shall be 0 to 30.\n");
48294858
}
48304859
}
48314860

@@ -4882,7 +4911,9 @@ static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp,
48824911
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
48834912

48844913
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
4885-
hr_reg_write(context, QPC_MIN_RNR_TIME, attr->min_rnr_timer);
4914+
hr_reg_write(context, QPC_MIN_RNR_TIME,
4915+
hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
4916+
HNS_ROCE_RNR_TIMER_10NS : attr->min_rnr_timer);
48864917
hr_reg_clear(qpc_mask, QPC_MIN_RNR_TIME);
48874918
}
48884919

@@ -5499,6 +5530,16 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
54995530

55005531
hr_reg_write(cq_context, CQC_CQ_MAX_CNT, cq_count);
55015532
hr_reg_clear(cqc_mask, CQC_CQ_MAX_CNT);
5533+
5534+
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
5535+
if (cq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
5536+
dev_info(hr_dev->dev,
5537+
"cq_period(%u) reached the upper limit, adjusted to 65.\n",
5538+
cq_period);
5539+
cq_period = HNS_ROCE_MAX_CQ_PERIOD;
5540+
}
5541+
cq_period *= HNS_ROCE_CLOCK_ADJUST;
5542+
}
55025543
hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period);
55035544
hr_reg_clear(cqc_mask, CQC_CQ_PERIOD);
55045545

@@ -5894,6 +5935,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
58945935
hr_reg_write(eqc, EQC_EQ_PROD_INDX, HNS_ROCE_EQ_INIT_PROD_IDX);
58955936
hr_reg_write(eqc, EQC_EQ_MAX_CNT, eq->eq_max_cnt);
58965937

5938+
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
5939+
if (eq->eq_period * HNS_ROCE_CLOCK_ADJUST > USHRT_MAX) {
5940+
dev_info(hr_dev->dev, "eq_period(%u) reached the upper limit, adjusted to 65.\n",
5941+
eq->eq_period);
5942+
eq->eq_period = HNS_ROCE_MAX_EQ_PERIOD;
5943+
}
5944+
eq->eq_period *= HNS_ROCE_CLOCK_ADJUST;
5945+
}
5946+
58975947
hr_reg_write(eqc, EQC_EQ_PERIOD, eq->eq_period);
58985948
hr_reg_write(eqc, EQC_EQE_REPORT_TIMER, HNS_ROCE_EQ_INIT_REPORT_TIMER);
58995949
hr_reg_write(eqc, EQC_EQE_BA_L, bt_ba >> 3);

drivers/infiniband/hw/hns/hns_roce_hw_v2.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,14 @@ struct hns_roce_dip {
14441444
struct list_head node; /* all dips are on a list */
14451445
};
14461446

1447+
/* only for RNR timeout issue of HIP08 */
1448+
#define HNS_ROCE_CLOCK_ADJUST 1000
1449+
#define HNS_ROCE_MAX_CQ_PERIOD 65
1450+
#define HNS_ROCE_MAX_EQ_PERIOD 65
1451+
#define HNS_ROCE_RNR_TIMER_10NS 1
1452+
#define HNS_ROCE_1US_CFG 999
1453+
#define HNS_ROCE_1NS_CFG 0
1454+
14471455
#define HNS_ROCE_AEQ_DEFAULT_BURST_NUM 0x0
14481456
#define HNS_ROCE_AEQ_DEFAULT_INTERVAL 0x0
14491457
#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x0

0 commit comments

Comments
 (0)