Skip to content

Commit 8ab05a5

Browse files
vladumrleon
authored and committed
IB/cm: Use separate agent w/o flow control for REP
Most responses (e.g., RTU) are not subject to flow control, as there is no further response expected. However, REPs are both requests (waiting for RTUs) and responses (being waited by REQs). With agent-level flow control added to the MAD layer, REPs can get delayed by outstanding REQs. This can cause a problem in a scenario such as 2 hosts connecting to each other at the same time. Both hosts fill the flow control outstanding slots with REQs. The corresponding REPs are now blocked behind those REQs, and neither side can make progress until REQs time out.

Add a separate MAD agent which is only used to send REPs. This agent does not have a recv_handler as it doesn't process responses nor does it register to receive requests.

Disable flow control for agents w/o a recv_handler, as they aren't waiting for responses. This allows the newly added REP agent to send even when clients are slow to generate RTU, which would be needed to unblock flow control outstanding slots.

Relax check in ib_post_send_mad to allow retries for this agent. REPs will be retried by the MAD layer until CM layer receives a response (e.g., RTU) on the normal agent and cancels them.

Suggested-by: Sean Hefty <shefty@nvidia.com>
Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Sean Hefty <shefty@nvidia.com>
Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Link: https://patch.msgid.link/9ac12d0842b849e2c8537d6e291ee0af9f79855c.1751278420.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent 314cb74 commit 8ab05a5

2 files changed

Lines changed: 44 additions & 10 deletions

File tree

drivers/infiniband/core/cm.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct cm_counter_attribute {
161161
struct cm_port {
162162
struct cm_device *cm_dev;
163163
struct ib_mad_agent *mad_agent;
164+
struct ib_mad_agent *rep_agent;
164165
u32 port_num;
165166
atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
166167
};
@@ -274,7 +275,8 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
274275
complete(&cm_id_priv->comp);
275276
}
276277

277-
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
278+
static struct ib_mad_send_buf *
279+
cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
278280
{
279281
struct ib_mad_agent *mad_agent;
280282
struct ib_mad_send_buf *m;
@@ -286,7 +288,8 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
286288
return ERR_PTR(-EINVAL);
287289

288290
read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
289-
mad_agent = cm_id_priv->av.port->mad_agent;
291+
mad_agent = rep_agent ? cm_id_priv->av.port->rep_agent :
292+
cm_id_priv->av.port->mad_agent;
290293
if (!mad_agent) {
291294
m = ERR_PTR(-EINVAL);
292295
goto out;
@@ -315,6 +318,11 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
315318
return m;
316319
}
317320

321+
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
322+
{
323+
return cm_alloc_msg_agent(cm_id_priv, false);
324+
}
325+
318326
static void cm_free_msg(struct ib_mad_send_buf *msg)
319327
{
320328
if (msg->ah)
@@ -323,13 +331,14 @@ static void cm_free_msg(struct ib_mad_send_buf *msg)
323331
}
324332

325333
static struct ib_mad_send_buf *
326-
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
334+
cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
335+
bool rep_agent)
327336
{
328337
struct ib_mad_send_buf *msg;
329338

330339
lockdep_assert_held(&cm_id_priv->lock);
331340

332-
msg = cm_alloc_msg(cm_id_priv);
341+
msg = cm_alloc_msg_agent(cm_id_priv, rep_agent);
333342
if (IS_ERR(msg))
334343
return msg;
335344

@@ -344,6 +353,12 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
344353
return msg;
345354
}
346355

356+
static struct ib_mad_send_buf *
357+
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
358+
{
359+
return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
360+
}
361+
347362
static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
348363
{
349364
struct cm_id_private *cm_id_priv = msg->context[0];
@@ -2295,7 +2310,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
22952310
goto out;
22962311
}
22972312

2298-
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
2313+
msg = cm_alloc_priv_msg_rep(cm_id_priv, IB_CM_REP_SENT, true);
22992314
if (IS_ERR(msg)) {
23002315
ret = PTR_ERR(msg);
23012316
goto out;
@@ -4380,9 +4395,22 @@ static int cm_add_one(struct ib_device *ib_device)
43804395
goto error2;
43814396
}
43824397

4398+
port->rep_agent = ib_register_mad_agent(ib_device, i,
4399+
IB_QPT_GSI,
4400+
NULL,
4401+
0,
4402+
cm_send_handler,
4403+
NULL,
4404+
port,
4405+
0);
4406+
if (IS_ERR(port->rep_agent)) {
4407+
ret = PTR_ERR(port->rep_agent);
4408+
goto error3;
4409+
}
4410+
43834411
ret = ib_modify_port(ib_device, i, 0, &port_modify);
43844412
if (ret)
4385-
goto error3;
4413+
goto error4;
43864414

43874415
count++;
43884416
}
@@ -4397,6 +4425,8 @@ static int cm_add_one(struct ib_device *ib_device)
43974425
write_unlock_irqrestore(&cm.device_lock, flags);
43984426
return 0;
43994427

4428+
error4:
4429+
ib_unregister_mad_agent(port->rep_agent);
44004430
error3:
44014431
ib_unregister_mad_agent(port->mad_agent);
44024432
error2:
@@ -4410,6 +4440,7 @@ static int cm_add_one(struct ib_device *ib_device)
44104440

44114441
port = cm_dev->port[i-1];
44124442
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4443+
ib_unregister_mad_agent(port->rep_agent);
44134444
ib_unregister_mad_agent(port->mad_agent);
44144445
ib_port_unregister_client_groups(ib_device, i,
44154446
cm_counter_groups);
@@ -4439,12 +4470,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
44394470

44404471
rdma_for_each_port (ib_device, i) {
44414472
struct ib_mad_agent *mad_agent;
4473+
struct ib_mad_agent *rep_agent;
44424474

44434475
if (!rdma_cap_ib_cm(ib_device, i))
44444476
continue;
44454477

44464478
port = cm_dev->port[i-1];
44474479
mad_agent = port->mad_agent;
4480+
rep_agent = port->rep_agent;
44484481
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
44494482
/*
44504483
* We flush the queue here after the going_down set, this
@@ -4458,8 +4491,10 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
44584491
*/
44594492
write_lock(&cm_dev->mad_agent_lock);
44604493
port->mad_agent = NULL;
4494+
port->rep_agent = NULL;
44614495
write_unlock(&cm_dev->mad_agent_lock);
44624496
ib_unregister_mad_agent(mad_agent);
4497+
ib_unregister_mad_agent(rep_agent);
44634498
ib_port_unregister_client_groups(ib_device, i,
44644499
cm_counter_groups);
44654500
}

drivers/infiniband/core/mad.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
424424
mad_agent_priv->sol_fc_send_count = 0;
425425
mad_agent_priv->sol_fc_wait_count = 0;
426426
mad_agent_priv->sol_fc_max =
427-
get_sol_fc_max_outstanding(mad_reg_req);
427+
recv_handler ? get_sol_fc_max_outstanding(mad_reg_req) : 0;
428+
428429
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
429430
if (ret2) {
430431
ret = ERR_PTR(ret2);
@@ -1280,9 +1281,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
12801281
if (ret)
12811282
goto error;
12821283

1283-
if (!send_buf->mad_agent->send_handler ||
1284-
(send_buf->timeout_ms &&
1285-
!send_buf->mad_agent->recv_handler)) {
1284+
if (!send_buf->mad_agent->send_handler) {
12861285
ret = -EINVAL;
12871286
goto error;
12881287
}

0 commit comments

Comments (0)