Skip to content

Commit 314cb74

Browse files
ohartoov authored and leon committed
IB/mad: Add flow control for solicited MADs
Currently, MADs sent via an agent are being forwarded directly to the corresponding MAD QP layer. MADs with a timeout value set and requiring a response (solicited MADs) will be resent if the timeout expires without receiving a response. In a congested subnet, flooding MAD QP layer with more solicited send requests from the agent will only worsen the situation by triggering more timeouts and therefore more retries. Thus, add flow control for non-user solicited MADs to block agents from issuing new solicited MAD requests to the MAD QP until outstanding requests are completed and the MAD QP is ready to process additional requests. While at it, keep track of the total outstanding solicited MAD work requests in send or wait list. The number of outstanding send WRs will be limited by a fraction of the RQ size, and any new send WR that exceeds that limit will be held in a backlog list. Backlog MADs will be forwarded to agent send list only once the total number of outstanding send WRs falls below the limit. Unsolicited MADs, RMPP MADs and MADs which are not SA, SMP or CM are not subject to this flow control mechanism and will not be affected by this change. For this purpose, a new state is introduced: - 'IB_MAD_STATE_QUEUED': MAD is in backlog list Signed-off-by: Or Har-Toov <ohartoov@nvidia.com> Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com> Link: https://patch.msgid.link/c0ecaa1821badee124cd13f3bf860f67ce453beb.1751278420.git.leon@kernel.org Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent 1cf0d89 commit 314cb74

2 files changed

Lines changed: 214 additions & 10 deletions

File tree

drivers/infiniband/core/mad.c

Lines changed: 196 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,29 @@ int ib_response_mad(const struct ib_mad_hdr *hdr)
210210
}
211211
EXPORT_SYMBOL(ib_response_mad);
212212

213+
#define SOL_FC_MAX_DEFAULT_FRAC 4
214+
#define SOL_FC_MAX_SA_FRAC 32
215+
216+
static int get_sol_fc_max_outstanding(struct ib_mad_reg_req *mad_reg_req)
217+
{
218+
if (!mad_reg_req)
219+
/* Send only agent */
220+
return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
221+
222+
switch (mad_reg_req->mgmt_class) {
223+
case IB_MGMT_CLASS_CM:
224+
return mad_recvq_size / SOL_FC_MAX_DEFAULT_FRAC;
225+
case IB_MGMT_CLASS_SUBN_ADM:
226+
return mad_recvq_size / SOL_FC_MAX_SA_FRAC;
227+
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
228+
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
229+
return min(mad_recvq_size, IB_MAD_QP_RECV_SIZE) /
230+
SOL_FC_MAX_DEFAULT_FRAC;
231+
default:
232+
return 0;
233+
}
234+
}
235+
213236
/*
214237
* ib_register_mad_agent - Register to send/receive MADs
215238
*
@@ -392,12 +415,16 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
392415
INIT_LIST_HEAD(&mad_agent_priv->send_list);
393416
INIT_LIST_HEAD(&mad_agent_priv->wait_list);
394417
INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
418+
INIT_LIST_HEAD(&mad_agent_priv->backlog_list);
395419
INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
396420
INIT_LIST_HEAD(&mad_agent_priv->local_list);
397421
INIT_WORK(&mad_agent_priv->local_work, local_completions);
398422
refcount_set(&mad_agent_priv->refcount, 1);
399423
init_completion(&mad_agent_priv->comp);
400-
424+
mad_agent_priv->sol_fc_send_count = 0;
425+
mad_agent_priv->sol_fc_wait_count = 0;
426+
mad_agent_priv->sol_fc_max =
427+
get_sol_fc_max_outstanding(mad_reg_req);
401428
ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
402429
if (ret2) {
403430
ret = ERR_PTR(ret2);
@@ -1054,17 +1081,38 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
10541081
return ret;
10551082
}
10561083

1084+
static void handle_queued_state(struct ib_mad_send_wr_private *mad_send_wr,
1085+
struct ib_mad_agent_private *mad_agent_priv)
1086+
{
1087+
if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP) {
1088+
mad_agent_priv->sol_fc_wait_count--;
1089+
list_move_tail(&mad_send_wr->agent_list,
1090+
&mad_agent_priv->backlog_list);
1091+
} else {
1092+
expect_mad_state(mad_send_wr, IB_MAD_STATE_INIT);
1093+
list_add_tail(&mad_send_wr->agent_list,
1094+
&mad_agent_priv->backlog_list);
1095+
}
1096+
}
1097+
10571098
/*
 * Transition a MAD into SEND_START: place it on the agent's send list and
 * update the solicited flow-control accounting.
 *
 * NOTE: mad_send_wr->state still holds the PREVIOUS state here; the caller
 * (change_mad_state) assigns the new state only after this handler returns.
 */
static void handle_send_state(struct ib_mad_send_wr_private *mad_send_wr,
			      struct ib_mad_agent_private *mad_agent_priv)
{
	if (mad_send_wr->state == IB_MAD_STATE_INIT) {
		/* First send attempt: the WR is not on any list yet. */
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
	} else {
		/* Retry (from wait list) or release from the backlog. */
		expect_mad_state2(mad_send_wr, IB_MAD_STATE_WAIT_RESP,
				  IB_MAD_STATE_QUEUED);
		list_move_tail(&mad_send_wr->agent_list,
			       &mad_agent_priv->send_list);
	}

	if (mad_send_wr->is_solicited_fc) {
		/* A retried WAIT_RESP MAD stops counting as waiting... */
		if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
			mad_agent_priv->sol_fc_wait_count--;
		/* ...and every solicited FC MAD entering SEND_START counts as sending. */
		mad_agent_priv->sol_fc_send_count++;
	}
}
10691117

10701118
static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
@@ -1076,8 +1124,13 @@ static void handle_wait_state(struct ib_mad_send_wr_private *mad_send_wr,
10761124

10771125
expect_mad_state3(mad_send_wr, IB_MAD_STATE_SEND_START,
10781126
IB_MAD_STATE_WAIT_RESP, IB_MAD_STATE_CANCELED);
1079-
list_del_init(&mad_send_wr->agent_list);
1127+
if (mad_send_wr->state == IB_MAD_STATE_SEND_START &&
1128+
mad_send_wr->is_solicited_fc) {
1129+
mad_agent_priv->sol_fc_send_count--;
1130+
mad_agent_priv->sol_fc_wait_count++;
1131+
}
10801132

1133+
list_del_init(&mad_send_wr->agent_list);
10811134
delay = mad_send_wr->timeout;
10821135
mad_send_wr->timeout += jiffies;
10831136

@@ -1103,17 +1156,31 @@ static void handle_early_resp_state(struct ib_mad_send_wr_private *mad_send_wr,
11031156
struct ib_mad_agent_private *mad_agent_priv)
11041157
{
11051158
expect_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
1159+
mad_agent_priv->sol_fc_send_count -= mad_send_wr->is_solicited_fc;
11061160
}
11071161

11081162
static void handle_canceled_state(struct ib_mad_send_wr_private *mad_send_wr,
11091163
struct ib_mad_agent_private *mad_agent_priv)
11101164
{
11111165
not_expect_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
1166+
if (mad_send_wr->is_solicited_fc) {
1167+
if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
1168+
mad_agent_priv->sol_fc_send_count--;
1169+
else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
1170+
mad_agent_priv->sol_fc_wait_count--;
1171+
}
11121172
}
11131173

11141174
static void handle_done_state(struct ib_mad_send_wr_private *mad_send_wr,
11151175
struct ib_mad_agent_private *mad_agent_priv)
11161176
{
1177+
if (mad_send_wr->is_solicited_fc) {
1178+
if (mad_send_wr->state == IB_MAD_STATE_SEND_START)
1179+
mad_agent_priv->sol_fc_send_count--;
1180+
else if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
1181+
mad_agent_priv->sol_fc_wait_count--;
1182+
}
1183+
11171184
list_del_init(&mad_send_wr->agent_list);
11181185
}
11191186

@@ -1126,6 +1193,9 @@ void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
11261193
switch (new_state) {
11271194
case IB_MAD_STATE_INIT:
11281195
break;
1196+
case IB_MAD_STATE_QUEUED:
1197+
handle_queued_state(mad_send_wr, mad_agent_priv);
1198+
break;
11291199
case IB_MAD_STATE_SEND_START:
11301200
handle_send_state(mad_send_wr, mad_agent_priv);
11311201
break;
@@ -1148,6 +1218,43 @@ void change_mad_state(struct ib_mad_send_wr_private *mad_send_wr,
11481218
mad_send_wr->state = new_state;
11491219
}
11501220

1221+
static bool is_solicited_fc_mad(struct ib_mad_send_wr_private *mad_send_wr)
1222+
{
1223+
struct ib_rmpp_mad *rmpp_mad;
1224+
u8 mgmt_class;
1225+
1226+
if (!mad_send_wr->timeout)
1227+
return 0;
1228+
1229+
rmpp_mad = mad_send_wr->send_buf.mad;
1230+
if (mad_send_wr->mad_agent_priv->agent.rmpp_version &&
1231+
(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE))
1232+
return 0;
1233+
1234+
mgmt_class =
1235+
((struct ib_mad_hdr *)mad_send_wr->send_buf.mad)->mgmt_class;
1236+
return mgmt_class == IB_MGMT_CLASS_CM ||
1237+
mgmt_class == IB_MGMT_CLASS_SUBN_ADM ||
1238+
mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
1239+
mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE;
1240+
}
1241+
1242+
static bool mad_is_for_backlog(struct ib_mad_send_wr_private *mad_send_wr)
1243+
{
1244+
struct ib_mad_agent_private *mad_agent_priv =
1245+
mad_send_wr->mad_agent_priv;
1246+
1247+
if (!mad_send_wr->is_solicited_fc || !mad_agent_priv->sol_fc_max)
1248+
return false;
1249+
1250+
if (!list_empty(&mad_agent_priv->backlog_list))
1251+
return true;
1252+
1253+
return mad_agent_priv->sol_fc_send_count +
1254+
mad_agent_priv->sol_fc_wait_count >=
1255+
mad_agent_priv->sol_fc_max;
1256+
}
1257+
11511258
/*
11521259
* ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
11531260
* with the registered client
@@ -1216,6 +1323,13 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
12161323
/* Reference MAD agent until send completes */
12171324
refcount_inc(&mad_agent_priv->refcount);
12181325
spin_lock_irqsave(&mad_agent_priv->lock, flags);
1326+
mad_send_wr->is_solicited_fc = is_solicited_fc_mad(mad_send_wr);
1327+
if (mad_is_for_backlog(mad_send_wr)) {
1328+
change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
1329+
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1330+
return 0;
1331+
}
1332+
12191333
change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
12201334
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
12211335

@@ -1839,6 +1953,18 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
18391953
return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
18401954
}
18411955

1956+
list_for_each_entry(wr, &mad_agent_priv->backlog_list, agent_list) {
1957+
if ((wr->tid == mad_hdr->tid) &&
1958+
rcv_has_same_class(wr, wc) &&
1959+
/*
1960+
* Don't check GID for direct routed MADs.
1961+
* These might have permissive LIDs.
1962+
*/
1963+
(is_direct(mad_hdr->mgmt_class) ||
1964+
rcv_has_same_gid(mad_agent_priv, wr, wc)))
1965+
return (wr->state != IB_MAD_STATE_CANCELED) ? wr : NULL;
1966+
}
1967+
18421968
/*
18431969
* It's possible to receive the response before we've
18441970
* been notified that the send has completed
@@ -1860,10 +1986,47 @@ ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
18601986
return NULL;
18611987
}
18621988

1989+
/*
 * Drain the agent's backlog: post queued solicited MADs while the total of
 * outstanding (sending + waiting) requests remains under sol_fc_max.
 *
 * The agent lock is dropped around ib_send_mad() because posting to the QP
 * must not be done under the agent spinlock; the backlog head is moved to
 * SEND_START (and onto the send list) before the lock is released, so no
 * other context can pick the same WR.
 */
static void
process_backlog_mads(struct ib_mad_agent_private *mad_agent_priv)
{
	struct ib_mad_send_wr_private *mad_send_wr;
	struct ib_mad_send_wc mad_send_wc = {};
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	while (!list_empty(&mad_agent_priv->backlog_list) &&
	       (mad_agent_priv->sol_fc_send_count +
			mad_agent_priv->sol_fc_wait_count <
		mad_agent_priv->sol_fc_max)) {
		/* Oldest backlog entry first (FIFO). */
		mad_send_wr = list_entry(mad_agent_priv->backlog_list.next,
					 struct ib_mad_send_wr_private,
					 agent_list);
		change_mad_state(mad_send_wr, IB_MAD_STATE_SEND_START);
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
		ret = ib_send_mad(mad_send_wr);
		if (ret) {
			/*
			 * Post failed: drop the send's agent reference, mark
			 * the WR done under the lock, then report the error
			 * to the client outside the lock.
			 */
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			deref_mad_agent(mad_agent_priv);
			change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			mad_send_wc.send_buf = &mad_send_wr->send_buf;
			mad_send_wc.status = IB_WC_LOC_QP_OP_ERR;
			mad_agent_priv->agent.send_handler(
				&mad_agent_priv->agent, &mad_send_wc);
		}

		/* Re-take the lock before re-evaluating the loop condition. */
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
	}

	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
2024+
18632025
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
18642026
{
18652027
mad_send_wr->timeout = 0;
1866-
if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP)
2028+
if (mad_send_wr->state == IB_MAD_STATE_WAIT_RESP ||
2029+
mad_send_wr->state == IB_MAD_STATE_QUEUED)
18672030
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
18682031
else
18692032
change_mad_state(mad_send_wr, IB_MAD_STATE_EARLY_RESP);
@@ -2320,11 +2483,14 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
23202483
adjust_timeout(mad_agent_priv);
23212484
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
23222485

2323-
if (ret == IB_RMPP_RESULT_INTERNAL)
2486+
if (ret == IB_RMPP_RESULT_INTERNAL) {
23242487
ib_rmpp_send_handler(mad_send_wc);
2325-
else
2488+
} else {
2489+
if (mad_send_wr->is_solicited_fc)
2490+
process_backlog_mads(mad_agent_priv);
23262491
mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
23272492
mad_send_wc);
2493+
}
23282494

23292495
/* Release reference on agent taken when sending */
23302496
deref_mad_agent(mad_agent_priv);
@@ -2497,14 +2663,20 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
24972663
&mad_agent_priv->send_list, agent_list)
24982664
change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
24992665

2500-
/* Empty wait list to prevent receives from finding a request */
2666+
/* Empty wait & backlog list to prevent receives from finding request */
25012667
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
25022668
&mad_agent_priv->wait_list, agent_list) {
25032669
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
25042670
list_add_tail(&mad_send_wr->agent_list, &cancel_list);
25052671
}
2506-
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
25072672

2673+
list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2674+
&mad_agent_priv->backlog_list, agent_list) {
2675+
change_mad_state(mad_send_wr, IB_MAD_STATE_DONE);
2676+
list_add_tail(&mad_send_wr->agent_list, &cancel_list);
2677+
}
2678+
2679+
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
25082680
/* Report all cancelled requests */
25092681
clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);
25102682
}
@@ -2528,6 +2700,13 @@ find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
25282700
&mad_send_wr->send_buf == send_buf)
25292701
return mad_send_wr;
25302702
}
2703+
2704+
list_for_each_entry(mad_send_wr, &mad_agent_priv->backlog_list,
2705+
agent_list) {
2706+
if (&mad_send_wr->send_buf == send_buf)
2707+
return mad_send_wr;
2708+
}
2709+
25312710
return NULL;
25322711
}
25332712

@@ -2550,8 +2729,9 @@ int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
25502729
return -EINVAL;
25512730
}
25522731

2553-
active = (mad_send_wr->state == IB_MAD_STATE_SEND_START ||
2554-
mad_send_wr->state == IB_MAD_STATE_EARLY_RESP);
2732+
active = ((mad_send_wr->state == IB_MAD_STATE_SEND_START) ||
2733+
(mad_send_wr->state == IB_MAD_STATE_EARLY_RESP) ||
2734+
(mad_send_wr->state == IB_MAD_STATE_QUEUED && timeout_ms));
25552735
if (!timeout_ms)
25562736
change_mad_state(mad_send_wr, IB_MAD_STATE_CANCELED);
25572737

@@ -2665,6 +2845,11 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
26652845
mad_send_wr->send_buf.retries++;
26662846

26672847
mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2848+
if (mad_send_wr->is_solicited_fc &&
2849+
!list_empty(&mad_send_wr->mad_agent_priv->backlog_list)) {
2850+
change_mad_state(mad_send_wr, IB_MAD_STATE_QUEUED);
2851+
return 0;
2852+
}
26682853

26692854
if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
26702855
ret = ib_retry_rmpp(mad_send_wr);
@@ -2730,6 +2915,7 @@ static void timeout_sends(struct work_struct *work)
27302915
}
27312916

27322917
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2918+
process_backlog_mads(mad_agent_priv);
27332919
clear_mad_error_list(&timeout_list, IB_WC_RESP_TIMEOUT_ERR,
27342920
mad_agent_priv);
27352921
clear_mad_error_list(&cancel_list, IB_WC_WR_FLUSH_ERR, mad_agent_priv);

0 commit comments

Comments
 (0)