
Commit 4df22ca

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe:
 "A few recent regressions in rxe's multicast code, and some old driver
  bugs:

   - Error case unwind bug in rxe for rkeys

   - Do not call netdev functions under a spinlock in rxe multicast code

   - Use the proper BH lock type in rxe multicast code

   - Fix irdma deadlock and crash

   - Add a missing flush to drain irdma QPs when in error

   - Fix high userspace latency in irdma during destroy due to
     synchronize_rcu()

   - Rare race in siw MPA processing"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Change mcg_lock to a _bh lock
  RDMA/rxe: Do not call dev_mc_add/del() under a spinlock
  RDMA/siw: Fix a condition race issue in MPA request processing
  RDMA/irdma: Fix possible crash due to NULL netdev in notifier
  RDMA/irdma: Reduce iWARP QP destroy time
  RDMA/irdma: Flush iWARP QP if modified to ERR from RTR state
  RDMA/rxe: Recheck the MR in when generating a READ reply
  RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core()
  RDMA/rxe: Fix "Replace mr by rkey in responder resources"
2 parents 6426792 + bfdc0ed commit 4df22ca

6 files changed

Lines changed: 85 additions & 96 deletions


drivers/infiniband/hw/irdma/cm.c

Lines changed: 10 additions & 23 deletions
@@ -2308,10 +2308,8 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
 	return NULL;
 }
 
-static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
+static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
 {
-	struct irdma_cm_node *cm_node =
-			    container_of(rcu_head, struct irdma_cm_node, rcu_head);
 	struct irdma_cm_core *cm_core = cm_node->cm_core;
 	struct irdma_qp *iwqp;
 	struct irdma_cm_info nfo;
@@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
 	}
 
 	cm_core->cm_free_ah(cm_node);
-	kfree(cm_node);
 }
 
 /**
@@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
 
 	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
-	/* wait for all list walkers to exit their grace period */
-	call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
+	irdma_destroy_connection(cm_node);
+
+	kfree_rcu(cm_node, rcu_head);
 }
 
 /**
@@ -3246,15 +3244,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
  */
 void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
 {
-	unsigned long flags;
-
 	if (!cm_core)
 		return;
 
-	spin_lock_irqsave(&cm_core->ht_lock, flags);
-	if (timer_pending(&cm_core->tcp_timer))
-		del_timer_sync(&cm_core->tcp_timer);
-	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+	del_timer_sync(&cm_core->tcp_timer);
 
 	destroy_workqueue(cm_core->event_wq);
 	cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@@ -3467,12 +3460,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
 	}
 
 	cm_id = iwqp->cm_id;
-	/* make sure we havent already closed this connection */
-	if (!cm_id) {
-		spin_unlock_irqrestore(&iwqp->lock, flags);
-		return;
-	}
-
 	original_hw_tcp_state = iwqp->hw_tcp_state;
 	original_ibqp_state = iwqp->ibqp_state;
 	last_ae = iwqp->last_aeq;
@@ -3494,11 +3481,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
 		disconn_status = -ECONNRESET;
 	}
 
-	if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
-	     original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
-	     last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
-	     last_ae == IRDMA_AE_BAD_CLOSE ||
-	     last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
+	if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
+	    original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
+	    last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
+	    last_ae == IRDMA_AE_BAD_CLOSE ||
+	    last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
 		issue_close = 1;
 		iwqp->cm_id = NULL;
 		qp->term_flags = 0;
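
Two long-standing kernel rules are at work in the cm.c hunks above. First, del_timer_sync() must not be called while holding a lock that the timer handler itself can take, and it already copes with a timer that is not pending, so the ht_lock/timer_pending() dance in irdma_cleanup_cm_core() was both deadlock-prone and unnecessary. Second, an object on an RCU-protected list only needs its memory to outlive the grace period, not its entire teardown, so the destroy work moves out of the RCU callback and only the kfree() is deferred. Below is a minimal kernel-style sketch of that second change; conn_node and the put_conn_*() helpers are hypothetical names, not the driver's.

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct conn_node {
		struct rcu_head rcu_head;
		/* ... connection state ... */
	};

	/* Before: the whole teardown is deferred behind an RCU grace
	 * period via call_rcu(), so anything that waits on the teardown
	 * (a userspace destroy, module unload) also waits out the grace
	 * period.
	 */
	static void conn_free_cb(struct rcu_head *head)
	{
		struct conn_node *node = container_of(head, struct conn_node,
						      rcu_head);

		/* expensive teardown used to run here */
		kfree(node);
	}

	static void put_conn_old(struct conn_node *node)
	{
		call_rcu(&node->rcu_head, conn_free_cb);
	}

	/* After: tear the connection down immediately and defer only the
	 * kfree(), which is all that concurrent RCU list walkers require.
	 */
	static void put_conn_new(struct conn_node *node)
	{
		/* synchronous teardown here (the diff's
		 * irdma_destroy_connection())
		 */
		kfree_rcu(node, rcu_head);
	}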

drivers/infiniband/hw/irdma/utils.c

Lines changed: 9 additions & 12 deletions
@@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
 	u32 local_ipaddr[4] = {};
 	bool ipv4 = true;
 
-	real_dev = rdma_vlan_dev_real_dev(netdev);
-	if (!real_dev)
-		real_dev = netdev;
-
-	ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
-	if (!ibdev)
-		return NOTIFY_DONE;
-
-	iwdev = to_iwdev(ibdev);
-
 	switch (event) {
 	case NETEVENT_NEIGH_UPDATE:
+		real_dev = rdma_vlan_dev_real_dev(netdev);
+		if (!real_dev)
+			real_dev = netdev;
+		ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
+		if (!ibdev)
+			return NOTIFY_DONE;
+
+		iwdev = to_iwdev(ibdev);
 		p = (__be32 *)neigh->primary_key;
 		if (neigh->tbl->family == AF_INET6) {
 			ipv4 = false;
@@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
 			irdma_manage_arp_cache(iwdev->rf, neigh->ha,
 					       local_ipaddr, ipv4,
 					       IRDMA_ARP_DELETE);
+		ib_device_put(ibdev);
 		break;
 	default:
 		break;
 	}
 
-	ib_device_put(ibdev);
-
 	return NOTIFY_DONE;
 }
 
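
The utils.c change narrows the device lookup (and its reference) to the one netevent that is guaranteed to carry a neighbour, so events that arrive with a NULL or unrelated netdev never reach rdma_vlan_dev_real_dev(). A minimal sketch of that notifier shape, assuming the same NETEVENT_NEIGH_UPDATE payload; my_net_event() and the pr_debug() are illustrative, not the driver's code.

	#include <linux/netdevice.h>
	#include <linux/notifier.h>
	#include <net/netevent.h>
	#include <net/neighbour.h>

	static int my_net_event(struct notifier_block *nb, unsigned long event,
				void *ptr)
	{
		switch (event) {
		case NETEVENT_NEIGH_UPDATE: {
			/* only this event carries a neighbour whose netdev we
			 * may use; resolve it and take the device reference
			 * here, and drop the reference before leaving the case
			 */
			struct neighbour *neigh = ptr;
			struct net_device *dev = neigh->dev;

			pr_debug("neigh update on %s\n", netdev_name(dev));
			/* ... ib_device_get_by_netdev(dev, ...), do the work,
			 * ib_device_put() ...
			 */
			break;
		}
		default:
			/* other events may not carry a usable netdev at all */
			break;
		}

		return NOTIFY_DONE;
	}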

drivers/infiniband/hw/irdma/verbs.c

Lines changed: 2 additions & 2 deletions
@@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 
 	if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
 		if (dont_wait) {
-			if (iwqp->cm_id && iwqp->hw_tcp_state) {
+			if (iwqp->hw_tcp_state) {
 				spin_lock_irqsave(&iwqp->lock, flags);
 				iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
 				iwqp->last_aeq = IRDMA_AE_RESET_SENT;
 				spin_unlock_irqrestore(&iwqp->lock, flags);
-				irdma_cm_disconn(iwqp);
 			}
+			irdma_cm_disconn(iwqp);
 		} else {
 			int close_timer_started;
 
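
The verbs.c hunk calls irdma_cm_disconn() whether or not TCP state exists, which is what flushes an iWARP QP moved to ERR straight from RTR (no connection established yet, so iwqp->cm_id and iwqp->hw_tcp_state may both be unset). From userspace this is the ordinary modify-to-ERR flush contract; a minimal libibverbs sketch of the call that exercises this path (error handling omitted):

	#include <infiniband/verbs.h>

	/* Move a QP to the ERR state; outstanding and subsequently posted
	 * work requests are then expected to complete with flush errors
	 * on the CQ, even if the QP never got past RTR.
	 */
	static int move_qp_to_err(struct ibv_qp *qp)
	{
		struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };

		return ibv_modify_qp(qp, &attr, IBV_QP_STATE);
	}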

drivers/infiniband/sw/rxe/rxe_mcast.c

Lines changed: 35 additions & 46 deletions
@@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
 }
 
 /**
- * rxe_mcast_delete - delete multicast address from rxe device
+ * rxe_mcast_del - delete multicast address from rxe device
  * @rxe: rxe device object
  * @mgid: multicast address as a gid
  *
  * Returns 0 on success else an error
  */
-static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	unsigned char ll_addr[ETH_ALEN];
 
@@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
 struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	mcg = __rxe_lookup_mcg(rxe, mgid);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	return mcg;
 }
@@ -159,17 +158,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
  * @mcg: new mcg object
  *
  * Context: caller should hold rxe->mcg lock
- * Returns: 0 on success else an error
  */
-static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
-			  struct rxe_mcg *mcg)
+static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
+			   struct rxe_mcg *mcg)
 {
-	int err;
-
-	err = rxe_mcast_add(rxe, mgid);
-	if (unlikely(err))
-		return err;
-
 	kref_init(&mcg->ref_cnt);
 	memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
 	INIT_LIST_HEAD(&mcg->qp_list);
@@ -184,8 +176,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 	 */
 	kref_get(&mcg->ref_cnt);
 	__rxe_insert_mcg(mcg);
-
-	return 0;
 }
 
 /**
@@ -198,7 +188,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
 static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 {
 	struct rxe_mcg *mcg, *tmp;
-	unsigned long flags;
 	int err;
 
 	if (rxe->attr.max_mcast_grp == 0)
@@ -209,36 +198,38 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
 	if (mcg)
 		return mcg;
 
+	/* check to see if we have reached limit */
+	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
+		err = -ENOMEM;
+		goto err_dec;
+	}
+
 	/* speculative alloc of new mcg */
 	mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
 	if (!mcg)
 		return ERR_PTR(-ENOMEM);
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just added it */
 	tmp = __rxe_lookup_mcg(rxe, mgid);
 	if (tmp) {
+		spin_unlock_bh(&rxe->mcg_lock);
+		atomic_dec(&rxe->mcg_num);
 		kfree(mcg);
-		mcg = tmp;
-		goto out;
+		return tmp;
 	}
 
-	if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
-		err = -ENOMEM;
-		goto err_dec;
-	}
+	__rxe_init_mcg(rxe, mgid, mcg);
+	spin_unlock_bh(&rxe->mcg_lock);
 
-	err = __rxe_init_mcg(rxe, mgid, mcg);
-	if (err)
-		goto err_dec;
-out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-	return mcg;
+	/* add mcast address outside of lock */
+	err = rxe_mcast_add(rxe, mgid);
+	if (!err)
+		return mcg;
 
+	kfree(mcg);
 err_dec:
 	atomic_dec(&rxe->mcg_num);
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-	kfree(mcg);
 	return ERR_PTR(err);
 }
 
@@ -268,7 +259,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
 	__rxe_remove_mcg(mcg);
 	kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
 
-	rxe_mcast_delete(mcg->rxe, &mcg->mgid);
 	atomic_dec(&rxe->mcg_num);
 }
 
@@ -280,11 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
  */
 static void rxe_destroy_mcg(struct rxe_mcg *mcg)
 {
-	unsigned long flags;
+	/* delete mcast address outside of lock */
+	rxe_mcast_del(mcg->rxe, &mcg->mgid);
 
-	spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
+	spin_lock_bh(&mcg->rxe->mcg_lock);
 	__rxe_destroy_mcg(mcg);
-	spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
+	spin_unlock_bh(&mcg->rxe->mcg_lock);
 }
 
 /**
@@ -339,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 	int err;
 
 	/* check to see if the qp is already a member of the group */
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry(mca, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 
 	/* speculative alloc new mca without using GFP_ATOMIC */
 	mca = kzalloc(sizeof(*mca), GFP_KERNEL);
 	if (!mca)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	/* re-check to see if someone else just attached qp */
 	list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
 		if (tmp->qp == qp) {
@@ -371,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 	if (err)
 		kfree(mca);
 out:
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return err;
 }
 
@@ -405,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 {
 	struct rxe_dev *rxe = mcg->rxe;
 	struct rxe_mca *mca, *tmp;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rxe->mcg_lock, flags);
+	spin_lock_bh(&rxe->mcg_lock);
 	list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
 		if (mca->qp == qp) {
 			__rxe_cleanup_mca(mca, mcg);
@@ -421,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
 			if (atomic_read(&mcg->qp_num) <= 0)
 				__rxe_destroy_mcg(mcg);
 
-			spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+			spin_unlock_bh(&rxe->mcg_lock);
 			return 0;
 		}
 	}
 
 	/* we didn't find the qp on the list */
-	spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+	spin_unlock_bh(&rxe->mcg_lock);
 	return -EINVAL;
 }
 
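
The rxe_mcast.c hunks combine two changes: mcg_lock becomes a _bh lock, since it is shared with softirq (packet receive) context but never taken from hard IRQ context, and the dev_mc_add()/dev_mc_del() calls, which can sleep, move outside the lock entirely, with an allocate-then-recheck pattern resolving races against a concurrent inserter. Below is a kernel-style sketch of the combined pattern under those assumptions; struct table, struct entry, and the helpers are hypothetical names, not rxe types.

	#include <linux/atomic.h>
	#include <linux/err.h>
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct table {
		spinlock_t lock;	/* taken from process and softirq context */
		struct list_head head;
		atomic_t num;
		int max;
	};

	struct entry {
		struct list_head list;
		u64 key;
	};

	/* caller must hold t->lock */
	static struct entry *__lookup(struct table *t, u64 key)
	{
		struct entry *ent;

		list_for_each_entry(ent, &t->head, list)
			if (ent->key == key)
				return ent;

		return NULL;
	}

	static struct entry *get_entry(struct table *t, u64 key)
	{
		struct entry *ent, *tmp;

		spin_lock_bh(&t->lock);	/* _bh, not irqsave: no hard-IRQ users */
		ent = __lookup(t, key);
		spin_unlock_bh(&t->lock);
		if (ent)
			return ent;

		/* reserve a slot up front so failure unwinds with one decrement */
		if (atomic_inc_return(&t->num) > t->max) {
			atomic_dec(&t->num);
			return ERR_PTR(-ENOMEM);
		}

		/* GFP_KERNEL may sleep, so no spinlock can be held here */
		ent = kzalloc(sizeof(*ent), GFP_KERNEL);
		if (!ent) {
			atomic_dec(&t->num);
			return ERR_PTR(-ENOMEM);
		}
		ent->key = key;

		spin_lock_bh(&t->lock);
		/* re-check: someone may have inserted the key while we slept */
		tmp = __lookup(t, key);
		if (tmp) {
			spin_unlock_bh(&t->lock);
			atomic_dec(&t->num);
			kfree(ent);
			return tmp;
		}
		list_add(&ent->list, &t->head);
		spin_unlock_bh(&t->lock);

		/* sleeping side effects (the rxe analogue is dev_mc_add())
		 * happen only here, after the lock is dropped, as
		 * rxe_get_mcg() now does
		 */
		return ent;
	}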
