Skip to content

Commit 8cbe71e

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe: "A fairly modest set of bug fixes, nothing abnormal from the merge window The ucma patch is a bit on the larger side, but given the regression was recently added I've opted to forward it to the rc stream. - Fix a ucma memory leak introduced in v5.9 while fixing the Syzkaller bugs - Don't fail when the xarray wraps for user verbs objects - User triggerable oops regression from the umem page size rework - Error unwind bugs in usnic, ocrdma, mlx5 and cma" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: RDMA/cma: Fix error flow in default_roce_mode_store RDMA/mlx5: Fix wrong free of blue flame register on error IB/mlx5: Fix error unwinding when set_has_smi_cap fails RDMA/umem: Avoid undefined behavior of rounddown_pow_of_two() RDMA/ocrdma: Fix use after free in ocrdma_dealloc_ucontext_pd() RDMA/usnic: Fix memleak in find_free_vf_and_create_qp_grp RDMA/restrack: Don't treat as an error allocation ID wrapping RDMA/ucma: Do not miss ctx destruction steps in some cases
2 parents 0bc9bc1 + 7c7b3e5 commit 8cbe71e

7 files changed

Lines changed: 83 additions & 68 deletions

File tree

drivers/infiniband/core/cma_configfs.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,10 @@ static ssize_t default_roce_mode_store(struct config_item *item,
131131
return ret;
132132

133133
gid_type = ib_cache_gid_parse_type_str(buf);
134-
if (gid_type < 0)
134+
if (gid_type < 0) {
135+
cma_configfs_params_put(cma_dev);
135136
return -EINVAL;
137+
}
136138

137139
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
138140

drivers/infiniband/core/restrack.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
254254
} else {
255255
ret = xa_alloc_cyclic(&rt->xa, &res->id, res, xa_limit_32b,
256256
&rt->next_id, GFP_KERNEL);
257+
ret = (ret < 0) ? ret : 0;
257258
}
258259

259260
out:

drivers/infiniband/core/ucma.c

Lines changed: 72 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ struct ucma_context {
9595
u64 uid;
9696

9797
struct list_head list;
98-
/* sync between removal event and id destroy, protected by file mut */
99-
int destroying;
10098
struct work_struct close_work;
10199
};
102100

@@ -122,7 +120,7 @@ static DEFINE_XARRAY_ALLOC(ctx_table);
122120
static DEFINE_XARRAY_ALLOC(multicast_table);
123121

124122
static const struct file_operations ucma_fops;
125-
static int __destroy_id(struct ucma_context *ctx);
123+
static int ucma_destroy_private_ctx(struct ucma_context *ctx);
126124

127125
static inline struct ucma_context *_ucma_find_context(int id,
128126
struct ucma_file *file)
@@ -179,19 +177,14 @@ static void ucma_close_id(struct work_struct *work)
179177

180178
/* once all inflight tasks are finished, we close all underlying
181179
* resources. The context is still alive till its explicit destroying
182-
* by its creator.
180+
* by its creator. This puts back the xarray's reference.
183181
*/
184182
ucma_put_ctx(ctx);
185183
wait_for_completion(&ctx->comp);
186184
/* No new events will be generated after destroying the id. */
187185
rdma_destroy_id(ctx->cm_id);
188186

189-
/*
190-
* At this point ctx->ref is zero so the only place the ctx can be is in
191-
* a uevent or in __destroy_id(). Since the former doesn't touch
192-
* ctx->cm_id and the latter sync cancels this, there is no races with
193-
* this store.
194-
*/
187+
/* Reading the cm_id without holding a positive ref is not allowed */
195188
ctx->cm_id = NULL;
196189
}
197190

@@ -204,7 +197,6 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
204197
return NULL;
205198

206199
INIT_WORK(&ctx->close_work, ucma_close_id);
207-
refcount_set(&ctx->ref, 1);
208200
init_completion(&ctx->comp);
209201
/* So list_del() will work if we don't do ucma_finish_ctx() */
210202
INIT_LIST_HEAD(&ctx->list);
@@ -218,6 +210,13 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)
218210
return ctx;
219211
}
220212

213+
static void ucma_set_ctx_cm_id(struct ucma_context *ctx,
214+
struct rdma_cm_id *cm_id)
215+
{
216+
refcount_set(&ctx->ref, 1);
217+
ctx->cm_id = cm_id;
218+
}
219+
221220
static void ucma_finish_ctx(struct ucma_context *ctx)
222221
{
223222
lockdep_assert_held(&ctx->file->mut);
@@ -303,7 +302,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
303302
ctx = ucma_alloc_ctx(listen_ctx->file);
304303
if (!ctx)
305304
goto err_backlog;
306-
ctx->cm_id = cm_id;
305+
ucma_set_ctx_cm_id(ctx, cm_id);
307306

308307
uevent = ucma_create_uevent(listen_ctx, event);
309308
if (!uevent)
@@ -321,8 +320,7 @@ static int ucma_connect_event_handler(struct rdma_cm_id *cm_id,
321320
return 0;
322321

323322
err_alloc:
324-
xa_erase(&ctx_table, ctx->id);
325-
kfree(ctx);
323+
ucma_destroy_private_ctx(ctx);
326324
err_backlog:
327325
atomic_inc(&listen_ctx->backlog);
328326
/* Returning error causes the new ID to be destroyed */
@@ -356,8 +354,12 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
356354
wake_up_interruptible(&ctx->file->poll_wait);
357355
}
358356

359-
if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying)
360-
queue_work(system_unbound_wq, &ctx->close_work);
357+
if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) {
358+
xa_lock(&ctx_table);
359+
if (xa_load(&ctx_table, ctx->id) == ctx)
360+
queue_work(system_unbound_wq, &ctx->close_work);
361+
xa_unlock(&ctx_table);
362+
}
361363
return 0;
362364
}
363365

@@ -461,13 +463,12 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
461463
ret = PTR_ERR(cm_id);
462464
goto err1;
463465
}
464-
ctx->cm_id = cm_id;
466+
ucma_set_ctx_cm_id(ctx, cm_id);
465467

466468
resp.id = ctx->id;
467469
if (copy_to_user(u64_to_user_ptr(cmd.response),
468470
&resp, sizeof(resp))) {
469-
xa_erase(&ctx_table, ctx->id);
470-
__destroy_id(ctx);
471+
ucma_destroy_private_ctx(ctx);
471472
return -EFAULT;
472473
}
473474

@@ -477,8 +478,7 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
477478
return 0;
478479

479480
err1:
480-
xa_erase(&ctx_table, ctx->id);
481-
kfree(ctx);
481+
ucma_destroy_private_ctx(ctx);
482482
return ret;
483483
}
484484

@@ -516,68 +516,73 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
516516
rdma_unlock_handler(mc->ctx->cm_id);
517517
}
518518

519-
/*
520-
* ucma_free_ctx is called after the underlying rdma CM-ID is destroyed. At
521-
* this point, no new events will be reported from the hardware. However, we
522-
* still need to cleanup the UCMA context for this ID. Specifically, there
523-
* might be events that have not yet been consumed by the user space software.
524-
* mutex. After that we release them as needed.
525-
*/
526-
static int ucma_free_ctx(struct ucma_context *ctx)
519+
static int ucma_cleanup_ctx_events(struct ucma_context *ctx)
527520
{
528521
int events_reported;
529522
struct ucma_event *uevent, *tmp;
530523
LIST_HEAD(list);
531524

532-
ucma_cleanup_multicast(ctx);
533-
534-
/* Cleanup events not yet reported to the user. */
525+
/* Cleanup events not yet reported to the user.*/
535526
mutex_lock(&ctx->file->mut);
536527
list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
537-
if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx)
528+
if (uevent->ctx != ctx)
529+
continue;
530+
531+
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
532+
xa_cmpxchg(&ctx_table, uevent->conn_req_ctx->id,
533+
uevent->conn_req_ctx, XA_ZERO_ENTRY,
534+
GFP_KERNEL) == uevent->conn_req_ctx) {
538535
list_move_tail(&uevent->list, &list);
536+
continue;
537+
}
538+
list_del(&uevent->list);
539+
kfree(uevent);
539540
}
540541
list_del(&ctx->list);
541542
events_reported = ctx->events_reported;
542543
mutex_unlock(&ctx->file->mut);
543544

544545
/*
545-
* If this was a listening ID then any connections spawned from it
546-
* that have not been delivered to userspace are cleaned up too.
547-
* Must be done outside any locks.
546+
* If this was a listening ID then any connections spawned from it that
547+
* have not been delivered to userspace are cleaned up too. Must be done
548+
* outside any locks.
548549
*/
549550
list_for_each_entry_safe(uevent, tmp, &list, list) {
550-
list_del(&uevent->list);
551-
if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST &&
552-
uevent->conn_req_ctx != ctx)
553-
__destroy_id(uevent->conn_req_ctx);
551+
ucma_destroy_private_ctx(uevent->conn_req_ctx);
554552
kfree(uevent);
555553
}
556-
557-
mutex_destroy(&ctx->mutex);
558-
kfree(ctx);
559554
return events_reported;
560555
}
561556

562-
static int __destroy_id(struct ucma_context *ctx)
557+
/*
558+
* When this is called the xarray must have a XA_ZERO_ENTRY in the ctx->id (ie
559+
* the ctx is not public to the user). This is either because:
560+
* - ucma_finish_ctx() hasn't been called
561+
* - xa_cmpxchg() succeeded in removing the entry (only one thread can succeed)
562+
*/
563+
static int ucma_destroy_private_ctx(struct ucma_context *ctx)
563564
{
565+
int events_reported;
566+
564567
/*
565-
* If the refcount is already 0 then ucma_close_id() has already
566-
* destroyed the cm_id, otherwise holding the refcount keeps cm_id
567-
* valid. Prevent queue_work() from being called.
568+
* Destroy the underlying cm_id. New work queuing is prevented now by
569+
* the removal from the xarray. Once the work is canceled ref will either
570+
* be 0 because the work ran to completion and consumed the ref from the
571+
* xarray, or it will be positive because we still have the ref from the
572+
* xarray. This can also be 0 in cases where cm_id was never set
568573
*/
569-
if (refcount_inc_not_zero(&ctx->ref)) {
570-
rdma_lock_handler(ctx->cm_id);
571-
ctx->destroying = 1;
572-
rdma_unlock_handler(ctx->cm_id);
573-
ucma_put_ctx(ctx);
574-
}
575-
576574
cancel_work_sync(&ctx->close_work);
577-
/* At this point it's guaranteed that there is no inflight closing task */
578-
if (ctx->cm_id)
575+
if (refcount_read(&ctx->ref))
579576
ucma_close_id(&ctx->close_work);
580-
return ucma_free_ctx(ctx);
577+
578+
events_reported = ucma_cleanup_ctx_events(ctx);
579+
ucma_cleanup_multicast(ctx);
580+
581+
WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, XA_ZERO_ENTRY, NULL,
582+
GFP_KERNEL) != NULL);
583+
mutex_destroy(&ctx->mutex);
584+
kfree(ctx);
585+
return events_reported;
581586
}
582587

583588
static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
@@ -596,14 +601,17 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
596601

597602
xa_lock(&ctx_table);
598603
ctx = _ucma_find_context(cmd.id, file);
599-
if (!IS_ERR(ctx))
600-
__xa_erase(&ctx_table, ctx->id);
604+
if (!IS_ERR(ctx)) {
605+
if (__xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
606+
GFP_KERNEL) != ctx)
607+
ctx = ERR_PTR(-ENOENT);
608+
}
601609
xa_unlock(&ctx_table);
602610

603611
if (IS_ERR(ctx))
604612
return PTR_ERR(ctx);
605613

606-
resp.events_reported = __destroy_id(ctx);
614+
resp.events_reported = ucma_destroy_private_ctx(ctx);
607615
if (copy_to_user(u64_to_user_ptr(cmd.response),
608616
&resp, sizeof(resp)))
609617
ret = -EFAULT;
@@ -1777,15 +1785,16 @@ static int ucma_close(struct inode *inode, struct file *filp)
17771785
* prevented by this being a FD release function. The list_add_tail() in
17781786
* ucma_connect_event_handler() can run concurrently, however it only
17791787
* adds to the list *after* a listening ID. By only reading the first of
1780-
* the list, and relying on __destroy_id() to block
1788+
* the list, and relying on ucma_destroy_private_ctx() to block
17811789
* ucma_connect_event_handler(), no additional locking is needed.
17821790
*/
17831791
while (!list_empty(&file->ctx_list)) {
17841792
struct ucma_context *ctx = list_first_entry(
17851793
&file->ctx_list, struct ucma_context, list);
17861794

1787-
xa_erase(&ctx_table, ctx->id);
1788-
__destroy_id(ctx);
1795+
WARN_ON(xa_cmpxchg(&ctx_table, ctx->id, ctx, XA_ZERO_ENTRY,
1796+
GFP_KERNEL) != ctx);
1797+
ucma_destroy_private_ctx(ctx);
17891798
}
17901799
kfree(file);
17911800
return 0;

drivers/infiniband/core/umem.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
135135
*/
136136
if (mask)
137137
pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
138-
return rounddown_pow_of_two(pgsz_bitmap);
138+
return pgsz_bitmap ? rounddown_pow_of_two(pgsz_bitmap) : 0;
139139
}
140140
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
141141

drivers/infiniband/hw/mlx5/main.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3956,7 +3956,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
39563956

39573957
err = set_has_smi_cap(dev);
39583958
if (err)
3959-
return err;
3959+
goto err_mp;
39603960

39613961
if (!mlx5_core_mp_enabled(mdev)) {
39623962
for (i = 1; i <= dev->num_ports; i++) {
@@ -4319,7 +4319,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
43194319

43204320
err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
43214321
if (err)
4322-
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
4322+
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
43234323

43244324
return err;
43254325
}

drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
434434
pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
435435
__func__, dev->id, pd->id);
436436
}
437-
kfree(uctx->cntxt_pd);
438437
uctx->cntxt_pd = NULL;
439438
_ocrdma_dealloc_pd(dev, pd);
439+
kfree(pd);
440440
}
441441

442442
static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)

drivers/infiniband/hw/usnic/usnic_ib_verbs.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
214214

215215
}
216216
usnic_uiom_free_dev_list(dev_list);
217+
dev_list = NULL;
217218
}
218219

219220
/* Try to find resources on an unused vf */
@@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
239240
qp_grp_check:
240241
if (IS_ERR_OR_NULL(qp_grp)) {
241242
usnic_err("Failed to allocate qp_grp\n");
243+
if (usnic_ib_share_vf)
244+
usnic_uiom_free_dev_list(dev_list);
242245
return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
243246
}
244247
return qp_grp;

0 commit comments

Comments
 (0)