Skip to content

Commit 311aa68

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Usual smallish cycle. The NFS biovec work to push it down into RDMA instead of indirecting through a scatterlist is pretty nice to see, been talked about for a long time now. - Various code improvements in irdma, rtrs, qedr, ocrdma, irdma, rxe - Small driver improvements and minor bug fixes to hns, mlx5, rxe, mana, mlx5, irdma - Robustness improvements in completion processing for EFA - New query_port_speed() verb to move past limited IBA defined speed steps - Support for SG_GAPS in rtrs and many other small improvements - Rare list corruption fix in iwcm - Better support different page sizes in rxe - Device memory support for mana - Direct bio vec to kernel MR for use by NFS-RDMA - QP rate limiting for bnxt_re - Remote triggerable NULL pointer crash in siw - DMA-buf exporter support for RDMA mmaps like doorbells" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (66 commits) RDMA/mlx5: Implement DMABUF export ops RDMA/uverbs: Add DMABUF object type and operations RDMA/uverbs: Support external FD uobjects RDMA/siw: Fix potential NULL pointer dereference in header processing RDMA/umad: Reject negative data_len in ib_umad_write IB/core: Extend rate limit support for RC QPs RDMA/mlx5: Support rate limit only for Raw Packet QP RDMA/bnxt_re: Report QP rate limit in debugfs RDMA/bnxt_re: Report packet pacing capabilities when querying device RDMA/bnxt_re: Add support for QP rate limiting MAINTAINERS: Drop RDMA files from Hyper-V section RDMA/uverbs: Add __GFP_NOWARN to ib_uverbs_unmarshall_recv() kmalloc svcrdma: use bvec-based RDMA read/write API RDMA/core: add rdma_rw_max_sge() helper for SQ sizing RDMA/core: add MR support for bvec-based RDMA operations RDMA/core: use IOVA-based DMA mapping for bvec RDMA operations RDMA/core: add bio_vec based RDMA read/write API RDMA/irdma: Use kvzalloc for paged memory DMA address array RDMA/rxe: Fix race condition in QP timer handlers RDMA/mana_ib: Add device-memory support ...
2 parents e812928 + d6c58f4 commit 311aa68

77 files changed

Lines changed: 2645 additions & 719 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

MAINTAINERS

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11842,7 +11842,6 @@ F: arch/x86/kernel/cpu/mshyperv.c
1184211842
F: drivers/clocksource/hyperv_timer.c
1184311843
F: drivers/hid/hid-hyperv.c
1184411844
F: drivers/hv/
11845-
F: drivers/infiniband/hw/mana/
1184611845
F: drivers/input/serio/hyperv-keyboard.c
1184711846
F: drivers/iommu/hyperv-iommu.c
1184811847
F: drivers/net/ethernet/microsoft/
@@ -11861,7 +11860,6 @@ F: include/hyperv/hvhdk_mini.h
1186111860
F: include/linux/hyperv.h
1186211861
F: include/net/mana
1186311862
F: include/uapi/linux/hyperv.h
11864-
F: include/uapi/rdma/mana-abi.h
1186511863
F: net/vmw_vsock/hyperv_transport.c
1186611864
F: tools/hv/
1186711865

@@ -17468,6 +17466,7 @@ MICROSOFT MANA RDMA DRIVER
1746817466
M: Long Li <longli@microsoft.com>
1746917467
M: Konstantin Taranov <kotaranov@microsoft.com>
1747017468
L: linux-rdma@vger.kernel.org
17469+
L: linux-hyperv@vger.kernel.org
1747117470
S: Supported
1747217471
F: drivers/infiniband/hw/mana/
1747317472
F: include/net/mana

drivers/infiniband/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
3333
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
3434
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
3535
uverbs_std_types_cq.o \
36+
uverbs_std_types_dmabuf.o \
3637
uverbs_std_types_dmah.o \
3738
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
3839
uverbs_std_types_mr.o uverbs_std_types_counters.o \

drivers/infiniband/core/cache.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1537,7 +1537,8 @@ static void ib_cache_event_task(struct work_struct *_work)
15371537
* the cache.
15381538
*/
15391539
ret = ib_cache_update(work->event.device, work->event.element.port_num,
1540-
work->event.event == IB_EVENT_GID_CHANGE,
1540+
work->event.event == IB_EVENT_GID_CHANGE ||
1541+
work->event.event == IB_EVENT_CLIENT_REREGISTER,
15411542
work->event.event == IB_EVENT_PKEY_CHANGE,
15421543
work->enforce_security);
15431544

drivers/infiniband/core/device.c

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -361,34 +361,6 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
361361
return NULL;
362362
}
363363

364-
/**
365-
* ib_device_get_by_name - Find an IB device by name
366-
* @name: The name to look for
367-
* @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
368-
*
369-
* Find and hold an ib_device by its name. The caller must call
370-
* ib_device_put() on the returned pointer.
371-
*/
372-
struct ib_device *ib_device_get_by_name(const char *name,
373-
enum rdma_driver_id driver_id)
374-
{
375-
struct ib_device *device;
376-
377-
down_read(&devices_rwsem);
378-
device = __ib_device_get_by_name(name);
379-
if (device && driver_id != RDMA_DRIVER_UNKNOWN &&
380-
device->ops.driver_id != driver_id)
381-
device = NULL;
382-
383-
if (device) {
384-
if (!ib_device_try_get(device))
385-
device = NULL;
386-
}
387-
up_read(&devices_rwsem);
388-
return device;
389-
}
390-
EXPORT_SYMBOL(ib_device_get_by_name);
391-
392364
static int rename_compat_devs(struct ib_device *device)
393365
{
394366
struct ib_core_device *cdev;
@@ -2793,6 +2765,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
27932765
SET_DEVICE_OP(dev_ops, map_mr_sg);
27942766
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
27952767
SET_DEVICE_OP(dev_ops, mmap);
2768+
SET_DEVICE_OP(dev_ops, mmap_get_pfns);
27962769
SET_DEVICE_OP(dev_ops, mmap_free);
27972770
SET_DEVICE_OP(dev_ops, modify_ah);
27982771
SET_DEVICE_OP(dev_ops, modify_cq);
@@ -2803,6 +2776,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
28032776
SET_DEVICE_OP(dev_ops, modify_srq);
28042777
SET_DEVICE_OP(dev_ops, modify_wq);
28052778
SET_DEVICE_OP(dev_ops, peek_cq);
2779+
SET_DEVICE_OP(dev_ops, pgoff_to_mmap_entry);
28062780
SET_DEVICE_OP(dev_ops, pre_destroy_cq);
28072781
SET_DEVICE_OP(dev_ops, poll_cq);
28082782
SET_DEVICE_OP(dev_ops, port_groups);
@@ -2816,6 +2790,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
28162790
SET_DEVICE_OP(dev_ops, query_gid);
28172791
SET_DEVICE_OP(dev_ops, query_pkey);
28182792
SET_DEVICE_OP(dev_ops, query_port);
2793+
SET_DEVICE_OP(dev_ops, query_port_speed);
28192794
SET_DEVICE_OP(dev_ops, query_qp);
28202795
SET_DEVICE_OP(dev_ops, query_srq);
28212796
SET_DEVICE_OP(dev_ops, query_ucontext);
@@ -2875,7 +2850,6 @@ int ib_add_sub_device(struct ib_device *parent,
28752850

28762851
return ret;
28772852
}
2878-
EXPORT_SYMBOL(ib_add_sub_device);
28792853

28802854
int ib_del_sub_device_and_put(struct ib_device *sub)
28812855
{
@@ -2896,7 +2870,6 @@ int ib_del_sub_device_and_put(struct ib_device *sub)
28962870

28972871
return 0;
28982872
}
2899-
EXPORT_SYMBOL(ib_del_sub_device_and_put);
29002873

29012874
#ifdef CONFIG_INFINIBAND_VIRT_DMA
29022875
int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)

drivers/infiniband/core/ib_core_uverbs.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55
* Copyright 2019 Marvell. All rights reserved.
66
*/
77
#include <linux/xarray.h>
8+
#include <linux/dma-buf.h>
9+
#include <linux/dma-resv.h>
810
#include "uverbs.h"
911
#include "core_priv.h"
1012

13+
MODULE_IMPORT_NS("DMA_BUF");
14+
1115
/**
1216
* rdma_umap_priv_init() - Initialize the private data of a vma
1317
*
@@ -229,12 +233,29 @@ EXPORT_SYMBOL(rdma_user_mmap_entry_put);
229233
*/
230234
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
231235
{
236+
struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
237+
232238
if (!entry)
233239
return;
234240

241+
mutex_lock(&entry->dmabufs_lock);
235242
xa_lock(&entry->ucontext->mmap_xa);
236243
entry->driver_removed = true;
237244
xa_unlock(&entry->ucontext->mmap_xa);
245+
list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
246+
dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
247+
list_del(&uverbs_dmabuf->dmabufs_elm);
248+
uverbs_dmabuf->revoked = true;
249+
dma_buf_move_notify(uverbs_dmabuf->dmabuf);
250+
dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
251+
DMA_RESV_USAGE_BOOKKEEP, false,
252+
MAX_SCHEDULE_TIMEOUT);
253+
dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
254+
kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
255+
wait_for_completion(&uverbs_dmabuf->comp);
256+
}
257+
mutex_unlock(&entry->dmabufs_lock);
258+
238259
kref_put(&entry->ref, rdma_user_mmap_entry_free);
239260
}
240261
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
@@ -274,6 +295,9 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
274295
return -EINVAL;
275296

276297
kref_init(&entry->ref);
298+
INIT_LIST_HEAD(&entry->dmabufs);
299+
mutex_init(&entry->dmabufs_lock);
300+
277301
entry->ucontext = ucontext;
278302

279303
/*

drivers/infiniband/core/iwcm.c

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ static struct workqueue_struct *iwcm_wq;
9595
struct iwcm_work {
9696
struct work_struct work;
9797
struct iwcm_id_private *cm_id;
98-
struct list_head list;
9998
struct iw_cm_event event;
10099
struct list_head free_list;
101100
};
@@ -178,7 +177,6 @@ static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
178177
return -ENOMEM;
179178
}
180179
work->cm_id = cm_id_priv;
181-
INIT_LIST_HEAD(&work->list);
182180
put_work(work);
183181
}
184182
return 0;
@@ -213,7 +211,6 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
213211
static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
214212
{
215213
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
216-
BUG_ON(!list_empty(&cm_id_priv->work_list));
217214
free_cm_id(cm_id_priv);
218215
return true;
219216
}
@@ -260,7 +257,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
260257
refcount_set(&cm_id_priv->refcount, 1);
261258
init_waitqueue_head(&cm_id_priv->connect_wait);
262259
init_completion(&cm_id_priv->destroy_comp);
263-
INIT_LIST_HEAD(&cm_id_priv->work_list);
264260
INIT_LIST_HEAD(&cm_id_priv->work_free_list);
265261

266262
return &cm_id_priv->id;
@@ -1007,13 +1003,13 @@ static int process_event(struct iwcm_id_private *cm_id_priv,
10071003
}
10081004

10091005
/*
1010-
* Process events on the work_list for the cm_id. If the callback
1011-
* function requests that the cm_id be deleted, a flag is set in the
1012-
* cm_id flags to indicate that when the last reference is
1013-
* removed, the cm_id is to be destroyed. This is necessary to
1014-
* distinguish between an object that will be destroyed by the app
1015-
* thread asleep on the destroy_comp list vs. an object destroyed
1016-
* here synchronously when the last reference is removed.
1006+
* Process events for the cm_id. If the callback function requests
1007+
* that the cm_id be deleted, a flag is set in the cm_id flags to
1008+
* indicate that when the last reference is removed, the cm_id is
1009+
* to be destroyed. This is necessary to distinguish between an
1010+
* object that will be destroyed by the app thread asleep on the
1011+
* destroy_comp list vs. an object destroyed here synchronously
1012+
* when the last reference is removed.
10171013
*/
10181014
static void cm_work_handler(struct work_struct *_work)
10191015
{
@@ -1024,35 +1020,26 @@ static void cm_work_handler(struct work_struct *_work)
10241020
int ret = 0;
10251021

10261022
spin_lock_irqsave(&cm_id_priv->lock, flags);
1027-
while (!list_empty(&cm_id_priv->work_list)) {
1028-
work = list_first_entry(&cm_id_priv->work_list,
1029-
struct iwcm_work, list);
1030-
list_del_init(&work->list);
1031-
levent = work->event;
1032-
put_work(work);
1033-
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1034-
1035-
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
1036-
ret = process_event(cm_id_priv, &levent);
1037-
if (ret) {
1038-
destroy_cm_id(&cm_id_priv->id);
1039-
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
1040-
}
1041-
} else
1042-
pr_debug("dropping event %d\n", levent.event);
1043-
if (iwcm_deref_id(cm_id_priv))
1044-
return;
1045-
spin_lock_irqsave(&cm_id_priv->lock, flags);
1046-
}
1023+
levent = work->event;
1024+
put_work(work);
10471025
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1026+
1027+
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
1028+
ret = process_event(cm_id_priv, &levent);
1029+
if (ret) {
1030+
destroy_cm_id(&cm_id_priv->id);
1031+
WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
1032+
}
1033+
} else
1034+
pr_debug("dropping event %d\n", levent.event);
1035+
if (iwcm_deref_id(cm_id_priv))
1036+
return;
10481037
}
10491038

10501039
/*
10511040
* This function is called on interrupt context. Schedule events on
10521041
* the iwcm_wq thread to allow callback functions to downcall into
1053-
* the CM and/or block. Events are queued to a per-CM_ID
1054-
* work_list. If this is the first event on the work_list, the work
1055-
* element is also queued on the iwcm_wq thread.
1042+
* the CM and/or block.
10561043
*
10571044
* Each event holds a reference on the cm_id. Until the last posted
10581045
* event has been delivered and processed, the cm_id cannot be
@@ -1094,7 +1081,6 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
10941081
}
10951082

10961083
refcount_inc(&cm_id_priv->refcount);
1097-
list_add_tail(&work->list, &cm_id_priv->work_list);
10981084
queue_work(iwcm_wq, &work->work);
10991085
out:
11001086
spin_unlock_irqrestore(&cm_id_priv->lock, flags);

drivers/infiniband/core/iwcm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ struct iwcm_id_private {
5050
struct ib_qp *qp;
5151
struct completion destroy_comp;
5252
wait_queue_head_t connect_wait;
53-
struct list_head work_list;
5453
spinlock_t lock;
5554
refcount_t refcount;
5655
struct list_head work_free_list;

drivers/infiniband/core/rdma_core.c

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
465465

466466
fd_type =
467467
container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
468-
if (WARN_ON(fd_type->fops->release != &uverbs_uobject_fd_release &&
468+
if (WARN_ON(fd_type->fops && fd_type->fops->release != &uverbs_uobject_fd_release &&
469469
fd_type->fops->release != &uverbs_async_event_release)) {
470470
ret = ERR_PTR(-EINVAL);
471471
goto err_fd;
@@ -477,14 +477,16 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
477477
goto err_fd;
478478
}
479479

480-
/* Note that uverbs_uobject_fd_release() is called during abort */
481-
filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
482-
fd_type->flags);
483-
if (IS_ERR(filp)) {
484-
ret = ERR_CAST(filp);
485-
goto err_getfile;
480+
if (fd_type->fops) {
481+
/* Note that uverbs_uobject_fd_release() is called during abort */
482+
filp = anon_inode_getfile(fd_type->name, fd_type->fops, NULL,
483+
fd_type->flags);
484+
if (IS_ERR(filp)) {
485+
ret = ERR_CAST(filp);
486+
goto err_getfile;
487+
}
488+
uobj->object = filp;
486489
}
487-
uobj->object = filp;
488490

489491
uobj->id = new_fd;
490492
return uobj;
@@ -561,7 +563,9 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
561563
{
562564
struct file *filp = uobj->object;
563565

564-
fput(filp);
566+
if (filp)
567+
fput(filp);
568+
565569
put_unused_fd(uobj->id);
566570
}
567571

@@ -628,11 +632,14 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
628632
/* This shouldn't be used anymore. Use the file object instead */
629633
uobj->id = 0;
630634

631-
/*
632-
* NOTE: Once we install the file we loose ownership of our kref on
633-
* uobj. It will be put by uverbs_uobject_fd_release()
634-
*/
635-
filp->private_data = uobj;
635+
if (!filp->private_data) {
636+
/*
637+
* NOTE: Once we install the file we loose ownership of our kref on
638+
* uobj. It will be put by uverbs_uobject_fd_release()
639+
*/
640+
filp->private_data = uobj;
641+
}
642+
636643
fd_install(fd, filp);
637644
}
638645

@@ -802,21 +809,10 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
802809
};
803810
EXPORT_SYMBOL(uverbs_idr_class);
804811

805-
/*
806-
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
807-
* file_operations release method.
808-
*/
809-
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
812+
int uverbs_uobject_release(struct ib_uobject *uobj)
810813
{
811814
struct ib_uverbs_file *ufile;
812-
struct ib_uobject *uobj;
813815

814-
/*
815-
* This can only happen if the fput came from alloc_abort_fd_uobject()
816-
*/
817-
if (!filp->private_data)
818-
return 0;
819-
uobj = filp->private_data;
820816
ufile = uobj->ufile;
821817

822818
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
@@ -843,6 +839,21 @@ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
843839
uverbs_uobject_put(uobj);
844840
return 0;
845841
}
842+
843+
/*
844+
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
845+
* file_operations release method.
846+
*/
847+
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
848+
{
849+
/*
850+
* This can only happen if the fput came from alloc_abort_fd_uobject()
851+
*/
852+
if (!filp->private_data)
853+
return 0;
854+
855+
return uverbs_uobject_release(filp->private_data);
856+
}
846857
EXPORT_SYMBOL(uverbs_uobject_fd_release);
847858

848859
/*

0 commit comments

Comments
 (0)