Skip to content

Commit 0ac6f40

Browse files
yishaih authored and rleon committed
RDMA/uverbs: Add DMABUF object type and operations
Expose DMABUF functionality to userspace through the uverbs interface, enabling InfiniBand/RDMA devices to export PCI based memory regions (e.g. device memory) as DMABUF file descriptors. This allows zero-copy sharing of RDMA memory with other subsystems that support the dma-buf framework.

A new UVERBS_OBJECT_DMABUF object type and allocation method were introduced.

During allocation, uverbs invokes the driver to supply the rdma_user_mmap_entry associated with the given page offset (pgoff). Based on the returned rdma_user_mmap_entry, uverbs requests the driver to provide the corresponding physical-memory details as well as the driver’s PCI provider information.

Using this information, dma_buf_export() is called; if it succeeds, uobj->object is set to the underlying file pointer returned by the dma-buf framework. The file descriptor number follows the standard uverbs allocation flow, but the file pointer comes from the dma-buf subsystem, including its own fops and private data.

When an mmap entry is removed, uverbs iterates over its associated DMABUFs, marks them as revoked, and calls dma_buf_move_notify() so that their importers are notified. The same procedure applies during the disassociate flow; final cleanup occurs when the application closes the file.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Edward Srouji <edwards@nvidia.com>
Link: https://patch.msgid.link/20260201-dmabuf-export-v3-2-da238b614fe3@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
1 parent 9ad95a0 commit 0ac6f40

11 files changed

Lines changed: 286 additions & 12 deletions

File tree

drivers/infiniband/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
3333
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
3434
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
3535
uverbs_std_types_cq.o \
36+
uverbs_std_types_dmabuf.o \
3637
uverbs_std_types_dmah.o \
3738
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
3839
uverbs_std_types_mr.o uverbs_std_types_counters.o \

drivers/infiniband/core/device.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2765,6 +2765,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
27652765
SET_DEVICE_OP(dev_ops, map_mr_sg);
27662766
SET_DEVICE_OP(dev_ops, map_mr_sg_pi);
27672767
SET_DEVICE_OP(dev_ops, mmap);
2768+
SET_DEVICE_OP(dev_ops, mmap_get_pfns);
27682769
SET_DEVICE_OP(dev_ops, mmap_free);
27692770
SET_DEVICE_OP(dev_ops, modify_ah);
27702771
SET_DEVICE_OP(dev_ops, modify_cq);
@@ -2775,6 +2776,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
27752776
SET_DEVICE_OP(dev_ops, modify_srq);
27762777
SET_DEVICE_OP(dev_ops, modify_wq);
27772778
SET_DEVICE_OP(dev_ops, peek_cq);
2779+
SET_DEVICE_OP(dev_ops, pgoff_to_mmap_entry);
27782780
SET_DEVICE_OP(dev_ops, pre_destroy_cq);
27792781
SET_DEVICE_OP(dev_ops, poll_cq);
27802782
SET_DEVICE_OP(dev_ops, port_groups);

drivers/infiniband/core/ib_core_uverbs.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@
55
* Copyright 2019 Marvell. All rights reserved.
66
*/
77
#include <linux/xarray.h>
8+
#include <linux/dma-buf.h>
9+
#include <linux/dma-resv.h>
810
#include "uverbs.h"
911
#include "core_priv.h"
1012

13+
MODULE_IMPORT_NS("DMA_BUF");
14+
1115
/**
1216
* rdma_umap_priv_init() - Initialize the private data of a vma
1317
*
@@ -229,12 +233,29 @@ EXPORT_SYMBOL(rdma_user_mmap_entry_put);
229233
*/
230234
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
231235
{
236+
struct ib_uverbs_dmabuf_file *uverbs_dmabuf, *tmp;
237+
232238
if (!entry)
233239
return;
234240

241+
/*
 * dmabufs_lock is held across the whole revoke sequence; the DMABUF
 * alloc handler checks driver_removed under the same mutex before
 * linking a new dma-buf to this entry.
 */
mutex_lock(&entry->dmabufs_lock);
235242
xa_lock(&entry->ucontext->mmap_xa);
236243
entry->driver_removed = true;
237244
xa_unlock(&entry->ucontext->mmap_xa);
245+
/*
 * Revoke every dma-buf exported from this entry: unlink it, mark it
 * revoked so further map attempts fail, notify the importers, and wait
 * on the reservation object.
 */
list_for_each_entry_safe(uverbs_dmabuf, tmp, &entry->dmabufs, dmabufs_elm) {
246+
dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
247+
list_del(&uverbs_dmabuf->dmabufs_elm);
248+
uverbs_dmabuf->revoked = true;
249+
dma_buf_move_notify(uverbs_dmabuf->dmabuf);
250+
dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
251+
DMA_RESV_USAGE_BOOKKEEP, false,
252+
MAX_SCHEDULE_TIMEOUT);
253+
dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
254+
/*
 * Drop the reference set up by kref_init() at allocation time, then
 * block until ib_uverbs_dmabuf_done() signals that the references
 * taken by map_dma_buf have all been put by unmap_dma_buf.
 */
kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
255+
wait_for_completion(&uverbs_dmabuf->comp);
256+
}
257+
mutex_unlock(&entry->dmabufs_lock);
258+
238259
kref_put(&entry->ref, rdma_user_mmap_entry_free);
239260
}
240261
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
@@ -274,6 +295,9 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
274295
return -EINVAL;
275296

276297
kref_init(&entry->ref);
298+
INIT_LIST_HEAD(&entry->dmabufs);
299+
mutex_init(&entry->dmabufs_lock);
300+
277301
entry->ucontext = ucontext;
278302

279303
/*

drivers/infiniband/core/rdma_core.c

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -809,21 +809,10 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
809809
};
810810
EXPORT_SYMBOL(uverbs_idr_class);
811811

812-
/*
813-
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
814-
* file_operations release method.
815-
*/
816-
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
812+
int uverbs_uobject_release(struct ib_uobject *uobj)
817813
{
818814
struct ib_uverbs_file *ufile;
819-
struct ib_uobject *uobj;
820815

821-
/*
822-
* This can only happen if the fput came from alloc_abort_fd_uobject()
823-
*/
824-
if (!filp->private_data)
825-
return 0;
826-
uobj = filp->private_data;
827816
ufile = uobj->ufile;
828817

829818
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
@@ -850,6 +839,21 @@ int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
850839
uverbs_uobject_put(uobj);
851840
return 0;
852841
}
842+
843+
/*
844+
* Users of UVERBS_TYPE_ALLOC_FD should set this function as the struct
845+
* file_operations release method.
846+
*/
847+
int uverbs_uobject_fd_release(struct inode *inode, struct file *filp)
848+
{
849+
/*
850+
* This can only happen if the fput came from alloc_abort_fd_uobject()
851+
*/
852+
if (!filp->private_data)
853+
return 0;
854+
855+
/*
 * The uobject teardown itself lives in uverbs_uobject_release() so that
 * it can also be reached without a struct file (the dma-buf release
 * callback calls it directly with the uobject).
 */
return uverbs_uobject_release(filp->private_data);
856+
}
853857
EXPORT_SYMBOL(uverbs_uobject_fd_release);
854858

855859
/*

drivers/infiniband/core/rdma_core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ extern const struct uapi_definition uverbs_def_obj_counters[];
156156
extern const struct uapi_definition uverbs_def_obj_cq[];
157157
extern const struct uapi_definition uverbs_def_obj_device[];
158158
extern const struct uapi_definition uverbs_def_obj_dm[];
159+
extern const struct uapi_definition uverbs_def_obj_dmabuf[];
159160
extern const struct uapi_definition uverbs_def_obj_dmah[];
160161
extern const struct uapi_definition uverbs_def_obj_flow_action[];
161162
extern const struct uapi_definition uverbs_def_obj_intf[];

drivers/infiniband/core/uverbs.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,18 @@ struct ib_uverbs_completion_event_file {
133133
struct ib_uverbs_event_queue ev_queue;
134134
};
135135

136+
/*
 * Per-FD state of a UVERBS_OBJECT_DMABUF: a dma-buf exported on top of a
 * driver rdma_user_mmap_entry.
 */
struct ib_uverbs_dmabuf_file {
137+
/* Embedded uobject; users recover this struct via container_of() */
struct ib_uobject uobj;
138+
/* Result of dma_buf_export(); its ->priv points back to this struct */
struct dma_buf *dmabuf;
139+
/* Link in mmap_entry->dmabufs, manipulated under mmap_entry->dmabufs_lock */
struct list_head dmabufs_elm;
140+
/* Entry returned by ops.pgoff_to_mmap_entry(); put when the uobject is destroyed */
struct rdma_user_mmap_entry *mmap_entry;
141+
/* Physical range filled in by ops.mmap_get_pfns(); its len is the dma-buf size */
struct phys_vec phys_vec;
142+
/* P2P DMA provider filled in by ops.mmap_get_pfns() */
struct p2pdma_provider *provider;
143+
/* One initial reference plus one per successful map_dma_buf */
struct kref kref;
144+
/* Completed by ib_uverbs_dmabuf_done() when kref drops to zero */
struct completion comp;
145+
/* Set once the dma-buf was revoked; map_dma_buf then fails with -ENODEV */
u8 revoked :1;
146+
};
147+
136148
struct ib_uverbs_event {
137149
union {
138150
struct ib_uverbs_async_event_desc async;
@@ -290,4 +302,13 @@ ib_uverbs_get_async_event(struct uverbs_attr_bundle *attrs,
290302
void copy_port_attr_to_resp(struct ib_port_attr *attr,
291303
struct ib_uverbs_query_port_resp *resp,
292304
struct ib_device *ib_dev, u8 port_num);
305+
306+
/*
 * kref release callback for struct ib_uverbs_dmabuf_file.
 *
 * Does not free anything: it only signals ->comp so that a revoke path
 * blocked in wait_for_completion() can proceed once the last reference
 * has been dropped.
 */
static inline void ib_uverbs_dmabuf_done(struct kref *kref)
307+
{
308+
struct ib_uverbs_dmabuf_file *priv =
309+
container_of(kref, struct ib_uverbs_dmabuf_file, kref);
310+
311+
complete(&priv->comp);
312+
}
313+
293314
#endif /* UVERBS_H */
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2+
/*
3+
* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
4+
*/
5+
6+
#include <linux/dma-buf-mapping.h>
7+
#include <linux/pci-p2pdma.h>
8+
#include <linux/dma-resv.h>
9+
#include <rdma/uverbs_std_types.h>
10+
#include "rdma_core.h"
11+
#include "uverbs.h"
12+
13+
/*
 * dma_buf_ops.attach callback.
 *
 * Accepts only attachments that have peer2peer set; any other importer is
 * rejected with -EOPNOTSUPP. Returns 0 on success.
 */
static int uverbs_dmabuf_attach(struct dma_buf *dmabuf,
14+
struct dma_buf_attachment *attachment)
15+
{
16+
if (!attachment->peer2peer)
17+
return -EOPNOTSUPP;
18+
19+
return 0;
20+
}
21+
22+
/*
 * dma_buf_ops.map_dma_buf callback.
 *
 * Builds an sg_table for the single physical range stored in
 * priv->phys_vec. Must be called with the dma-buf reservation lock held.
 *
 * Returns the sg_table, ERR_PTR(-ENODEV) if the dma-buf was already
 * revoked, or the error returned by dma_buf_phys_vec_to_sgt().
 */
static struct sg_table *
23+
uverbs_dmabuf_map(struct dma_buf_attachment *attachment,
24+
enum dma_data_direction dir)
25+
{
26+
struct ib_uverbs_dmabuf_file *priv = attachment->dmabuf->priv;
27+
struct sg_table *ret;
28+
29+
dma_resv_assert_held(priv->dmabuf->resv);
30+
31+
/* The revoke paths set this flag under the same reservation lock */
if (priv->revoked)
32+
return ERR_PTR(-ENODEV);
33+
34+
ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
35+
&priv->phys_vec, 1, priv->phys_vec.len,
36+
dir);
37+
if (IS_ERR(ret))
38+
return ret;
39+
40+
/* One reference per live mapping; dropped in uverbs_dmabuf_unmap() */
kref_get(&priv->kref);
41+
return ret;
42+
}
43+
44+
/*
 * dma_buf_ops.unmap_dma_buf callback.
 *
 * Frees the sg_table and drops the reference taken by uverbs_dmabuf_map().
 * Must be called with the dma-buf reservation lock held.
 */
static void uverbs_dmabuf_unmap(struct dma_buf_attachment *attachment,
45+
struct sg_table *sgt,
46+
enum dma_data_direction dir)
47+
{
48+
struct ib_uverbs_dmabuf_file *priv = attachment->dmabuf->priv;
49+
50+
dma_resv_assert_held(priv->dmabuf->resv);
51+
dma_buf_free_sgt(attachment, sgt, dir);
52+
/* If this was the last reference, ib_uverbs_dmabuf_done() completes priv->comp */
kref_put(&priv->kref, ib_uverbs_dmabuf_done);
53+
}
54+
55+
/* dma_buf_ops.pin callback: pinning is never allowed, always -EOPNOTSUPP. */
static int uverbs_dmabuf_pin(struct dma_buf_attachment *attach)
56+
{
57+
return -EOPNOTSUPP;
58+
}
59+
60+
/*
 * dma_buf_ops.unpin callback: intentionally empty, since
 * uverbs_dmabuf_pin() never succeeds there is nothing to undo.
 */
static void uverbs_dmabuf_unpin(struct dma_buf_attachment *attach)
61+
{
62+
}
63+
64+
/*
 * dma_buf_ops.release callback.
 *
 * Hands the embedded uobject to uverbs_uobject_release(), the same
 * teardown used by uverbs_uobject_fd_release(), unless the uobject has no
 * context attached.
 */
static void uverbs_dmabuf_release(struct dma_buf *dmabuf)
65+
{
66+
struct ib_uverbs_dmabuf_file *priv = dmabuf->priv;
67+
68+
/*
69+
* This can only happen if the fput came from alloc_abort_fd_uobject()
70+
*/
71+
if (!priv->uobj.context)
72+
return;
73+
74+
uverbs_uobject_release(&priv->uobj);
75+
}
76+
77+
/*
 * dma-buf exporter operations for uverbs DMABUF objects; installed through
 * exp_info.ops in the UVERBS_METHOD_DMABUF_ALLOC handler.
 */
static const struct dma_buf_ops uverbs_dmabuf_ops = {
78+
.attach = uverbs_dmabuf_attach,
79+
.map_dma_buf = uverbs_dmabuf_map,
80+
.unmap_dma_buf = uverbs_dmabuf_unmap,
81+
.pin = uverbs_dmabuf_pin,
82+
.unpin = uverbs_dmabuf_unpin,
83+
.release = uverbs_dmabuf_release,
84+
};
85+
86+
/*
 * Handler for UVERBS_METHOD_DMABUF_ALLOC.
 *
 * Resolves the user supplied page offset to a driver mmap entry, asks the
 * driver for the backing physical range and P2P provider, exports a
 * dma-buf over that range and links it to the mmap entry so that it can be
 * revoked when the entry is removed.
 *
 * Returns 0 on success. Errors: the uverbs_get_const() error, -EINVAL when
 * no mmap entry matches the offset, the ops.mmap_get_pfns() or
 * dma_buf_export() error, or -EIO when the entry was already removed by
 * the driver.
 */
static int UVERBS_HANDLER(UVERBS_METHOD_DMABUF_ALLOC)(
87+
struct uverbs_attr_bundle *attrs)
88+
{
89+
struct ib_uobject *uobj =
90+
uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE)
91+
->obj_attr.uobject;
92+
struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
93+
container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
94+
struct ib_device *ib_dev = attrs->context->device;
95+
struct rdma_user_mmap_entry *mmap_entry;
96+
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
97+
/*
 * NOTE(review): the attribute is declared as u64 in the method
 * definition while this local is off_t - confirm the width/sign
 * handling is intended.
 */
off_t pg_off;
98+
int ret;
99+
100+
ret = uverbs_get_const(&pg_off, attrs, UVERBS_ATTR_ALLOC_DMABUF_PGOFF);
101+
if (ret)
102+
return ret;
103+
104+
/*
 * The entry comes back with a reference held; it is dropped on the
 * error paths below or in uverbs_dmabuf_fd_destroy_uobj().
 */
mmap_entry = ib_dev->ops.pgoff_to_mmap_entry(attrs->context, pg_off);
105+
if (!mmap_entry)
106+
return -EINVAL;
107+
108+
ret = ib_dev->ops.mmap_get_pfns(mmap_entry, &uverbs_dmabuf->phys_vec,
109+
&uverbs_dmabuf->provider);
110+
if (ret)
111+
goto err;
112+
113+
exp_info.ops = &uverbs_dmabuf_ops;
114+
exp_info.size = uverbs_dmabuf->phys_vec.len;
115+
exp_info.flags = O_CLOEXEC;
116+
exp_info.priv = uverbs_dmabuf;
117+
118+
uverbs_dmabuf->dmabuf = dma_buf_export(&exp_info);
119+
if (IS_ERR(uverbs_dmabuf->dmabuf)) {
120+
ret = PTR_ERR(uverbs_dmabuf->dmabuf);
121+
goto err;
122+
}
123+
124+
kref_init(&uverbs_dmabuf->kref);
125+
init_completion(&uverbs_dmabuf->comp);
126+
INIT_LIST_HEAD(&uverbs_dmabuf->dmabufs_elm);
127+
/*
 * rdma_user_mmap_entry_remove() sets driver_removed and walks the list
 * under the same mutex, so a dma-buf is either linked before the revoke
 * walk or rejected here.
 */
mutex_lock(&mmap_entry->dmabufs_lock);
128+
if (mmap_entry->driver_removed)
129+
ret = -EIO;
130+
else
131+
list_add_tail(&uverbs_dmabuf->dmabufs_elm, &mmap_entry->dmabufs);
132+
mutex_unlock(&mmap_entry->dmabufs_lock);
133+
if (ret)
134+
goto err_revoked;
135+
136+
/* The FD's file is the one created by the dma-buf framework */
uobj->object = uverbs_dmabuf->dmabuf->file;
137+
uverbs_dmabuf->mmap_entry = mmap_entry;
138+
uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_ALLOC_DMABUF_HANDLE);
139+
return 0;
140+
141+
err_revoked:
142+
/*
 * NOTE(review): dropping the export reference here may end up in
 * uverbs_dmabuf_release(); confirm the state of uobj.context on this
 * path so the uobject is not released twice.
 */
dma_buf_put(uverbs_dmabuf->dmabuf);
143+
err:
144+
rdma_user_mmap_entry_put(mmap_entry);
145+
return ret;
146+
}
147+
148+
/*
 * Attribute layout of UVERBS_METHOD_DMABUF_ALLOC: a mandatory new FD
 * handle of type UVERBS_OBJECT_DMABUF and a mandatory u64 input carrying
 * the page offset.
 */
DECLARE_UVERBS_NAMED_METHOD(
149+
UVERBS_METHOD_DMABUF_ALLOC,
150+
UVERBS_ATTR_FD(UVERBS_ATTR_ALLOC_DMABUF_HANDLE,
151+
UVERBS_OBJECT_DMABUF,
152+
UVERBS_ACCESS_NEW,
153+
UA_MANDATORY),
154+
UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DMABUF_PGOFF,
155+
UVERBS_ATTR_TYPE(u64),
156+
UA_MANDATORY));
157+
158+
/*
 * Destroy callback of the DMABUF uobject.
 *
 * If the dma-buf was not yet revoked (rdma_user_mmap_entry_remove() may
 * have done so already), revoke it here: unlink it from the mmap entry,
 * notify the importers and wait until every mapping has been unmapped.
 * Finally drop the mmap entry reference held since allocation.
 *
 * @why is not used by this function.
 */
static void uverbs_dmabuf_fd_destroy_uobj(struct ib_uobject *uobj,
159+
enum rdma_remove_reason why)
160+
{
161+
struct ib_uverbs_dmabuf_file *uverbs_dmabuf =
162+
container_of(uobj, struct ib_uverbs_dmabuf_file, uobj);
163+
bool wait_for_comp = false;
164+
165+
/* Same lock order as rdma_user_mmap_entry_remove(): dmabufs_lock, then resv */
mutex_lock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
166+
dma_resv_lock(uverbs_dmabuf->dmabuf->resv, NULL);
167+
if (!uverbs_dmabuf->revoked) {
168+
uverbs_dmabuf->revoked = true;
169+
list_del(&uverbs_dmabuf->dmabufs_elm);
170+
dma_buf_move_notify(uverbs_dmabuf->dmabuf);
171+
dma_resv_wait_timeout(uverbs_dmabuf->dmabuf->resv,
172+
DMA_RESV_USAGE_BOOKKEEP, false,
173+
MAX_SCHEDULE_TIMEOUT);
174+
wait_for_comp = true;
175+
}
176+
dma_resv_unlock(uverbs_dmabuf->dmabuf->resv);
177+
/*
 * The wait happens after the reservation lock is released;
 * uverbs_dmabuf_unmap(), which drops the per-mapping references,
 * asserts that lock is held by its caller.
 */
if (wait_for_comp) {
178+
kref_put(&uverbs_dmabuf->kref, ib_uverbs_dmabuf_done);
179+
/* Let's wait till all DMA unmap are completed. */
180+
wait_for_completion(&uverbs_dmabuf->comp);
181+
}
182+
mutex_unlock(&uverbs_dmabuf->mmap_entry->dmabufs_lock);
183+
184+
/* Matches the get done as part of pgoff_to_mmap_entry() */
185+
rdma_user_mmap_entry_put(uverbs_dmabuf->mmap_entry);
186+
}
187+
188+
/*
 * UVERBS_OBJECT_DMABUF: an FD based object backed by
 * struct ib_uverbs_dmabuf_file, destroyed through
 * uverbs_dmabuf_fd_destroy_uobj() and exposing the single ALLOC method.
 * The two NULL arguments are presumably the fops/name slots - TODO
 * confirm; the allocation handler takes the file from dma_buf_export()
 * instead.
 */
DECLARE_UVERBS_NAMED_OBJECT(
189+
UVERBS_OBJECT_DMABUF,
190+
UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_dmabuf_file),
191+
uverbs_dmabuf_fd_destroy_uobj,
192+
NULL, NULL, O_RDONLY),
193+
&UVERBS_METHOD(UVERBS_METHOD_DMABUF_ALLOC));
194+
195+
/*
 * uapi definition chained into uverbs_core_api. The object is declared as
 * needing the mmap_get_pfns and pgoff_to_mmap_entry device ops, both of
 * which the allocation handler calls unconditionally.
 */
const struct uapi_definition uverbs_def_obj_dmabuf[] = {
196+
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DMABUF),
197+
UAPI_DEF_OBJ_NEEDS_FN(mmap_get_pfns),
198+
UAPI_DEF_OBJ_NEEDS_FN(pgoff_to_mmap_entry),
199+
{}
200+
};

drivers/infiniband/core/uverbs_uapi.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ static const struct uapi_definition uverbs_core_api[] = {
631631
UAPI_DEF_CHAIN(uverbs_def_obj_cq),
632632
UAPI_DEF_CHAIN(uverbs_def_obj_device),
633633
UAPI_DEF_CHAIN(uverbs_def_obj_dm),
634+
UAPI_DEF_CHAIN(uverbs_def_obj_dmabuf),
634635
UAPI_DEF_CHAIN(uverbs_def_obj_dmah),
635636
UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
636637
UAPI_DEF_CHAIN(uverbs_def_obj_intf),

0 commit comments

Comments
 (0)