Skip to content

Commit 2fd585f

Browse files
jgunthorpeAlex Williamson
authored andcommitted
vfio: Provide better generic support for open/release vfio_device_ops
Currently the driver ops have an open/release pair that is called once each time a device FD is opened or closed. Add an additional set of open/close_device() ops which are called when the device FD is opened for the first time and closed for the last time. An analysis shows that all of the drivers require this semantic. Some are open coding it as part of their reflck implementation, and some are just buggy and miss it completely. To retain the current semantics PCI and FSL depend on, introduce the idea of a "device set" which is a grouping of vfio_device's that share the same lock around opening. The device set is established by providing a 'set_id' pointer. All vfio_device's that provide the same pointer will be joined to the same singleton memory and lock across the whole set. This effectively replaces the oddly named reflck. After conversion the set_id will be sourced from: - A struct device from a fsl_mc_device (fsl) - A struct pci_slot (pci) - A struct pci_bus (pci) - The struct vfio_device (everything) The design ensures that the above pointers are live as long as the vfio_device is registered, so they form reliable unique keys to group vfio_devices into sets. This implementation uses xarray instead of searching through the driver core structures, which simplifies the somewhat tricky locking in this area. Following patches convert all the drivers. Signed-off-by: Yishai Hadas <yishaih@nvidia.com> Reviewed-by: Cornelia Huck <cohuck@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Link: https://lore.kernel.org/r/4-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent ae03c37 commit 2fd585f

4 files changed

Lines changed: 174 additions & 24 deletions

File tree

drivers/vfio/mdev/vfio_mdev.c

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,33 @@
1717

1818
#include "mdev_private.h"
1919

20+
static int vfio_mdev_open_device(struct vfio_device *core_vdev)
21+
{
22+
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
23+
struct mdev_parent *parent = mdev->type->parent;
24+
25+
if (unlikely(!parent->ops->open_device))
26+
return 0;
27+
28+
return parent->ops->open_device(mdev);
29+
}
30+
31+
static void vfio_mdev_close_device(struct vfio_device *core_vdev)
32+
{
33+
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
34+
struct mdev_parent *parent = mdev->type->parent;
35+
36+
if (likely(parent->ops->close_device))
37+
parent->ops->close_device(mdev);
38+
}
39+
2040
static int vfio_mdev_open(struct vfio_device *core_vdev)
2141
{
2242
struct mdev_device *mdev = to_mdev_device(core_vdev->dev);
2343
struct mdev_parent *parent = mdev->type->parent;
2444

2545
if (unlikely(!parent->ops->open))
26-
return -EINVAL;
46+
return 0;
2747

2848
return parent->ops->open(mdev);
2949
}
@@ -44,7 +64,7 @@ static long vfio_mdev_unlocked_ioctl(struct vfio_device *core_vdev,
4464
struct mdev_parent *parent = mdev->type->parent;
4565

4666
if (unlikely(!parent->ops->ioctl))
47-
return -EINVAL;
67+
return 0;
4868

4969
return parent->ops->ioctl(mdev, cmd, arg);
5070
}
@@ -100,6 +120,8 @@ static void vfio_mdev_request(struct vfio_device *core_vdev, unsigned int count)
100120

101121
static const struct vfio_device_ops vfio_mdev_dev_ops = {
102122
.name = "vfio-mdev",
123+
.open_device = vfio_mdev_open_device,
124+
.close_device = vfio_mdev_close_device,
103125
.open = vfio_mdev_open,
104126
.release = vfio_mdev_release,
105127
.ioctl = vfio_mdev_unlocked_ioctl,

drivers/vfio/vfio.c

Lines changed: 127 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,79 @@ module_param_named(enable_unsafe_noiommu_mode,
9696
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
9797
#endif
9898

99+
static DEFINE_XARRAY(vfio_device_set_xa);
100+
101+
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
102+
{
103+
unsigned long idx = (unsigned long)set_id;
104+
struct vfio_device_set *new_dev_set;
105+
struct vfio_device_set *dev_set;
106+
107+
if (WARN_ON(!set_id))
108+
return -EINVAL;
109+
110+
/*
111+
* Atomically acquire a singleton object in the xarray for this set_id
112+
*/
113+
xa_lock(&vfio_device_set_xa);
114+
dev_set = xa_load(&vfio_device_set_xa, idx);
115+
if (dev_set)
116+
goto found_get_ref;
117+
xa_unlock(&vfio_device_set_xa);
118+
119+
new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL);
120+
if (!new_dev_set)
121+
return -ENOMEM;
122+
mutex_init(&new_dev_set->lock);
123+
INIT_LIST_HEAD(&new_dev_set->device_list);
124+
new_dev_set->set_id = set_id;
125+
126+
xa_lock(&vfio_device_set_xa);
127+
dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set,
128+
GFP_KERNEL);
129+
if (!dev_set) {
130+
dev_set = new_dev_set;
131+
goto found_get_ref;
132+
}
133+
134+
kfree(new_dev_set);
135+
if (xa_is_err(dev_set)) {
136+
xa_unlock(&vfio_device_set_xa);
137+
return xa_err(dev_set);
138+
}
139+
140+
found_get_ref:
141+
dev_set->device_count++;
142+
xa_unlock(&vfio_device_set_xa);
143+
mutex_lock(&dev_set->lock);
144+
device->dev_set = dev_set;
145+
list_add_tail(&device->dev_set_list, &dev_set->device_list);
146+
mutex_unlock(&dev_set->lock);
147+
return 0;
148+
}
149+
EXPORT_SYMBOL_GPL(vfio_assign_device_set);
150+
151+
static void vfio_release_device_set(struct vfio_device *device)
152+
{
153+
struct vfio_device_set *dev_set = device->dev_set;
154+
155+
if (!dev_set)
156+
return;
157+
158+
mutex_lock(&dev_set->lock);
159+
list_del(&device->dev_set_list);
160+
mutex_unlock(&dev_set->lock);
161+
162+
xa_lock(&vfio_device_set_xa);
163+
if (!--dev_set->device_count) {
164+
__xa_erase(&vfio_device_set_xa,
165+
(unsigned long)dev_set->set_id);
166+
mutex_destroy(&dev_set->lock);
167+
kfree(dev_set);
168+
}
169+
xa_unlock(&vfio_device_set_xa);
170+
}
171+
99172
/*
100173
* vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
101174
* and remove functions, any use cases other than acquiring the first
@@ -751,6 +824,7 @@ EXPORT_SYMBOL_GPL(vfio_init_group_dev);
751824

752825
void vfio_uninit_group_dev(struct vfio_device *device)
753826
{
827+
vfio_release_device_set(device);
754828
}
755829
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
756830

@@ -760,6 +834,13 @@ int vfio_register_group_dev(struct vfio_device *device)
760834
struct iommu_group *iommu_group;
761835
struct vfio_group *group;
762836

837+
/*
838+
* If the driver doesn't specify a set then the device is added to a
839+
* singleton set just for itself.
840+
*/
841+
if (!device->dev_set)
842+
vfio_assign_device_set(device, device);
843+
763844
iommu_group = iommu_group_get(device->dev);
764845
if (!iommu_group)
765846
return -EINVAL;
@@ -1361,7 +1442,8 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
13611442
{
13621443
struct vfio_device *device;
13631444
struct file *filep;
1364-
int ret;
1445+
int fdno;
1446+
int ret = 0;
13651447

13661448
if (0 == atomic_read(&group->container_users) ||
13671449
!group->container->iommu_driver || !vfio_group_viable(group))
@@ -1375,38 +1457,38 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
13751457
return PTR_ERR(device);
13761458

13771459
if (!try_module_get(device->dev->driver->owner)) {
1378-
vfio_device_put(device);
1379-
return -ENODEV;
1460+
ret = -ENODEV;
1461+
goto err_device_put;
13801462
}
13811463

1382-
ret = device->ops->open(device);
1383-
if (ret) {
1384-
module_put(device->dev->driver->owner);
1385-
vfio_device_put(device);
1386-
return ret;
1464+
mutex_lock(&device->dev_set->lock);
1465+
device->open_count++;
1466+
if (device->open_count == 1 && device->ops->open_device) {
1467+
ret = device->ops->open_device(device);
1468+
if (ret)
1469+
goto err_undo_count;
1470+
}
1471+
mutex_unlock(&device->dev_set->lock);
1472+
1473+
if (device->ops->open) {
1474+
ret = device->ops->open(device);
1475+
if (ret)
1476+
goto err_close_device;
13871477
}
13881478

13891479
/*
13901480
* We can't use anon_inode_getfd() because we need to modify
13911481
* the f_mode flags directly to allow more than just ioctls
13921482
*/
1393-
ret = get_unused_fd_flags(O_CLOEXEC);
1394-
if (ret < 0) {
1395-
device->ops->release(device);
1396-
module_put(device->dev->driver->owner);
1397-
vfio_device_put(device);
1398-
return ret;
1399-
}
1483+
fdno = ret = get_unused_fd_flags(O_CLOEXEC);
1484+
if (ret < 0)
1485+
goto err_release;
14001486

14011487
filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
14021488
device, O_RDWR);
14031489
if (IS_ERR(filep)) {
1404-
put_unused_fd(ret);
14051490
ret = PTR_ERR(filep);
1406-
device->ops->release(device);
1407-
module_put(device->dev->driver->owner);
1408-
vfio_device_put(device);
1409-
return ret;
1491+
goto err_fd;
14101492
}
14111493

14121494
/*
@@ -1418,12 +1500,28 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
14181500

14191501
atomic_inc(&group->container_users);
14201502

1421-
fd_install(ret, filep);
1503+
fd_install(fdno, filep);
14221504

14231505
if (group->noiommu)
14241506
dev_warn(device->dev, "vfio-noiommu device opened by user "
14251507
"(%s:%d)\n", current->comm, task_pid_nr(current));
1508+
return fdno;
14261509

1510+
err_fd:
1511+
put_unused_fd(fdno);
1512+
err_release:
1513+
if (device->ops->release)
1514+
device->ops->release(device);
1515+
err_close_device:
1516+
mutex_lock(&device->dev_set->lock);
1517+
if (device->open_count == 1 && device->ops->close_device)
1518+
device->ops->close_device(device);
1519+
err_undo_count:
1520+
device->open_count--;
1521+
mutex_unlock(&device->dev_set->lock);
1522+
module_put(device->dev->driver->owner);
1523+
err_device_put:
1524+
vfio_device_put(device);
14271525
return ret;
14281526
}
14291527

@@ -1561,7 +1659,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
15611659
{
15621660
struct vfio_device *device = filep->private_data;
15631661

1564-
device->ops->release(device);
1662+
if (device->ops->release)
1663+
device->ops->release(device);
1664+
1665+
mutex_lock(&device->dev_set->lock);
1666+
if (!--device->open_count && device->ops->close_device)
1667+
device->ops->close_device(device);
1668+
mutex_unlock(&device->dev_set->lock);
15651669

15661670
module_put(device->dev->driver->owner);
15671671

@@ -2364,6 +2468,7 @@ static void __exit vfio_cleanup(void)
23642468
class_destroy(vfio.class);
23652469
vfio.class = NULL;
23662470
misc_deregister(&vfio_dev);
2471+
xa_destroy(&vfio_device_set_xa);
23672472
}
23682473

23692474
module_init(vfio_init);

include/linux/mdev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ struct mdev_parent_ops {
111111

112112
int (*create)(struct mdev_device *mdev);
113113
int (*remove)(struct mdev_device *mdev);
114+
int (*open_device)(struct mdev_device *mdev);
115+
void (*close_device)(struct mdev_device *mdev);
114116
int (*open)(struct mdev_device *mdev);
115117
void (*release)(struct mdev_device *mdev);
116118
ssize_t (*read)(struct mdev_device *mdev, char __user *buf,

include/linux/vfio.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,37 @@
1515
#include <linux/poll.h>
1616
#include <uapi/linux/vfio.h>
1717

18+
/*
19+
* VFIO devices can be placed in a set, this allows all devices to share this
20+
* structure and the VFIO core will provide a lock that is held around
21+
* open_device()/close_device() for all devices in the set.
22+
*/
23+
struct vfio_device_set {
24+
void *set_id;
25+
struct mutex lock;
26+
struct list_head device_list;
27+
unsigned int device_count;
28+
};
29+
1830
struct vfio_device {
1931
struct device *dev;
2032
const struct vfio_device_ops *ops;
2133
struct vfio_group *group;
34+
struct vfio_device_set *dev_set;
35+
struct list_head dev_set_list;
2236

2337
/* Members below here are private, not for driver use */
2438
refcount_t refcount;
39+
unsigned int open_count;
2540
struct completion comp;
2641
struct list_head group_next;
2742
};
2843

2944
/**
3045
* struct vfio_device_ops - VFIO bus driver device callbacks
3146
*
47+
* @open_device: Called when the first file descriptor is opened for this device
48+
* @close_device: Opposite of open_device
3249
* @open: Called when userspace creates new file descriptor for device
3350
* @release: Called when userspace releases file descriptor for device
3451
* @read: Perform read(2) on device file descriptor
@@ -43,6 +60,8 @@ struct vfio_device {
4360
*/
4461
struct vfio_device_ops {
4562
char *name;
63+
int (*open_device)(struct vfio_device *vdev);
64+
void (*close_device)(struct vfio_device *vdev);
4665
int (*open)(struct vfio_device *vdev);
4766
void (*release)(struct vfio_device *vdev);
4867
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
@@ -67,6 +86,8 @@ void vfio_unregister_group_dev(struct vfio_device *device);
6786
extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
6887
extern void vfio_device_put(struct vfio_device *device);
6988

89+
int vfio_assign_device_set(struct vfio_device *device, void *set_id);
90+
7091
/* events for the backend driver notify callback */
7192
enum vfio_iommu_notify_type {
7293
VFIO_IOMMU_CONTAINER_CLOSE = 0,

0 commit comments

Comments
 (0)