Skip to content

Commit db44c17

Browse files
jgunthorpe authored and Alex Williamson committed
vfio/pci: Reorganize VFIO_DEVICE_PCI_HOT_RESET to use the device set
Like vfio_pci_dev_set_try_reset() this code wants to reset all of the devices in the "reset group" which is the same membership as the device set. Instead of trying to reconstruct the device set from the PCI list go directly from the device set's device list to execute the reset. The same basic structure as vfio_pci_dev_set_try_reset() is used. The 'vfio_devices' struct is replaced with the device set linked list and we simply sweep it multiple times under the lock. This eliminates a memory allocation and get/put traffic and another improperly locked test of pci_dev_driver(). Reviewed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Cornelia Huck <cohuck@redhat.com> Link: https://lore.kernel.org/r/10-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent a882c16 commit db44c17

1 file changed

Lines changed: 89 additions & 124 deletions

File tree

drivers/vfio/pci/vfio_pci.c

Lines changed: 89 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,11 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
223223
}
224224
}
225225

226+
struct vfio_pci_group_info;
226227
static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
227228
static void vfio_pci_disable(struct vfio_pci_device *vdev);
228-
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
229+
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
230+
struct vfio_pci_group_info *groups);
229231

230232
/*
231233
* INTx masking requires the ability to disable INTx signaling via PCI_COMMAND
@@ -643,37 +645,11 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
643645
return 0;
644646
}
645647

646-
struct vfio_pci_group_entry {
647-
struct vfio_group *group;
648-
int id;
649-
};
650-
651648
struct vfio_pci_group_info {
652649
int count;
653-
struct vfio_pci_group_entry *groups;
650+
struct vfio_group **groups;
654651
};
655652

656-
static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
657-
{
658-
struct vfio_pci_group_info *info = data;
659-
struct iommu_group *group;
660-
int id, i;
661-
662-
group = iommu_group_get(&pdev->dev);
663-
if (!group)
664-
return -EPERM;
665-
666-
id = iommu_group_id(group);
667-
668-
for (i = 0; i < info->count; i++)
669-
if (info->groups[i].id == id)
670-
break;
671-
672-
iommu_group_put(group);
673-
674-
return (i == info->count) ? -EINVAL : 0;
675-
}
676-
677653
static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
678654
{
679655
for (; pdev; pdev = pdev->bus->self)
@@ -751,12 +727,6 @@ int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
751727
return 0;
752728
}
753729

754-
struct vfio_devices {
755-
struct vfio_pci_device **devices;
756-
int cur_index;
757-
int max_index;
758-
};
759-
760730
static long vfio_pci_ioctl(struct vfio_device *core_vdev,
761731
unsigned int cmd, unsigned long arg)
762732
{
@@ -1125,11 +1095,10 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
11251095
} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
11261096
struct vfio_pci_hot_reset hdr;
11271097
int32_t *group_fds;
1128-
struct vfio_pci_group_entry *groups;
1098+
struct vfio_group **groups;
11291099
struct vfio_pci_group_info info;
1130-
struct vfio_devices devs = { .cur_index = 0 };
11311100
bool slot = false;
1132-
int i, group_idx, mem_idx = 0, count = 0, ret = 0;
1101+
int group_idx, count = 0, ret = 0;
11331102

11341103
minsz = offsetofend(struct vfio_pci_hot_reset, count);
11351104

@@ -1196,9 +1165,7 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
11961165
break;
11971166
}
11981167

1199-
groups[group_idx].group = group;
1200-
groups[group_idx].id =
1201-
vfio_external_user_iommu_id(group);
1168+
groups[group_idx] = group;
12021169
}
12031170

12041171
kfree(group_fds);
@@ -1210,64 +1177,11 @@ static long vfio_pci_ioctl(struct vfio_device *core_vdev,
12101177
info.count = hdr.count;
12111178
info.groups = groups;
12121179

1213-
/*
1214-
* Test whether all the affected devices are contained
1215-
* by the set of groups provided by the user.
1216-
*/
1217-
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1218-
vfio_pci_validate_devs,
1219-
&info, slot);
1220-
if (ret)
1221-
goto hot_reset_release;
1222-
1223-
devs.max_index = count;
1224-
devs.devices = kcalloc(count, sizeof(struct vfio_device *),
1225-
GFP_KERNEL);
1226-
if (!devs.devices) {
1227-
ret = -ENOMEM;
1228-
goto hot_reset_release;
1229-
}
1230-
1231-
/*
1232-
* We need to get memory_lock for each device, but devices
1233-
* can share mmap_lock, therefore we need to zap and hold
1234-
* the vma_lock for each device, and only then get each
1235-
* memory_lock.
1236-
*/
1237-
ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
1238-
vfio_pci_try_zap_and_vma_lock_cb,
1239-
&devs, slot);
1240-
if (ret)
1241-
goto hot_reset_release;
1242-
1243-
for (; mem_idx < devs.cur_index; mem_idx++) {
1244-
struct vfio_pci_device *tmp = devs.devices[mem_idx];
1245-
1246-
ret = down_write_trylock(&tmp->memory_lock);
1247-
if (!ret) {
1248-
ret = -EBUSY;
1249-
goto hot_reset_release;
1250-
}
1251-
mutex_unlock(&tmp->vma_lock);
1252-
}
1253-
1254-
/* User has access, do the reset */
1255-
ret = pci_reset_bus(vdev->pdev);
1180+
ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
12561181

12571182
hot_reset_release:
1258-
for (i = 0; i < devs.cur_index; i++) {
1259-
struct vfio_pci_device *tmp = devs.devices[i];
1260-
1261-
if (i < mem_idx)
1262-
up_write(&tmp->memory_lock);
1263-
else
1264-
mutex_unlock(&tmp->vma_lock);
1265-
vfio_device_put(&tmp->vdev);
1266-
}
1267-
kfree(devs.devices);
1268-
12691183
for (group_idx--; group_idx >= 0; group_idx--)
1270-
vfio_group_put_external_user(groups[group_idx].group);
1184+
vfio_group_put_external_user(groups[group_idx]);
12711185

12721186
kfree(groups);
12731187
return ret;
@@ -2146,37 +2060,15 @@ static struct pci_driver vfio_pci_driver = {
21462060
.err_handler = &vfio_err_handlers,
21472061
};
21482062

2149-
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
2063+
static bool vfio_dev_in_groups(struct vfio_pci_device *vdev,
2064+
struct vfio_pci_group_info *groups)
21502065
{
2151-
struct vfio_devices *devs = data;
2152-
struct vfio_device *device;
2153-
struct vfio_pci_device *vdev;
2154-
2155-
if (devs->cur_index == devs->max_index)
2156-
return -ENOSPC;
2157-
2158-
device = vfio_device_get_from_dev(&pdev->dev);
2159-
if (!device)
2160-
return -EINVAL;
2161-
2162-
if (pci_dev_driver(pdev) != &vfio_pci_driver) {
2163-
vfio_device_put(device);
2164-
return -EBUSY;
2165-
}
2166-
2167-
vdev = container_of(device, struct vfio_pci_device, vdev);
2066+
unsigned int i;
21682067

2169-
/*
2170-
* Locking multiple devices is prone to deadlock, runaway and
2171-
* unwind if we hit contention.
2172-
*/
2173-
if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
2174-
vfio_device_put(device);
2175-
return -EBUSY;
2176-
}
2177-
2178-
devs->devices[devs->cur_index++] = vdev;
2179-
return 0;
2068+
for (i = 0; i < groups->count; i++)
2069+
if (groups->groups[i] == vdev->vdev.group)
2070+
return true;
2071+
return false;
21802072
}
21812073

21822074
static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
@@ -2226,6 +2118,79 @@ vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)
22262118
return pdev;
22272119
}
22282120

2121+
/*
2122+
* We need to get memory_lock for each device, but devices can share mmap_lock,
2123+
* therefore we need to zap and hold the vma_lock for each device, and only then
2124+
* get each memory_lock.
2125+
*/
2126+
static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
2127+
struct vfio_pci_group_info *groups)
2128+
{
2129+
struct vfio_pci_device *cur_mem;
2130+
struct vfio_pci_device *cur_vma;
2131+
struct vfio_pci_device *cur;
2132+
struct pci_dev *pdev;
2133+
bool is_mem = true;
2134+
int ret;
2135+
2136+
mutex_lock(&dev_set->lock);
2137+
cur_mem = list_first_entry(&dev_set->device_list,
2138+
struct vfio_pci_device, vdev.dev_set_list);
2139+
2140+
pdev = vfio_pci_dev_set_resettable(dev_set);
2141+
if (!pdev) {
2142+
ret = -EINVAL;
2143+
goto err_unlock;
2144+
}
2145+
2146+
list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
2147+
/*
2148+
* Test whether all the affected devices are contained by the
2149+
* set of groups provided by the user.
2150+
*/
2151+
if (!vfio_dev_in_groups(cur_vma, groups)) {
2152+
ret = -EINVAL;
2153+
goto err_undo;
2154+
}
2155+
2156+
/*
2157+
* Locking multiple devices is prone to deadlock, runaway and
2158+
* unwind if we hit contention.
2159+
*/
2160+
if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) {
2161+
ret = -EBUSY;
2162+
goto err_undo;
2163+
}
2164+
}
2165+
cur_vma = NULL;
2166+
2167+
list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) {
2168+
if (!down_write_trylock(&cur_mem->memory_lock)) {
2169+
ret = -EBUSY;
2170+
goto err_undo;
2171+
}
2172+
mutex_unlock(&cur_mem->vma_lock);
2173+
}
2174+
cur_mem = NULL;
2175+
2176+
ret = pci_reset_bus(pdev);
2177+
2178+
err_undo:
2179+
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2180+
if (cur == cur_mem)
2181+
is_mem = false;
2182+
if (cur == cur_vma)
2183+
break;
2184+
if (is_mem)
2185+
up_write(&cur->memory_lock);
2186+
else
2187+
mutex_unlock(&cur->vma_lock);
2188+
}
2189+
err_unlock:
2190+
mutex_unlock(&dev_set->lock);
2191+
return ret;
2192+
}
2193+
22292194
static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
22302195
{
22312196
struct vfio_pci_device *cur;

0 commit comments

Comments (0)