Skip to content

Commit a882c16

Browse files
jgunthorpeAlex Williamson
authored andcommitted
vfio/pci: Change vfio_pci_try_bus_reset() to use the dev_set
vfio_pci_try_bus_reset() is triggering a reset of the entire_dev set if any device within it has accumulated a needs_reset. This reset can only be done once all of the drivers operating the PCI devices to be reset are in a known safe state. Make this clearer by directly operating on the dev_set instead of the vfio_pci_device. Rename the function to vfio_pci_dev_set_try_reset(). Use the device list inside the dev_set to check that all drivers are in a safe state instead of working backwards from the pci_device. The dev_set->lock directly prevents devices from joining/leaving the set, or changing their state, which further implies the pci_device cannot change drivers or that the vfio_device be freed, eliminating the need for get/put's. If a pci_device to be reset is not in the dev_set then the reset cannot be used as we can't know what the state of that driver is. Directly measure this by checking that every pci_device is in the dev_set - which effectively proves that VFIO drivers are attached to everything. Remove the odd interaction around vfio_pci_set_power_state() - have the only caller avoid its redundant vfio_pci_set_power_state() instead of avoiding it inside vfio_pci_dev_set_try_reset(). This restructuring corrects a call to pci_dev_driver() without holding the device_lock() and removes a hard wiring to &vfio_pci_driver. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Cornelia Huck <cohuck@redhat.com> Link: https://lore.kernel.org/r/9-v4-9ea22c5e6afb+1adf-vfio_reflck_jgg@nvidia.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 2cd8b14 commit a882c16

1 file changed

Lines changed: 82 additions & 92 deletions

File tree

drivers/vfio/pci/vfio_pci.c

Lines changed: 82 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
223223
}
224224
}
225225

226-
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
226+
static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
227227
static void vfio_pci_disable(struct vfio_pci_device *vdev);
228228
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data);
229229

@@ -404,6 +404,9 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
404404
struct vfio_pci_ioeventfd *ioeventfd, *ioeventfd_tmp;
405405
int i, bar;
406406

407+
/* For needs_reset */
408+
lockdep_assert_held(&vdev->vdev.dev_set->lock);
409+
407410
/* Stop the device from further DMA */
408411
pci_clear_master(pdev);
409412

@@ -487,9 +490,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
487490
out:
488491
pci_disable_device(pdev);
489492

490-
vfio_pci_try_bus_reset(vdev);
491-
492-
if (!disable_idle_d3)
493+
if (!vfio_pci_dev_set_try_reset(vdev->vdev.dev_set) && !disable_idle_d3)
493494
vfio_pci_set_power_state(vdev, PCI_D3hot);
494495
}
495496

@@ -2145,7 +2146,7 @@ static struct pci_driver vfio_pci_driver = {
21452146
.err_handler = &vfio_err_handlers,
21462147
};
21472148

2148-
static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
2149+
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
21492150
{
21502151
struct vfio_devices *devs = data;
21512152
struct vfio_device *device;
@@ -2165,8 +2166,11 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
21652166

21662167
vdev = container_of(device, struct vfio_pci_device, vdev);
21672168

2168-
/* Fault if the device is not unused */
2169-
if (device->open_count) {
2169+
/*
2170+
* Locking multiple devices is prone to deadlock, runaway and
2171+
* unwind if we hit contention.
2172+
*/
2173+
if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
21702174
vfio_device_put(device);
21712175
return -EBUSY;
21722176
}
@@ -2175,112 +2179,98 @@ static int vfio_pci_get_unused_devs(struct pci_dev *pdev, void *data)
21752179
return 0;
21762180
}
21772181

2178-
static int vfio_pci_try_zap_and_vma_lock_cb(struct pci_dev *pdev, void *data)
2182+
static int vfio_pci_is_device_in_set(struct pci_dev *pdev, void *data)
21792183
{
2180-
struct vfio_devices *devs = data;
2181-
struct vfio_device *device;
2182-
struct vfio_pci_device *vdev;
2184+
struct vfio_device_set *dev_set = data;
2185+
struct vfio_device *cur;
21832186

2184-
if (devs->cur_index == devs->max_index)
2185-
return -ENOSPC;
2186-
2187-
device = vfio_device_get_from_dev(&pdev->dev);
2188-
if (!device)
2189-
return -EINVAL;
2187+
list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
2188+
if (cur->dev == &pdev->dev)
2189+
return 0;
2190+
return -EBUSY;
2191+
}
21902192

2191-
if (pci_dev_driver(pdev) != &vfio_pci_driver) {
2192-
vfio_device_put(device);
2193-
return -EBUSY;
2194-
}
2193+
/*
2194+
* vfio-core considers a group to be viable and will create a vfio_device even
2195+
* if some devices are bound to drivers like pci-stub or pcieport. Here we
2196+
* require all PCI devices to be inside our dev_set since that ensures they stay
2197+
* put and that every driver controlling the device can co-ordinate with the
2198+
* device reset.
2199+
*
2200+
* Returns the pci_dev to pass to pci_reset_bus() if every PCI device to be
2201+
* reset is inside the dev_set, and pci_reset_bus() can succeed. NULL otherwise.
2202+
*/
2203+
static struct pci_dev *
2204+
vfio_pci_dev_set_resettable(struct vfio_device_set *dev_set)
2205+
{
2206+
struct pci_dev *pdev;
21952207

2196-
vdev = container_of(device, struct vfio_pci_device, vdev);
2208+
lockdep_assert_held(&dev_set->lock);
21972209

21982210
/*
2199-
* Locking multiple devices is prone to deadlock, runaway and
2200-
* unwind if we hit contention.
2211+
* By definition all PCI devices in the dev_set share the same PCI
2212+
* reset, so any pci_dev will have the same outcomes for
2213+
* pci_probe_reset_*() and pci_reset_bus().
22012214
*/
2202-
if (!vfio_pci_zap_and_vma_lock(vdev, true)) {
2203-
vfio_device_put(device);
2204-
return -EBUSY;
2205-
}
2215+
pdev = list_first_entry(&dev_set->device_list, struct vfio_pci_device,
2216+
vdev.dev_set_list)->pdev;
22062217

2207-
devs->devices[devs->cur_index++] = vdev;
2208-
return 0;
2218+
/* pci_reset_bus() is supported */
2219+
if (pci_probe_reset_slot(pdev->slot) && pci_probe_reset_bus(pdev->bus))
2220+
return NULL;
2221+
2222+
if (vfio_pci_for_each_slot_or_bus(pdev, vfio_pci_is_device_in_set,
2223+
dev_set,
2224+
!pci_probe_reset_slot(pdev->slot)))
2225+
return NULL;
2226+
return pdev;
2227+
}
2228+
2229+
static bool vfio_pci_dev_set_needs_reset(struct vfio_device_set *dev_set)
2230+
{
2231+
struct vfio_pci_device *cur;
2232+
bool needs_reset = false;
2233+
2234+
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2235+
/* No VFIO device in the set can have an open device FD */
2236+
if (cur->vdev.open_count)
2237+
return false;
2238+
needs_reset |= cur->needs_reset;
2239+
}
2240+
return needs_reset;
22092241
}
22102242

22112243
/*
2212-
* If a bus or slot reset is available for the provided device and:
2244+
* If a bus or slot reset is available for the provided dev_set and:
22132245
* - All of the devices affected by that bus or slot reset are unused
2214-
* (!refcnt)
22152246
* - At least one of the affected devices is marked dirty via
22162247
* needs_reset (such as by lack of FLR support)
2217-
* Then attempt to perform that bus or slot reset. Callers are required
2218-
* to hold vdev->dev_set->lock, protecting the bus/slot reset group from
2219-
* concurrent opens. A vfio_device reference is acquired for each device
2220-
* to prevent unbinds during the reset operation.
2221-
*
2222-
* NB: vfio-core considers a group to be viable even if some devices are
2223-
* bound to drivers like pci-stub or pcieport. Here we require all devices
2224-
* to be bound to vfio_pci since that's the only way we can be sure they
2225-
* stay put.
2248+
* Then attempt to perform that bus or slot reset.
2249+
* Returns true if the dev_set was reset.
22262250
*/
2227-
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
2251+
static bool vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set)
22282252
{
2229-
struct vfio_devices devs = { .cur_index = 0 };
2230-
int i = 0, ret = -EINVAL;
2231-
bool slot = false;
2232-
struct vfio_pci_device *tmp;
2233-
2234-
if (!pci_probe_reset_slot(vdev->pdev->slot))
2235-
slot = true;
2236-
else if (pci_probe_reset_bus(vdev->pdev->bus))
2237-
return;
2238-
2239-
if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
2240-
&i, slot) || !i)
2241-
return;
2242-
2243-
devs.max_index = i;
2244-
devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
2245-
if (!devs.devices)
2246-
return;
2247-
2248-
if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
2249-
vfio_pci_get_unused_devs,
2250-
&devs, slot))
2251-
goto put_devs;
2252-
2253-
/* Does at least one need a reset? */
2254-
for (i = 0; i < devs.cur_index; i++) {
2255-
tmp = devs.devices[i];
2256-
if (tmp->needs_reset) {
2257-
ret = pci_reset_bus(vdev->pdev);
2258-
break;
2259-
}
2260-
}
2253+
struct vfio_pci_device *cur;
2254+
struct pci_dev *pdev;
2255+
int ret;
22612256

2262-
put_devs:
2263-
for (i = 0; i < devs.cur_index; i++) {
2264-
tmp = devs.devices[i];
2257+
if (!vfio_pci_dev_set_needs_reset(dev_set))
2258+
return false;
22652259

2266-
/*
2267-
* If reset was successful, affected devices no longer need
2268-
* a reset and we should return all the collateral devices
2269-
* to low power. If not successful, we either didn't reset
2270-
* the bus or timed out waiting for it, so let's not touch
2271-
* the power state.
2272-
*/
2273-
if (!ret) {
2274-
tmp->needs_reset = false;
2260+
pdev = vfio_pci_dev_set_resettable(dev_set);
2261+
if (!pdev)
2262+
return false;
22752263

2276-
if (tmp != vdev && !disable_idle_d3)
2277-
vfio_pci_set_power_state(tmp, PCI_D3hot);
2278-
}
2264+
ret = pci_reset_bus(pdev);
2265+
if (ret)
2266+
return false;
22792267

2280-
vfio_device_put(&tmp->vdev);
2268+
list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
2269+
cur->needs_reset = false;
2270+
if (!disable_idle_d3)
2271+
vfio_pci_set_power_state(cur, PCI_D3hot);
22812272
}
2282-
2283-
kfree(devs.devices);
2273+
return true;
22842274
}
22852275

22862276
static void __exit vfio_pci_cleanup(void)

0 commit comments

Comments
 (0)