Skip to content

Commit f5b16b8

Browse files
nicolincjoergroedel
authored andcommitted
PCI: Suspend iommu function prior to resetting a device
PCIe permits a device to ignore ATS invalidation TLPs while processing a reset. This creates a problem visible to the OS where an ATS invalidation command will time out: e.g. an SVA domain will have no coordination with a reset event and can racily issue ATS invalidations to a resetting device. The PCIe r6.0, sec 10.3.1 IMPLEMENTATION NOTE recommends SW to disable and block ATS before initiating a Function Level Reset. It also mentions that other reset methods could have the same vulnerability as well. The IOMMU subsystem provides pci_dev_reset_iommu_prepare/done() callback helpers for this matter. Use them in all the existing reset functions. This will attach the device to its iommu_group->blocking_domain during the device reset, so as to allow IOMMU driver to: - invoke pci_disable_ats() and pci_enable_ats(), if necessary - wait for all ATS invalidations to complete - stop issuing new ATS invalidations - fence any incoming ATS queries Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Acked-by: Bjorn Helgaas <bhelgaas@google.com> Tested-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
1 parent c279e83 commit f5b16b8

3 files changed

Lines changed: 87 additions & 10 deletions

File tree

drivers/pci/pci-acpi.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <linux/delay.h>
1111
#include <linux/init.h>
12+
#include <linux/iommu.h>
1213
#include <linux/irqdomain.h>
1314
#include <linux/pci.h>
1415
#include <linux/msi.h>
@@ -971,19 +972,27 @@ void pci_set_acpi_fwnode(struct pci_dev *dev)
971972
int pci_dev_acpi_reset(struct pci_dev *dev, bool probe)
972973
{
973974
acpi_handle handle = ACPI_HANDLE(&dev->dev);
975+
int ret;
974976

975977
if (!handle || !acpi_has_method(handle, "_RST"))
976978
return -ENOTTY;
977979

978980
if (probe)
979981
return 0;
980982

983+
ret = pci_dev_reset_iommu_prepare(dev);
984+
if (ret) {
985+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
986+
return ret;
987+
}
988+
981989
if (ACPI_FAILURE(acpi_evaluate_object(handle, "_RST", NULL, NULL))) {
982990
pci_warn(dev, "ACPI _RST failed\n");
983-
return -ENOTTY;
991+
ret = -ENOTTY;
984992
}
985993

986-
return 0;
994+
pci_dev_reset_iommu_done(dev);
995+
return ret;
987996
}
988997

989998
bool acpi_pci_power_manageable(struct pci_dev *dev)

drivers/pci/pci.c

Lines changed: 58 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <linux/delay.h>
1414
#include <linux/dmi.h>
1515
#include <linux/init.h>
16+
#include <linux/iommu.h>
1617
#include <linux/msi.h>
1718
#include <linux/of.h>
1819
#include <linux/pci.h>
@@ -25,6 +26,7 @@
2526
#include <linux/logic_pio.h>
2627
#include <linux/device.h>
2728
#include <linux/pm_runtime.h>
29+
#include <linux/pci-ats.h>
2830
#include <linux/pci_hotplug.h>
2931
#include <linux/vmalloc.h>
3032
#include <asm/dma.h>
@@ -4330,13 +4332,22 @@ EXPORT_SYMBOL(pci_wait_for_pending_transaction);
43304332
*/
43314333
int pcie_flr(struct pci_dev *dev)
43324334
{
4335+
int ret;
4336+
43334337
if (!pci_wait_for_pending_transaction(dev))
43344338
pci_err(dev, "timed out waiting for pending transaction; performing function level reset anyway\n");
43354339

4340+
/* Have to call it after waiting for pending DMA transaction */
4341+
ret = pci_dev_reset_iommu_prepare(dev);
4342+
if (ret) {
4343+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
4344+
return ret;
4345+
}
4346+
43364347
pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
43374348

43384349
if (dev->imm_ready)
4339-
return 0;
4350+
goto done;
43404351

43414352
/*
43424353
* Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
@@ -4345,7 +4356,10 @@ int pcie_flr(struct pci_dev *dev)
43454356
*/
43464357
msleep(100);
43474358

4348-
return pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
4359+
ret = pci_dev_wait(dev, "FLR", PCIE_RESET_READY_POLL_MS);
4360+
done:
4361+
pci_dev_reset_iommu_done(dev);
4362+
return ret;
43494363
}
43504364
EXPORT_SYMBOL_GPL(pcie_flr);
43514365

@@ -4373,6 +4387,7 @@ EXPORT_SYMBOL_GPL(pcie_reset_flr);
43734387

43744388
static int pci_af_flr(struct pci_dev *dev, bool probe)
43754389
{
4390+
int ret;
43764391
int pos;
43774392
u8 cap;
43784393

@@ -4399,10 +4414,17 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
43994414
PCI_AF_STATUS_TP << 8))
44004415
pci_err(dev, "timed out waiting for pending transaction; performing AF function level reset anyway\n");
44014416

4417+
/* Have to call it after waiting for pending DMA transaction */
4418+
ret = pci_dev_reset_iommu_prepare(dev);
4419+
if (ret) {
4420+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
4421+
return ret;
4422+
}
4423+
44024424
pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
44034425

44044426
if (dev->imm_ready)
4405-
return 0;
4427+
goto done;
44064428

44074429
/*
44084430
* Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
@@ -4412,7 +4434,10 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
44124434
*/
44134435
msleep(100);
44144436

4415-
return pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
4437+
ret = pci_dev_wait(dev, "AF_FLR", PCIE_RESET_READY_POLL_MS);
4438+
done:
4439+
pci_dev_reset_iommu_done(dev);
4440+
return ret;
44164441
}
44174442

44184443
/**
@@ -4433,6 +4458,7 @@ static int pci_af_flr(struct pci_dev *dev, bool probe)
44334458
static int pci_pm_reset(struct pci_dev *dev, bool probe)
44344459
{
44354460
u16 csr;
4461+
int ret;
44364462

44374463
if (!dev->pm_cap || dev->dev_flags & PCI_DEV_FLAGS_NO_PM_RESET)
44384464
return -ENOTTY;
@@ -4447,6 +4473,12 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
44474473
if (dev->current_state != PCI_D0)
44484474
return -EINVAL;
44494475

4476+
ret = pci_dev_reset_iommu_prepare(dev);
4477+
if (ret) {
4478+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
4479+
return ret;
4480+
}
4481+
44504482
csr &= ~PCI_PM_CTRL_STATE_MASK;
44514483
csr |= PCI_D3hot;
44524484
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
@@ -4457,7 +4489,9 @@ static int pci_pm_reset(struct pci_dev *dev, bool probe)
44574489
pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr);
44584490
pci_dev_d3_sleep(dev);
44594491

4460-
return pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
4492+
ret = pci_dev_wait(dev, "PM D3hot->D0", PCIE_RESET_READY_POLL_MS);
4493+
pci_dev_reset_iommu_done(dev);
4494+
return ret;
44614495
}
44624496

44634497
/**
@@ -4885,10 +4919,20 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
48854919
return -ENOTTY;
48864920
}
48874921

4922+
rc = pci_dev_reset_iommu_prepare(dev);
4923+
if (rc) {
4924+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);
4925+
return rc;
4926+
}
4927+
48884928
rc = pci_dev_reset_slot_function(dev, probe);
48894929
if (rc != -ENOTTY)
4890-
return rc;
4891-
return pci_parent_bus_reset(dev, probe);
4930+
goto done;
4931+
4932+
rc = pci_parent_bus_reset(dev, probe);
4933+
done:
4934+
pci_dev_reset_iommu_done(dev);
4935+
return rc;
48924936
}
48934937

48944938
static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
@@ -4912,6 +4956,12 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
49124956
if (rc)
49134957
return -ENOTTY;
49144958

4959+
rc = pci_dev_reset_iommu_prepare(dev);
4960+
if (rc) {
4961+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", rc);
4962+
return rc;
4963+
}
4964+
49154965
if (reg & PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR) {
49164966
val = reg;
49174967
} else {
@@ -4926,6 +4976,7 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe)
49264976
pci_write_config_word(bridge, dvsec + PCI_DVSEC_CXL_PORT_CTL,
49274977
reg);
49284978

4979+
pci_dev_reset_iommu_done(dev);
49294980
return rc;
49304981
}
49314982

drivers/pci/quirks.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <linux/pci.h>
2222
#include <linux/isa-dma.h> /* isa_dma_bridge_buggy */
2323
#include <linux/init.h>
24+
#include <linux/iommu.h>
2425
#include <linux/delay.h>
2526
#include <linux/acpi.h>
2627
#include <linux/dmi.h>
@@ -4228,6 +4229,22 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
42284229
{ 0 }
42294230
};
42304231

4232+
static int __pci_dev_specific_reset(struct pci_dev *dev, bool probe,
4233+
const struct pci_dev_reset_methods *i)
4234+
{
4235+
int ret;
4236+
4237+
ret = pci_dev_reset_iommu_prepare(dev);
4238+
if (ret) {
4239+
pci_err(dev, "failed to stop IOMMU for a PCI reset: %d\n", ret);
4240+
return ret;
4241+
}
4242+
4243+
ret = i->reset(dev, probe);
4244+
pci_dev_reset_iommu_done(dev);
4245+
return ret;
4246+
}
4247+
42314248
/*
42324249
* These device-specific reset methods are here rather than in a driver
42334250
* because when a host assigns a device to a guest VM, the host may need
@@ -4242,7 +4259,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, bool probe)
42424259
i->vendor == (u16)PCI_ANY_ID) &&
42434260
(i->device == dev->device ||
42444261
i->device == (u16)PCI_ANY_ID))
4245-
return i->reset(dev, probe);
4262+
return __pci_dev_specific_reset(dev, probe, i);
42464263
}
42474264

42484265
return -ENOTTY;

0 commit comments

Comments
 (0)