Skip to content

Commit c3b1ede

Browse files
LuBaolujoergroedel
authored andcommitted
iommu/vt-d: Fix race condition during PASID entry replacement
The Intel VT-d PASID table entry is 512 bits (64 bytes). When replacing an active PASID entry (e.g., during domain replacement), the current implementation calculates a new entry on the stack and copies it to the table using a single structure assignment. struct pasid_entry *pte, new_pte; pte = intel_pasid_get_entry(dev, pasid); pasid_pte_config_first_level(iommu, &new_pte, ...); *pte = new_pte; Because the hardware may fetch the 512-bit PASID entry in multiple 128-bit chunks, updating the entire entry while it is active (Present bit set) risks a "torn" read. In this scenario, the IOMMU hardware could observe an inconsistent state — partially new data and partially old data — leading to unpredictable behavior or spurious faults. Fix this by removing the unsafe "replace" helpers and following the "clear-then-update" flow, which ensures the Present bit is cleared and the required invalidation handshake is completed before the new configuration is applied. Fixes: 7543ee6 ("iommu/vt-d: Add pasid replace helpers") Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Reviewed-by: Samiullah Khawaja <skhawaja@google.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Link: https://lore.kernel.org/r/20260120061816.2132558-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
1 parent c1e4f1d commit c3b1ede

4 files changed

Lines changed: 16 additions & 220 deletions

File tree

drivers/iommu/intel/iommu.c

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,36 +1252,31 @@ int __domain_setup_first_level(struct intel_iommu *iommu, struct device *dev,
12521252
ioasid_t pasid, u16 did, phys_addr_t fsptptr,
12531253
int flags, struct iommu_domain *old)
12541254
{
1255-
if (!old)
1256-
return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid,
1257-
did, flags);
1258-
return intel_pasid_replace_first_level(iommu, dev, fsptptr, pasid, did,
1259-
iommu_domain_did(old, iommu),
1260-
flags);
1255+
if (old)
1256+
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1257+
1258+
return intel_pasid_setup_first_level(iommu, dev, fsptptr, pasid, did, flags);
12611259
}
12621260

12631261
static int domain_setup_second_level(struct intel_iommu *iommu,
12641262
struct dmar_domain *domain,
12651263
struct device *dev, ioasid_t pasid,
12661264
struct iommu_domain *old)
12671265
{
1268-
if (!old)
1269-
return intel_pasid_setup_second_level(iommu, domain,
1270-
dev, pasid);
1271-
return intel_pasid_replace_second_level(iommu, domain, dev,
1272-
iommu_domain_did(old, iommu),
1273-
pasid);
1266+
if (old)
1267+
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1268+
1269+
return intel_pasid_setup_second_level(iommu, domain, dev, pasid);
12741270
}
12751271

12761272
static int domain_setup_passthrough(struct intel_iommu *iommu,
12771273
struct device *dev, ioasid_t pasid,
12781274
struct iommu_domain *old)
12791275
{
1280-
if (!old)
1281-
return intel_pasid_setup_pass_through(iommu, dev, pasid);
1282-
return intel_pasid_replace_pass_through(iommu, dev,
1283-
iommu_domain_did(old, iommu),
1284-
pasid);
1276+
if (old)
1277+
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
1278+
1279+
return intel_pasid_setup_pass_through(iommu, dev, pasid);
12851280
}
12861281

12871282
static int domain_setup_first_level(struct intel_iommu *iommu,

drivers/iommu/intel/nested.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,10 @@ static int domain_setup_nested(struct intel_iommu *iommu,
136136
struct device *dev, ioasid_t pasid,
137137
struct iommu_domain *old)
138138
{
139-
if (!old)
140-
return intel_pasid_setup_nested(iommu, dev, pasid, domain);
141-
return intel_pasid_replace_nested(iommu, dev, pasid,
142-
iommu_domain_did(old, iommu),
143-
domain);
139+
if (old)
140+
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
141+
142+
return intel_pasid_setup_nested(iommu, dev, pasid, domain);
144143
}
145144

146145
static int intel_nested_set_dev_pasid(struct iommu_domain *domain,

drivers/iommu/intel/pasid.c

Lines changed: 0 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -417,50 +417,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, struct device *dev,
417417
return 0;
418418
}
419419

420-
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
421-
struct device *dev, phys_addr_t fsptptr,
422-
u32 pasid, u16 did, u16 old_did,
423-
int flags)
424-
{
425-
struct pasid_entry *pte, new_pte;
426-
427-
if (!ecap_flts(iommu->ecap)) {
428-
pr_err("No first level translation support on %s\n",
429-
iommu->name);
430-
return -EINVAL;
431-
}
432-
433-
if ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)) {
434-
pr_err("No 5-level paging support for first-level on %s\n",
435-
iommu->name);
436-
return -EINVAL;
437-
}
438-
439-
pasid_pte_config_first_level(iommu, &new_pte, fsptptr, did, flags);
440-
441-
spin_lock(&iommu->lock);
442-
pte = intel_pasid_get_entry(dev, pasid);
443-
if (!pte) {
444-
spin_unlock(&iommu->lock);
445-
return -ENODEV;
446-
}
447-
448-
if (!pasid_pte_is_present(pte)) {
449-
spin_unlock(&iommu->lock);
450-
return -EINVAL;
451-
}
452-
453-
WARN_ON(old_did != pasid_get_domain_id(pte));
454-
455-
*pte = new_pte;
456-
spin_unlock(&iommu->lock);
457-
458-
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
459-
intel_iommu_drain_pasid_prq(dev, pasid);
460-
461-
return 0;
462-
}
463-
464420
/*
465421
* Set up the scalable mode pasid entry for second only translation type.
466422
*/
@@ -527,51 +483,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
527483
return 0;
528484
}
529485

530-
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
531-
struct dmar_domain *domain,
532-
struct device *dev, u16 old_did,
533-
u32 pasid)
534-
{
535-
struct pasid_entry *pte, new_pte;
536-
u16 did;
537-
538-
/*
539-
* If hardware advertises no support for second level
540-
* translation, return directly.
541-
*/
542-
if (!ecap_slts(iommu->ecap)) {
543-
pr_err("No second level translation support on %s\n",
544-
iommu->name);
545-
return -EINVAL;
546-
}
547-
548-
did = domain_id_iommu(domain, iommu);
549-
550-
pasid_pte_config_second_level(iommu, &new_pte, domain, did);
551-
552-
spin_lock(&iommu->lock);
553-
pte = intel_pasid_get_entry(dev, pasid);
554-
if (!pte) {
555-
spin_unlock(&iommu->lock);
556-
return -ENODEV;
557-
}
558-
559-
if (!pasid_pte_is_present(pte)) {
560-
spin_unlock(&iommu->lock);
561-
return -EINVAL;
562-
}
563-
564-
WARN_ON(old_did != pasid_get_domain_id(pte));
565-
566-
*pte = new_pte;
567-
spin_unlock(&iommu->lock);
568-
569-
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
570-
intel_iommu_drain_pasid_prq(dev, pasid);
571-
572-
return 0;
573-
}
574-
575486
/*
576487
* Set up dirty tracking on a second only or nested translation type.
577488
*/
@@ -684,38 +595,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
684595
return 0;
685596
}
686597

687-
int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
688-
struct device *dev, u16 old_did,
689-
u32 pasid)
690-
{
691-
struct pasid_entry *pte, new_pte;
692-
u16 did = FLPT_DEFAULT_DID;
693-
694-
pasid_pte_config_pass_through(iommu, &new_pte, did);
695-
696-
spin_lock(&iommu->lock);
697-
pte = intel_pasid_get_entry(dev, pasid);
698-
if (!pte) {
699-
spin_unlock(&iommu->lock);
700-
return -ENODEV;
701-
}
702-
703-
if (!pasid_pte_is_present(pte)) {
704-
spin_unlock(&iommu->lock);
705-
return -EINVAL;
706-
}
707-
708-
WARN_ON(old_did != pasid_get_domain_id(pte));
709-
710-
*pte = new_pte;
711-
spin_unlock(&iommu->lock);
712-
713-
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
714-
intel_iommu_drain_pasid_prq(dev, pasid);
715-
716-
return 0;
717-
}
718-
719598
/*
720599
* Set the page snoop control for a pasid entry which has been set up.
721600
*/
@@ -849,69 +728,6 @@ int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
849728
return 0;
850729
}
851730

852-
int intel_pasid_replace_nested(struct intel_iommu *iommu,
853-
struct device *dev, u32 pasid,
854-
u16 old_did, struct dmar_domain *domain)
855-
{
856-
struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
857-
struct dmar_domain *s2_domain = domain->s2_domain;
858-
u16 did = domain_id_iommu(domain, iommu);
859-
struct pasid_entry *pte, new_pte;
860-
861-
/* Address width should match the address width supported by hardware */
862-
switch (s1_cfg->addr_width) {
863-
case ADDR_WIDTH_4LEVEL:
864-
break;
865-
case ADDR_WIDTH_5LEVEL:
866-
if (!cap_fl5lp_support(iommu->cap)) {
867-
dev_err_ratelimited(dev,
868-
"5-level paging not supported\n");
869-
return -EINVAL;
870-
}
871-
break;
872-
default:
873-
dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
874-
s1_cfg->addr_width);
875-
return -EINVAL;
876-
}
877-
878-
if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
879-
pr_err_ratelimited("No supervisor request support on %s\n",
880-
iommu->name);
881-
return -EINVAL;
882-
}
883-
884-
if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
885-
pr_err_ratelimited("No extended access flag support on %s\n",
886-
iommu->name);
887-
return -EINVAL;
888-
}
889-
890-
pasid_pte_config_nestd(iommu, &new_pte, s1_cfg, s2_domain, did);
891-
892-
spin_lock(&iommu->lock);
893-
pte = intel_pasid_get_entry(dev, pasid);
894-
if (!pte) {
895-
spin_unlock(&iommu->lock);
896-
return -ENODEV;
897-
}
898-
899-
if (!pasid_pte_is_present(pte)) {
900-
spin_unlock(&iommu->lock);
901-
return -EINVAL;
902-
}
903-
904-
WARN_ON(old_did != pasid_get_domain_id(pte));
905-
906-
*pte = new_pte;
907-
spin_unlock(&iommu->lock);
908-
909-
intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
910-
intel_iommu_drain_pasid_prq(dev, pasid);
911-
912-
return 0;
913-
}
914-
915731
/*
916732
* Interfaces to setup or teardown a pasid table to the scalable-mode
917733
* context table entry:

drivers/iommu/intel/pasid.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -316,20 +316,6 @@ int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
316316
struct device *dev, u32 pasid);
317317
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
318318
u32 pasid, struct dmar_domain *domain);
319-
int intel_pasid_replace_first_level(struct intel_iommu *iommu,
320-
struct device *dev, phys_addr_t fsptptr,
321-
u32 pasid, u16 did, u16 old_did, int flags);
322-
int intel_pasid_replace_second_level(struct intel_iommu *iommu,
323-
struct dmar_domain *domain,
324-
struct device *dev, u16 old_did,
325-
u32 pasid);
326-
int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
327-
struct device *dev, u16 old_did,
328-
u32 pasid);
329-
int intel_pasid_replace_nested(struct intel_iommu *iommu,
330-
struct device *dev, u32 pasid,
331-
u16 old_did, struct dmar_domain *domain);
332-
333319
void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
334320
struct device *dev, u32 pasid,
335321
bool fault_ignore);

0 commit comments

Comments
 (0)