Skip to content

Commit 5901002

Browse files
ktbowman authored and davejiang committed
PCI/AER: Move CXL RCH error handling to aer_cxl_rch.c
The Restricted CXL Host (RCH) AER error handling logic currently resides in the AER driver file, aer.c. The CXL-specific changes are conditionally compiled using #ifdefs.

Improve the AER driver's maintainability by separating the RCH-specific logic from the AER driver's core functionality and removing the #ifdefs. Introduce drivers/pci/pcie/aer_cxl_rch.c to hold the RCH AER logic. Conditionally compile the file using the CONFIG_CXL_RAS Kconfig option.

Move the CXL logic into the new file, but leave the CXL helper function is_aer_internal_error() in aer.c for now, as it will be moved in a future patch for CXL Virtual Hierarchy handling.

Other changes are required to keep the code compiling after the move. Change cxl_rch_handle_error(), cxl_rch_enable_rcec(), and is_aer_internal_error() to be non-static in order to make them accessible from the AER driver.

Update the new file with the SPDX and 2023 AMD copyright notations because the RCH bits were initially contributed in 2023 by AMD. See commit 0a86756 ("PCI/AER: Forward RCH downstream port-detected errors to the CXL.mem dev handler").

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-12-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent 51ce56b commit 5901002

4 files changed

Lines changed: 114 additions & 101 deletions

File tree

drivers/pci/pcie/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o
88

99
obj-y += aspm.o
1010
obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o
11+
obj-$(CONFIG_CXL_RAS) += aer_cxl_rch.o
1112
obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
1213
obj-$(CONFIG_PCIE_PME) += pme.o
1314
obj-$(CONFIG_PCIE_DPC) += dpc.o

drivers/pci/pcie/aer.c

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1150,111 +1150,14 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev)
11501150
*/
11511151
EXPORT_SYMBOL_FOR_MODULES(pci_aer_unmask_internal_errors, "cxl_core");
11521152

1153-
#ifdef CONFIG_PCIEAER_CXL
1154-
static bool is_cxl_mem_dev(struct pci_dev *dev)
1155-
{
1156-
/*
1157-
* The capability, status, and control fields in Device 0,
1158-
* Function 0 DVSEC control the CXL functionality of the
1159-
* entire device (CXL 3.0, 8.1.3).
1160-
*/
1161-
if (dev->devfn != PCI_DEVFN(0, 0))
1162-
return false;
1163-
1164-
/*
1165-
* CXL Memory Devices must have the 502h class code set (CXL
1166-
* 3.0, 8.1.12.1).
1167-
*/
1168-
if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
1169-
return false;
1170-
1171-
return true;
1172-
}
1173-
1153+
#ifdef CONFIG_CXL_RAS
11741154
bool is_aer_internal_error(struct aer_err_info *info)
11751155
{
11761156
if (info->severity == AER_CORRECTABLE)
11771157
return info->status & PCI_ERR_COR_INTERNAL;
11781158

11791159
return info->status & PCI_ERR_UNC_INTN;
11801160
}
1181-
1182-
static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
1183-
{
1184-
struct aer_err_info *info = (struct aer_err_info *)data;
1185-
const struct pci_error_handlers *err_handler;
1186-
1187-
if (!is_cxl_mem_dev(dev) || !pcie_aer_is_native(dev))
1188-
return 0;
1189-
1190-
/* Protect dev->driver */
1191-
device_lock(&dev->dev);
1192-
1193-
err_handler = dev->driver ? dev->driver->err_handler : NULL;
1194-
if (!err_handler)
1195-
goto out;
1196-
1197-
if (info->severity == AER_CORRECTABLE) {
1198-
if (err_handler->cor_error_detected)
1199-
err_handler->cor_error_detected(dev);
1200-
} else if (err_handler->error_detected) {
1201-
if (info->severity == AER_NONFATAL)
1202-
err_handler->error_detected(dev, pci_channel_io_normal);
1203-
else if (info->severity == AER_FATAL)
1204-
err_handler->error_detected(dev, pci_channel_io_frozen);
1205-
}
1206-
out:
1207-
device_unlock(&dev->dev);
1208-
return 0;
1209-
}
1210-
1211-
static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
1212-
{
1213-
/*
1214-
* Internal errors of an RCEC indicate an AER error in an
1215-
* RCH's downstream port. Check and handle them in the CXL.mem
1216-
* device driver.
1217-
*/
1218-
if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
1219-
is_aer_internal_error(info))
1220-
pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
1221-
}
1222-
1223-
static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
1224-
{
1225-
bool *handles_cxl = data;
1226-
1227-
if (!*handles_cxl)
1228-
*handles_cxl = is_cxl_mem_dev(dev) && pcie_aer_is_native(dev);
1229-
1230-
/* Non-zero terminates iteration */
1231-
return *handles_cxl;
1232-
}
1233-
1234-
static bool handles_cxl_errors(struct pci_dev *rcec)
1235-
{
1236-
bool handles_cxl = false;
1237-
1238-
if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
1239-
pcie_aer_is_native(rcec))
1240-
pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
1241-
1242-
return handles_cxl;
1243-
}
1244-
1245-
static void cxl_rch_enable_rcec(struct pci_dev *rcec)
1246-
{
1247-
if (!handles_cxl_errors(rcec))
1248-
return;
1249-
1250-
pci_aer_unmask_internal_errors(rcec);
1251-
pci_info(rcec, "CXL: Internal errors unmasked");
1252-
}
1253-
1254-
#else
1255-
static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { }
1256-
static inline void cxl_rch_handle_error(struct pci_dev *dev,
1257-
struct aer_err_info *info) { }
12581161
#endif
12591162

12601163
/**

drivers/pci/pcie/aer_cxl_rch.c

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright(c) 2023 AMD Corporation. All rights reserved. */
3+
4+
#include <linux/pci.h>
5+
#include <linux/aer.h>
6+
#include <linux/bitfield.h>
7+
#include "../pci.h"
8+
#include "portdrv.h"
9+
10+
static bool is_cxl_mem_dev(struct pci_dev *dev)
11+
{
12+
/*
13+
* The capability, status, and control fields in Device 0,
14+
* Function 0 DVSEC control the CXL functionality of the
15+
* entire device (CXL 3.0, 8.1.3).
16+
*/
17+
if (dev->devfn != PCI_DEVFN(0, 0))
18+
return false;
19+
20+
/*
21+
* CXL Memory Devices must have the 502h class code set (CXL
22+
* 3.0, 8.1.12.1).
23+
*/
24+
if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL)
25+
return false;
26+
27+
return true;
28+
}
29+
30+
static bool cxl_error_is_native(struct pci_dev *dev)
31+
{
32+
struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
33+
34+
return (pcie_ports_native || host->native_aer);
35+
}
36+
37+
static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
38+
{
39+
struct aer_err_info *info = (struct aer_err_info *)data;
40+
const struct pci_error_handlers *err_handler;
41+
42+
if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev))
43+
return 0;
44+
45+
device_lock(&dev->dev);
46+
47+
err_handler = dev->driver ? dev->driver->err_handler : NULL;
48+
if (!err_handler)
49+
goto out;
50+
51+
if (info->severity == AER_CORRECTABLE) {
52+
if (err_handler->cor_error_detected)
53+
err_handler->cor_error_detected(dev);
54+
} else if (err_handler->error_detected) {
55+
if (info->severity == AER_NONFATAL)
56+
err_handler->error_detected(dev, pci_channel_io_normal);
57+
else if (info->severity == AER_FATAL)
58+
err_handler->error_detected(dev, pci_channel_io_frozen);
59+
}
60+
out:
61+
device_unlock(&dev->dev);
62+
return 0;
63+
}
64+
65+
void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
66+
{
67+
/*
68+
* Internal errors of an RCEC indicate an AER error in an
69+
* RCH's downstream port. Check and handle them in the CXL.mem
70+
* device driver.
71+
*/
72+
if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
73+
is_aer_internal_error(info))
74+
pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
75+
}
76+
77+
static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
78+
{
79+
bool *handles_cxl = data;
80+
81+
if (!*handles_cxl)
82+
*handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev);
83+
84+
/* Non-zero terminates iteration */
85+
return *handles_cxl;
86+
}
87+
88+
static bool handles_cxl_errors(struct pci_dev *rcec)
89+
{
90+
bool handles_cxl = false;
91+
92+
if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC &&
93+
pcie_aer_is_native(rcec))
94+
pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl);
95+
96+
return handles_cxl;
97+
}
98+
99+
/*
 * cxl_rch_enable_rcec - unmask internal errors on an RCEC that serves CXL
 * @rcec: candidate Root Complex Event Collector
 *
 * Does nothing unless handles_cxl_errors() reports that @rcec handles CXL
 * errors. Otherwise the RCEC's AER internal errors are unmasked and the
 * change is logged.
 */
void cxl_rch_enable_rcec(struct pci_dev *rcec)
{
	if (!handles_cxl_errors(rcec))
		return;

	pci_aer_unmask_internal_errors(rcec);
	/* Terminate the message so it is emitted as a complete log line */
	pci_info(rcec, "CXL: Internal errors unmasked\n");
}

drivers/pci/pcie/portdrv.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,13 @@ struct device *pcie_port_find_device(struct pci_dev *dev, u32 service);
126126

127127
struct aer_err_info;
128128

129-
#ifdef CONFIG_PCIEAER_CXL
129+
#ifdef CONFIG_CXL_RAS
130130
bool is_aer_internal_error(struct aer_err_info *info);
131+
void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info);
132+
void cxl_rch_enable_rcec(struct pci_dev *rcec);
131133
#else
132134
/* Stub for builds without CONFIG_CXL_RAS: never reports an internal error. */
static inline bool is_aer_internal_error(struct aer_err_info *info) { return false; }
133-
#endif /* CONFIG_PCIEAER_CXL */
134-
135+
/* Stub for builds without CONFIG_CXL_RAS: RCH error forwarding is a no-op. */
static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { }
136+
/* Stub for builds without CONFIG_CXL_RAS: no RCEC setup is performed. */
static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { }
137+
#endif /* CONFIG_CXL_RAS */
135138
#endif /* _PORTDRV_H_ */

0 commit comments

Comments
 (0)