Skip to content

Commit 7ff8b1d

Browse files
committed
cxl/pci: Remove CXL VH handling in CONFIG_PCIEAER_CXL conditional blocks from core/pci.c
Create new config CONFIG_CXL_RAS and put all CXL RAS items behind the config. The config will depend on CPER and PCIE AER to build. Move the related VH RAS code from core/pci.c to core/ras.c. Restricted CXL host (RCH) RAS functions will be moved in a future patch.

Cc: Robert Richter <rrichter@amd.com>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://patch.msgid.link/20260114182055.46029-8-terry.bowman@amd.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
1 parent bcfa289 commit 7ff8b1d

8 files changed

Lines changed: 233 additions & 195 deletions

File tree

drivers/cxl/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -233,4 +233,8 @@ config CXL_MCE
233233
def_bool y
234234
depends on X86_MCE && MEMORY_FAILURE
235235

236+
config CXL_RAS
237+
def_bool y
238+
depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI
239+
236240
endif

drivers/cxl/core/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,9 @@ cxl_core-y += pci.o
1414
cxl_core-y += hdm.o
1515
cxl_core-y += pmu.o
1616
cxl_core-y += cdat.o
17-
cxl_core-y += ras.o
1817
cxl_core-$(CONFIG_TRACING) += trace.o
1918
cxl_core-$(CONFIG_CXL_REGION) += region.o
2019
cxl_core-$(CONFIG_CXL_MCE) += mce.o
2120
cxl_core-$(CONFIG_CXL_FEATURES) += features.o
2221
cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
22+
cxl_core-$(CONFIG_CXL_RAS) += ras.o

drivers/cxl/core/core.h

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,39 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c);
144144
int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port,
145145
struct access_coordinate *c);
146146

147+
#ifdef CONFIG_CXL_RAS
147148
int cxl_ras_init(void);
148149
void cxl_ras_exit(void);
150+
bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
151+
void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
152+
#else
153+
static inline int cxl_ras_init(void)
154+
{
155+
return 0;
156+
}
157+
158+
static inline void cxl_ras_exit(void)
159+
{
160+
}
161+
162+
static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
163+
{
164+
return false;
165+
}
166+
static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { }
167+
#endif /* CONFIG_CXL_RAS */
168+
169+
/* Restricted CXL Host specific RAS functions */
170+
#ifdef CONFIG_CXL_RAS
171+
void cxl_dport_map_rch_aer(struct cxl_dport *dport);
172+
void cxl_disable_rch_root_ints(struct cxl_dport *dport);
173+
void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
174+
#else
175+
static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { }
176+
static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { }
177+
static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
178+
#endif /* CONFIG_CXL_RAS */
179+
149180
int cxl_gpf_port_setup(struct cxl_dport *dport);
150181

151182
struct cxl_hdm;

drivers/cxl/core/pci.c

Lines changed: 4 additions & 185 deletions
Original file line number | Diff line number | Diff line change
@@ -632,81 +632,8 @@ void read_cdat_data(struct cxl_port *port)
632632
}
633633
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
634634

635-
static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
636-
void __iomem *ras_base)
637-
{
638-
void __iomem *addr;
639-
u32 status;
640-
641-
if (!ras_base)
642-
return;
643-
644-
addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
645-
status = readl(addr);
646-
if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
647-
writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
648-
trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
649-
}
650-
}
651-
652-
/* CXL spec rev3.0 8.2.4.16.1 */
653-
static void header_log_copy(void __iomem *ras_base, u32 *log)
654-
{
655-
void __iomem *addr;
656-
u32 *log_addr;
657-
int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
658-
659-
addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
660-
log_addr = log;
661-
662-
for (i = 0; i < log_u32_size; i++) {
663-
*log_addr = readl(addr);
664-
log_addr++;
665-
addr += sizeof(u32);
666-
}
667-
}
668-
669-
/*
670-
* Log the state of the RAS status registers and prepare them to log the
671-
* next error status. Return 1 if reset needed.
672-
*/
673-
static bool cxl_handle_ras(struct cxl_dev_state *cxlds,
674-
void __iomem *ras_base)
675-
{
676-
u32 hl[CXL_HEADERLOG_SIZE_U32];
677-
void __iomem *addr;
678-
u32 status;
679-
u32 fe;
680-
681-
if (!ras_base)
682-
return false;
683-
684-
addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
685-
status = readl(addr);
686-
if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
687-
return false;
688-
689-
/* If multiple errors, log header points to first error from ctrl reg */
690-
if (hweight32(status) > 1) {
691-
void __iomem *rcc_addr =
692-
ras_base + CXL_RAS_CAP_CONTROL_OFFSET;
693-
694-
fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
695-
readl(rcc_addr)));
696-
} else {
697-
fe = status;
698-
}
699-
700-
header_log_copy(ras_base, hl);
701-
trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
702-
writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
703-
704-
return true;
705-
}
706-
707-
#ifdef CONFIG_PCIEAER_CXL
708-
709-
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
635+
#ifdef CONFIG_CXL_RAS
636+
void cxl_dport_map_rch_aer(struct cxl_dport *dport)
710637
{
711638
resource_size_t aer_phys;
712639
struct device *host;
@@ -721,19 +648,7 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
721648
}
722649
}
723650

724-
static void cxl_dport_map_ras(struct cxl_dport *dport)
725-
{
726-
struct cxl_register_map *map = &dport->reg_map;
727-
struct device *dev = dport->dport_dev;
728-
729-
if (!map->component_map.ras.valid)
730-
dev_dbg(dev, "RAS registers not found\n");
731-
else if (cxl_map_component_regs(map, &dport->regs.component,
732-
BIT(CXL_CM_CAP_CAP_ID_RAS)))
733-
dev_dbg(dev, "Failed to map RAS capability.\n");
734-
}
735-
736-
static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
651+
void cxl_disable_rch_root_ints(struct cxl_dport *dport)
737652
{
738653
void __iomem *aer_base = dport->regs.dport_aer;
739654
u32 aer_cmd_mask, aer_cmd;
@@ -757,28 +672,6 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
757672
writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
758673
}
759674

760-
/**
761-
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
762-
* @dport: the cxl_dport that needs to be initialized
763-
* @host: host device for devm operations
764-
*/
765-
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
766-
{
767-
dport->reg_map.host = host;
768-
cxl_dport_map_ras(dport);
769-
770-
if (dport->rch) {
771-
struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
772-
773-
if (!host_bridge->native_aer)
774-
return;
775-
776-
cxl_dport_map_rch_aer(dport);
777-
cxl_disable_rch_root_ints(dport);
778-
}
779-
}
780-
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
781-
782675
/*
783676
* Copy the AER capability registers using 32 bit read accesses.
784677
* This is necessary because RCRB AER capability is MMIO mapped. Clear the
@@ -827,7 +720,7 @@ static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
827720
return false;
828721
}
829722

830-
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
723+
void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
831724
{
832725
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
833726
struct aer_capability_regs aer_regs;
@@ -852,82 +745,8 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
852745
else
853746
cxl_handle_ras(cxlds, dport->regs.ras);
854747
}
855-
856-
#else
857-
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
858748
#endif
859749

860-
void cxl_cor_error_detected(struct pci_dev *pdev)
861-
{
862-
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
863-
struct device *dev = &cxlds->cxlmd->dev;
864-
865-
scoped_guard(device, dev) {
866-
if (!dev->driver) {
867-
dev_warn(&pdev->dev,
868-
"%s: memdev disabled, abort error handling\n",
869-
dev_name(dev));
870-
return;
871-
}
872-
873-
if (cxlds->rcd)
874-
cxl_handle_rdport_errors(cxlds);
875-
876-
cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
877-
}
878-
}
879-
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");
880-
881-
pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
882-
pci_channel_state_t state)
883-
{
884-
struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
885-
struct cxl_memdev *cxlmd = cxlds->cxlmd;
886-
struct device *dev = &cxlmd->dev;
887-
bool ue;
888-
889-
scoped_guard(device, dev) {
890-
if (!dev->driver) {
891-
dev_warn(&pdev->dev,
892-
"%s: memdev disabled, abort error handling\n",
893-
dev_name(dev));
894-
return PCI_ERS_RESULT_DISCONNECT;
895-
}
896-
897-
if (cxlds->rcd)
898-
cxl_handle_rdport_errors(cxlds);
899-
/*
900-
* A frozen channel indicates an impending reset which is fatal to
901-
* CXL.mem operation, and will likely crash the system. On the off
902-
* chance the situation is recoverable dump the status of the RAS
903-
* capability registers and bounce the active state of the memdev.
904-
*/
905-
ue = cxl_handle_ras(cxlds, cxlds->regs.ras);
906-
}
907-
908-
909-
switch (state) {
910-
case pci_channel_io_normal:
911-
if (ue) {
912-
device_release_driver(dev);
913-
return PCI_ERS_RESULT_NEED_RESET;
914-
}
915-
return PCI_ERS_RESULT_CAN_RECOVER;
916-
case pci_channel_io_frozen:
917-
dev_warn(&pdev->dev,
918-
"%s: frozen state error detected, disable CXL.mem\n",
919-
dev_name(dev));
920-
device_release_driver(dev);
921-
return PCI_ERS_RESULT_NEED_RESET;
922-
case pci_channel_io_perm_failure:
923-
dev_warn(&pdev->dev,
924-
"failure state error detected, request disconnect\n");
925-
return PCI_ERS_RESULT_DISCONNECT;
926-
}
927-
return PCI_ERS_RESULT_NEED_RESET;
928-
}
929-
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");
930-
931750
static int cxl_flit_size(struct pci_dev *pdev)
932751
{
933752
if (cxl_pci_flit_256(pdev))

0 commit comments

Comments
 (0)