Skip to content

Commit 95350ef

Browse files
fdefranc authored and rafaeljw committed
ACPI: extlog: Trace CPER CXL Protocol Error Section
When Firmware First is enabled, BIOS handles errors first and then makes them available to the kernel via the Common Platform Error Record (CPER) sections (UEFI 2.11 Appendix N.2.13). Linux parses the CPER sections via one of two similar paths, either ELOG or GHES. The errors managed by ELOG are signaled to the BIOS by the I/O Machine Check Architecture (I/O MCA).

Currently, ELOG and GHES show some inconsistencies in how they report to userspace via trace events.

Therefore, make the two mentioned paths act similarly by tracing the CPER CXL Protocol Error Section.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
Link: https://patch.msgid.link/20260114101543.85926-6-fabio.m.de.francesco@linux.intel.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent ba8af8e commit 95350ef

4 files changed

Lines changed: 30 additions & 1 deletion

File tree

drivers/acpi/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ config ACPI_EXTLOG
494494
tristate "Extended Error Log support"
495495
depends on X86_MCE && X86_LOCAL_APIC && EDAC
496496
select UEFI_CPER
497+
select ACPI_APEI
498+
select ACPI_APEI_GHES
497499
help
498500
Certain usages such as Predictive Failure Analysis (PFA) require
499501
more information about the error than what can be described in

drivers/acpi/acpi_extlog.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/ratelimit.h>
1313
#include <linux/edac.h>
1414
#include <linux/ras.h>
15+
#include <cxl/event.h>
1516
#include <acpi/ghes.h>
1617
#include <asm/cpu.h>
1718
#include <asm/mce.h>
@@ -162,6 +163,23 @@ static void extlog_print_pcie(struct cper_sec_pcie *pcie_err,
162163
#endif
163164
}
164165

166+
/*
 * extlog_cxl_cper_handle_prot_err - hand a CPER CXL Protocol Error section
 * found in the extended error log to the CXL protocol error handler.
 * @prot_err: payload of the CPER CXL Protocol Error section
 * @severity: error severity taken from the generic error data entry
 *
 * The section is validated, converted into a work-data record and then
 * passed to cxl_cper_handle_prot_err().  Nothing is done when either helper
 * returns non-zero (presumably an error code -- confirm against
 * include/cxl/event.h).
 *
 * The guard must use the CONFIG_-prefixed symbol: Kconfig options are only
 * visible to C code as CONFIG_<NAME>, so a bare ACPI_APEI_PCIEAER is never
 * defined and would silently compile the whole body out.
 *
 * NOTE(review): with the guard now effective, cxl_cper_handle_prot_err()
 * must be reachable at link time in every configuration that enables
 * CONFIG_ACPI_APEI_PCIEAER -- confirm the Kconfig dependencies cover this.
 */
static void
extlog_cxl_cper_handle_prot_err(struct cxl_cper_sec_prot_err *prot_err,
				int severity)
{
#ifdef CONFIG_ACPI_APEI_PCIEAER
	struct cxl_cper_prot_err_work_data wd;

	/* Skip sections that fail validation. */
	if (cxl_cper_sec_prot_err_valid(prot_err))
		return;

	/* Build the work data on the stack; bail out if that fails. */
	if (cxl_cper_setup_prot_err_work_data(&wd, prot_err, severity))
		return;

	cxl_cper_handle_prot_err(&wd);
#endif
}
182+
165183
static int extlog_print(struct notifier_block *nb, unsigned long val,
166184
void *data)
167185
{
@@ -213,6 +231,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
213231
if (gdata->error_data_length >= sizeof(*mem))
214232
trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
215233
(u8)gdata->error_severity);
234+
} else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
235+
struct cxl_cper_sec_prot_err *prot_err =
236+
acpi_hest_get_payload(gdata);
237+
238+
extlog_cxl_cper_handle_prot_err(prot_err,
239+
gdata->error_severity);
216240
} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
217241
struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
218242

drivers/cxl/core/ras.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ static int match_memdev_by_parent(struct device *dev, const void *uport)
6363
return 0;
6464
}
6565

66-
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
66+
void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
6767
{
6868
unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
6969
data->prot_err.agent_addr.function);
@@ -104,6 +104,7 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
104104
else
105105
cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
106106
}
107+
EXPORT_SYMBOL_GPL(cxl_cper_handle_prot_err);
107108

108109
static void cxl_cper_prot_err_work_fn(struct work_struct *work)
109110
{

include/cxl/event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,4 +340,6 @@ cxl_cper_setup_prot_err_work_data(struct cxl_cper_prot_err_work_data *wd,
340340
}
341341
#endif
342342

343+
void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *wd);
344+
343345
#endif /* _LINUX_CXL_EVENT_H */

0 commit comments

Comments
 (0)