Skip to content

Commit 3edbe8a

Browse files
committed
Merge tag 'ras_core_for_v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 RAS updates from Borislav Petkov:

 - Convert the hw error storm handling into a finer-grained, per-bank solution which allows for more timely detection and reporting of errors

 - Start a documentation section which will hold descriptions of the relevant RAS features and how they should be used

 - Add new AMD error bank types

 - Slim down the kernel side of error decoding by removing the error type descriptions in favor of rasdaemon, which can be used from now on to decode hw errors on AMD

 - Mark pages containing uncorrectable errors as poison so that kdump can avoid them and thus not cause another panic

 - The usual cleanups and fixlets

* tag 'ras_core_for_v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Handle Intel threshold interrupt storms
  x86/mce: Add per-bank CMCI storm mitigation
  x86/mce: Remove old CMCI storm mitigation code
  Documentation: Begin a RAS section
  x86/MCE/AMD: Add new MA_LLC, USR_DP, and USR_CP bank types
  EDAC/mce_amd: Remove SMCA Extended Error code descriptions
  x86/mce/amd, EDAC/mce_amd: Move long names to decoder module
  x86/mce/inject: Clear test status value
  x86/mce: Remove redundant check from mce_device_create()
  x86/mce: Mark fatal MCE's page as poison to avoid panic in the kdump kernel
2 parents bef91c2 + 1f68ce2 commit 3edbe8a

10 files changed

Lines changed: 457 additions & 738 deletions

File tree

Documentation/RAS/ras.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
Reliability, Availability and Serviceability features
4+
=====================================================
5+
6+
This documents different aspects of the RAS functionality present in the
7+
kernel.
8+
9+
Error decoding
10+
---------------
11+
12+
* x86
13+
14+
Error decoding on AMD systems should be done using the rasdaemon tool:
15+
https://github.com/mchehab/rasdaemon/
16+
17+
While the daemon is running, it automatically logs and decodes
18+
errors. If not, one can still decode such errors by supplying the
19+
hardware information from the error::
20+
21+
$ rasdaemon -p --status <STATUS> --ipid <IPID> --smca
22+
23+
Also, the user can pass a particular family and model to decode the error
24+
string::
25+
26+
$ rasdaemon -p --status <STATUS> --ipid <IPID> --smca --family <CPU Family> --model <CPU Model> --bank <BANK_NUM>

Documentation/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ to ReStructured Text format, or are simply too old.
113113
:maxdepth: 1
114114

115115
staging/index
116+
RAS/ras
116117

117118

118119
Translations

arch/x86/include/asm/mce.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ enum smca_bank_types {
311311
SMCA_PIE, /* Power, Interrupts, etc. */
312312
SMCA_UMC, /* Unified Memory Controller */
313313
SMCA_UMC_V2,
314+
SMCA_MA_LLC, /* Memory Attached Last Level Cache */
314315
SMCA_PB, /* Parameter Block */
315316
SMCA_PSP, /* Platform Security Processor */
316317
SMCA_PSP_V2,
@@ -326,14 +327,15 @@ enum smca_bank_types {
326327
SMCA_SHUB, /* System HUB Unit */
327328
SMCA_SATA, /* SATA Unit */
328329
SMCA_USB, /* USB Unit */
330+
SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
331+
SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */
329332
SMCA_GMI_PCS, /* GMI PCS Unit */
330333
SMCA_XGMI_PHY, /* xGMI PHY Unit */
331334
SMCA_WAFL_PHY, /* WAFL PHY Unit */
332335
SMCA_GMI_PHY, /* GMI PHY Unit */
333336
N_SMCA_BANK_TYPES
334337
};
335338

336-
extern const char *smca_get_long_name(enum smca_bank_types t);
337339
extern bool amd_mce_is_memory_error(struct mce *m);
338340

339341
extern int mce_threshold_create_device(unsigned int cpu);

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 36 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -87,60 +87,49 @@ struct smca_bank {
8787
static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks);
8888
static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts);
8989

90-
struct smca_bank_name {
91-
const char *name; /* Short name for sysfs */
92-
const char *long_name; /* Long name for pretty-printing */
93-
};
94-
95-
static struct smca_bank_name smca_names[] = {
96-
[SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" },
97-
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
98-
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
99-
[SMCA_DE] = { "decode_unit", "Decode Unit" },
100-
[SMCA_RESERVED] = { "reserved", "Reserved" },
101-
[SMCA_EX] = { "execution_unit", "Execution Unit" },
102-
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
103-
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
104-
[SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
105-
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
90+
static const char * const smca_names[] = {
91+
[SMCA_LS ... SMCA_LS_V2] = "load_store",
92+
[SMCA_IF] = "insn_fetch",
93+
[SMCA_L2_CACHE] = "l2_cache",
94+
[SMCA_DE] = "decode_unit",
95+
[SMCA_RESERVED] = "reserved",
96+
[SMCA_EX] = "execution_unit",
97+
[SMCA_FP] = "floating_point",
98+
[SMCA_L3_CACHE] = "l3_cache",
99+
[SMCA_CS ... SMCA_CS_V2] = "coherent_slave",
100+
[SMCA_PIE] = "pie",
106101

107102
/* UMC v2 is separate because both of them can exist in a single system. */
108-
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
109-
[SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" },
110-
[SMCA_PB] = { "param_block", "Parameter Block" },
111-
[SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
112-
[SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" },
113-
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
114-
[SMCA_MPDMA] = { "mpdma", "MPDMA Unit" },
115-
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
116-
[SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" },
117-
[SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" },
118-
[SMCA_NBIF] = { "nbif", "NBIF Unit" },
119-
[SMCA_SHUB] = { "shub", "System Hub Unit" },
120-
[SMCA_SATA] = { "sata", "SATA Unit" },
121-
[SMCA_USB] = { "usb", "USB Unit" },
122-
[SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" },
123-
[SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" },
124-
[SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" },
125-
[SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" },
103+
[SMCA_UMC] = "umc",
104+
[SMCA_UMC_V2] = "umc_v2",
105+
[SMCA_MA_LLC] = "ma_llc",
106+
[SMCA_PB] = "param_block",
107+
[SMCA_PSP ... SMCA_PSP_V2] = "psp",
108+
[SMCA_SMU ... SMCA_SMU_V2] = "smu",
109+
[SMCA_MP5] = "mp5",
110+
[SMCA_MPDMA] = "mpdma",
111+
[SMCA_NBIO] = "nbio",
112+
[SMCA_PCIE ... SMCA_PCIE_V2] = "pcie",
113+
[SMCA_XGMI_PCS] = "xgmi_pcs",
114+
[SMCA_NBIF] = "nbif",
115+
[SMCA_SHUB] = "shub",
116+
[SMCA_SATA] = "sata",
117+
[SMCA_USB] = "usb",
118+
[SMCA_USR_DP] = "usr_dp",
119+
[SMCA_USR_CP] = "usr_cp",
120+
[SMCA_GMI_PCS] = "gmi_pcs",
121+
[SMCA_XGMI_PHY] = "xgmi_phy",
122+
[SMCA_WAFL_PHY] = "wafl_phy",
123+
[SMCA_GMI_PHY] = "gmi_phy",
126124
};
127125

128126
static const char *smca_get_name(enum smca_bank_types t)
129127
{
130128
if (t >= N_SMCA_BANK_TYPES)
131129
return NULL;
132130

133-
return smca_names[t].name;
134-
}
135-
136-
const char *smca_get_long_name(enum smca_bank_types t)
137-
{
138-
if (t >= N_SMCA_BANK_TYPES)
139-
return NULL;
140-
141-
return smca_names[t].long_name;
131+
return smca_names[t];
142132
}
143-
EXPORT_SYMBOL_GPL(smca_get_long_name);
144133

145134
enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
146135
{
@@ -178,6 +167,7 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
178167
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
179168
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
180169
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
170+
{ SMCA_MA_LLC, HWID_MCATYPE(0x2E, 0x4) },
181171

182172
/* Unified Memory Controller MCA type */
183173
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
@@ -212,6 +202,8 @@ static const struct smca_hwid smca_hwid_mcatypes[] = {
212202
{ SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) },
213203
{ SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) },
214204
{ SMCA_USB, HWID_MCATYPE(0xAA, 0x0) },
205+
{ SMCA_USR_DP, HWID_MCATYPE(0x170, 0x0) },
206+
{ SMCA_USR_CP, HWID_MCATYPE(0x180, 0x0) },
215207
{ SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) },
216208
{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) },
217209
{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) },

arch/x86/kernel/cpu/mce/core.c

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include <linux/sync_core.h>
4545
#include <linux/task_work.h>
4646
#include <linux/hardirq.h>
47+
#include <linux/kexec.h>
4748

4849
#include <asm/intel-family.h>
4950
#include <asm/processor.h>
@@ -233,6 +234,7 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
233234
struct llist_node *pending;
234235
struct mce_evt_llist *l;
235236
int apei_err = 0;
237+
struct page *p;
236238

237239
/*
238240
* Allow instrumentation around external facilities usage. Not that it
@@ -286,6 +288,20 @@ static noinstr void mce_panic(const char *msg, struct mce *final, char *exp)
286288
if (!fake_panic) {
287289
if (panic_timeout == 0)
288290
panic_timeout = mca_cfg.panic_timeout;
291+
292+
/*
293+
* Kdump skips the poisoned page in order to avoid
294+
* touching the error bits again. Poison the page even
295+
* if the error is fatal and the machine is about to
296+
* panic.
297+
*/
298+
if (kexec_crash_loaded()) {
299+
if (final && (final->status & MCI_STATUS_ADDRV)) {
300+
p = pfn_to_online_page(final->addr >> PAGE_SHIFT);
301+
if (p)
302+
SetPageHWPoison(p);
303+
}
304+
}
289305
panic(msg);
290306
} else
291307
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -670,6 +686,16 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
670686
barrier();
671687
m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
672688

689+
/*
690+
* Update storm tracking here, before checking for the
691+
* MCI_STATUS_VAL bit. Valid corrected errors count
692+
* towards declaring, or maintaining, storm status. No
693+
* error in a bank counts towards avoiding, or ending,
694+
* storm status.
695+
*/
696+
if (!mca_cfg.cmci_disabled)
697+
mce_track_storm(&m);
698+
673699
/* If this entry is not valid, ignore it */
674700
if (!(m.status & MCI_STATUS_VAL))
675701
continue;
@@ -1601,13 +1627,6 @@ static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
16011627
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
16021628
static DEFINE_PER_CPU(struct timer_list, mce_timer);
16031629

1604-
static unsigned long mce_adjust_timer_default(unsigned long interval)
1605-
{
1606-
return interval;
1607-
}
1608-
1609-
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
1610-
16111630
static void __start_timer(struct timer_list *t, unsigned long interval)
16121631
{
16131632
unsigned long when = jiffies + interval;
@@ -1637,15 +1656,9 @@ static void mce_timer_fn(struct timer_list *t)
16371656

16381657
iv = __this_cpu_read(mce_next_interval);
16391658

1640-
if (mce_available(this_cpu_ptr(&cpu_info))) {
1659+
if (mce_available(this_cpu_ptr(&cpu_info)))
16411660
mc_poll_banks();
16421661

1643-
if (mce_intel_cmci_poll()) {
1644-
iv = mce_adjust_timer(iv);
1645-
goto done;
1646-
}
1647-
}
1648-
16491662
/*
16501663
* Alert userspace if needed. If we logged an MCE, reduce the polling
16511664
* interval, otherwise increase the polling interval.
@@ -1655,23 +1668,29 @@ static void mce_timer_fn(struct timer_list *t)
16551668
else
16561669
iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
16571670

1658-
done:
1659-
__this_cpu_write(mce_next_interval, iv);
1660-
__start_timer(t, iv);
1671+
if (mce_get_storm_mode()) {
1672+
__start_timer(t, HZ);
1673+
} else {
1674+
__this_cpu_write(mce_next_interval, iv);
1675+
__start_timer(t, iv);
1676+
}
16611677
}
16621678

16631679
/*
1664-
* Ensure that the timer is firing in @interval from now.
1680+
* When a storm starts on any bank on this CPU, switch to polling
1681+
* once per second. When the storm ends, revert to the default
1682+
* polling interval.
16651683
*/
1666-
void mce_timer_kick(unsigned long interval)
1684+
void mce_timer_kick(bool storm)
16671685
{
16681686
struct timer_list *t = this_cpu_ptr(&mce_timer);
1669-
unsigned long iv = __this_cpu_read(mce_next_interval);
16701687

1671-
__start_timer(t, interval);
1688+
mce_set_storm_mode(storm);
16721689

1673-
if (interval < iv)
1674-
__this_cpu_write(mce_next_interval, interval);
1690+
if (storm)
1691+
__start_timer(t, HZ);
1692+
else
1693+
__this_cpu_write(mce_next_interval, check_interval * HZ);
16751694
}
16761695

16771696
/* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1995,7 +2014,6 @@ static void mce_zhaoxin_feature_init(struct cpuinfo_x86 *c)
19952014

19962015
intel_init_cmci();
19972016
intel_init_lmce();
1998-
mce_adjust_timer = cmci_intel_adjust_timer;
19992017
}
20002018

20012019
static void mce_zhaoxin_feature_clear(struct cpuinfo_x86 *c)
@@ -2008,7 +2026,6 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
20082026
switch (c->x86_vendor) {
20092027
case X86_VENDOR_INTEL:
20102028
mce_intel_feature_init(c);
2011-
mce_adjust_timer = cmci_intel_adjust_timer;
20122029
break;
20132030

20142031
case X86_VENDOR_AMD: {
@@ -2568,9 +2585,6 @@ static int mce_device_create(unsigned int cpu)
25682585
int err;
25692586
int i, j;
25702587

2571-
if (!mce_available(&boot_cpu_data))
2572-
return -EIO;
2573-
25742588
dev = per_cpu(mce_device, cpu);
25752589
if (dev)
25762590
return 0;
@@ -2665,8 +2679,6 @@ static void mce_reenable_cpu(void)
26652679

26662680
static int mce_cpu_dead(unsigned int cpu)
26672681
{
2668-
mce_intel_hcpu_update(cpu);
2669-
26702682
/* intentionally ignoring frozen here */
26712683
if (!cpuhp_tasks_frozen)
26722684
cmci_rediscover();

arch/x86/kernel/cpu/mce/inject.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,7 @@ static void check_hw_inj_possible(void)
746746

747747
wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), status);
748748
rdmsrl_safe(mca_msr_reg(bank, MCA_STATUS), &status);
749+
wrmsrl_safe(mca_msr_reg(bank, MCA_STATUS), 0);
749750

750751
if (!status) {
751752
hw_injection_possible = false;

0 commit comments

Comments
 (0)