Skip to content

Commit 7eee1e9

Browse files
yghannam authored and bp3tk0v committed
x86/mce: Separate global and per-CPU quirks
Many quirks are global configuration settings and a handful apply to each CPU.

Move the per-CPU quirks to vendor init to execute them on each online CPU. Set the global quirks during BSP-only init so they're only executed once and early.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com
1 parent a46b2bb commit 7eee1e9

3 files changed

Lines changed: 65 additions & 62 deletions

File tree

arch/x86/kernel/cpu/mce/amd.c

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -646,13 +646,37 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
646646
wrmsrq(MSR_K7_HWCR, hwcr);
647647
}
648648

649+
/*
 * Apply the AMD quirks that adjust the executing CPU's entries in
 * mce_banks_array (as opposed to global mca_cfg/mce_flags settings):
 *
 *  - family 15 (0xf) with more than 4 banks: clear bit 10 of bank 4's ctl
 *  - family 6 with at least one bank: zero bank 0's ctl
 *
 * @c: CPU info; only the family (c->x86) is examined here.
 *
 * Called at the top of mce_amd_feature_init().
 */
static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
650+
{
651+
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
652+
653+
/* This should be disabled by the BIOS, but isn't always */
654+
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
655+
/*
656+
* disable GART TBL walk error reporting, which
657+
* trips off incorrectly with the IOMMU & 3ware
658+
* & Cerberus:
659+
*/
660+
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
661+
}
662+
663+
/*
664+
* Various K7s with broken bank 0 around. Always disable
665+
* by default.
666+
*/
667+
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
668+
mce_banks[0].ctl = 0;
669+
}
670+
649671
/* cpu init entry point, called from mce.c with preempt off */
650672
void mce_amd_feature_init(struct cpuinfo_x86 *c)
651673
{
652674
unsigned int bank, block, cpu = smp_processor_id();
653675
u32 low = 0, high = 0, address = 0;
654676
int offset = -1;
655677

678+
amd_apply_cpu_quirks(c);
679+
656680
mce_flags.amd_threshold = 1;
657681

658682
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {

arch/x86/kernel/cpu/mce/core.c

Lines changed: 23 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1807,8 +1807,9 @@ static void __mcheck_cpu_mce_banks_init(void)
18071807
struct mce_bank *b = &mce_banks[i];
18081808

18091809
/*
1810-
* Init them all, __mcheck_cpu_apply_quirks() is going to apply
1811-
* the required vendor quirks before
1810+
* Init them all by default.
1811+
*
1812+
* The required vendor quirks will be applied before
18121813
* __mcheck_cpu_init_prepare_banks() does the final bank setup.
18131814
*/
18141815
b->ctl = -1ULL;
@@ -1880,20 +1881,8 @@ static void __mcheck_cpu_init_prepare_banks(void)
18801881
}
18811882
}
18821883

1883-
static void apply_quirks_amd(struct cpuinfo_x86 *c)
1884+
static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
18841885
{
1885-
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1886-
1887-
/* This should be disabled by the BIOS, but isn't always */
1888-
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
1889-
/*
1890-
* disable GART TBL walk error reporting, which
1891-
* trips off incorrectly with the IOMMU & 3ware
1892-
* & Cerberus:
1893-
*/
1894-
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1895-
}
1896-
18971886
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
18981887
/*
18991888
* Lots of broken BIOS around that don't clear them
@@ -1902,13 +1891,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
19021891
mca_cfg.bootlog = 0;
19031892
}
19041893

1905-
/*
1906-
* Various K7s with broken bank 0 around. Always disable
1907-
* by default.
1908-
*/
1909-
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
1910-
mce_banks[0].ctl = 0;
1911-
19121894
/*
19131895
* overflow_recov is supported for F15h Models 00h-0fh
19141896
* even though we don't have a CPUID bit for it.
@@ -1920,25 +1902,12 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
19201902
mce_flags.zen_ifu_quirk = 1;
19211903
}
19221904

1923-
static void apply_quirks_intel(struct cpuinfo_x86 *c)
1905+
static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
19241906
{
1925-
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
1926-
19271907
/* Older CPUs (prior to family 6) don't need quirks. */
19281908
if (c->x86_vfm < INTEL_PENTIUM_PRO)
19291909
return;
19301910

1931-
/*
1932-
* SDM documents that on family 6 bank 0 should not be written
1933-
* because it aliases to another special BIOS controlled
1934-
* register.
1935-
* But it's not aliased anymore on model 0x1a+
1936-
* Don't ignore bank 0 completely because there could be a
1937-
* valid event later, merely don't write CTL0.
1938-
*/
1939-
if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
1940-
mce_banks[0].init = false;
1941-
19421911
/*
19431912
* All newer Intel systems support MCE broadcasting. Enable
19441913
* synchronization with a one second timeout.
@@ -1964,7 +1933,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
19641933
mce_flags.skx_repmov_quirk = 1;
19651934
}
19661935

1967-
static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
1936+
static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
19681937
{
19691938
/*
19701939
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
@@ -1976,29 +1945,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
19761945
}
19771946
}
19781947

1979-
/* Add per CPU specific workarounds here */
1980-
static void __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
1981-
{
1982-
struct mca_config *cfg = &mca_cfg;
1983-
1984-
switch (c->x86_vendor) {
1985-
case X86_VENDOR_AMD:
1986-
apply_quirks_amd(c);
1987-
break;
1988-
case X86_VENDOR_INTEL:
1989-
apply_quirks_intel(c);
1990-
break;
1991-
case X86_VENDOR_ZHAOXIN:
1992-
apply_quirks_zhaoxin(c);
1993-
break;
1994-
}
1995-
1996-
if (cfg->monarch_timeout < 0)
1997-
cfg->monarch_timeout = 0;
1998-
if (cfg->bootlog != 0)
1999-
cfg->panic_timeout = 30;
2000-
}
2001-
20021948
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
20031949
{
20041950
if (c->x86 != 5)
@@ -2256,6 +2202,23 @@ void mca_bsp_init(struct cpuinfo_x86 *c)
22562202

22572203
if (cap & MCG_SER_P)
22582204
mca_cfg.ser = 1;
2205+
2206+
switch (c->x86_vendor) {
2207+
case X86_VENDOR_AMD:
2208+
amd_apply_global_quirks(c);
2209+
break;
2210+
case X86_VENDOR_INTEL:
2211+
intel_apply_global_quirks(c);
2212+
break;
2213+
case X86_VENDOR_ZHAOXIN:
2214+
zhaoxin_apply_global_quirks(c);
2215+
break;
2216+
}
2217+
2218+
if (mca_cfg.monarch_timeout < 0)
2219+
mca_cfg.monarch_timeout = 0;
2220+
if (mca_cfg.bootlog != 0)
2221+
mca_cfg.panic_timeout = 30;
22592222
}
22602223

22612224
/*
@@ -2275,8 +2238,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
22752238

22762239
__mcheck_cpu_cap_init();
22772240

2278-
__mcheck_cpu_apply_quirks(c);
2279-
22802241
if (!mce_gen_pool_init()) {
22812242
mca_cfg.disabled = 1;
22822243
pr_emerg("Couldn't allocate MCE records pool!\n");

arch/x86/kernel/cpu/mce/intel.c

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
468468
}
469469
}
470470

471+
/*
 * Apply the Intel quirk that adjusts the executing CPU's entries in
 * mce_banks_array: on models older than INTEL_NEHALEM_EP with at least one
 * bank, clear the init flag of bank 0.
 *
 * @c: CPU info; only c->x86_vfm is examined here.
 *
 * Called first in mce_intel_feature_init().
 */
static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
472+
{
473+
/*
474+
* SDM documents that on family 6 bank 0 should not be written
475+
* because it aliases to another special BIOS controlled
476+
* register.
477+
* But it's not aliased anymore on model 0x1a+
478+
* Don't ignore bank 0 completely because there could be a
479+
* valid event later, merely don't write CTL0.
480+
*
481+
* Older CPUs (prior to family 6) can't reach this point and already
482+
* return early due to the check of __mcheck_cpu_ancient_init().
483+
*/
484+
if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
485+
this_cpu_ptr(mce_banks_array)[0].init = false;
486+
}
487+
471488
void mce_intel_feature_init(struct cpuinfo_x86 *c)
472489
{
490+
intel_apply_cpu_quirks(c);
473491
intel_init_cmci();
474492
intel_init_lmce();
475493
intel_imc_init(c);

0 commit comments

Comments
 (0)