Skip to content

Commit 0b62f6c

Browse files
KAGA-KOKObp3tk0v
authored and committed
x86/microcode/32: Move early loading after paging enable
32-bit loads microcode before paging is enabled. The commit which introduced that has zero justification in the changelog. The cover letter has slightly more content, but it does not give any technical justification either: "The problem in current microcode loading method is that we load a microcode way, way too late; ideally we should load it before turning paging on. This may only be practical on 32 bits since we can't get to 64-bit mode without paging on, but we should still do it as early as at all possible." Handwaving word salad with zero technical content. Someone claimed in an offlist conversation that this is required for curing the ATOM erratum AAE44/AAF40/AAG38/AAH41. That erratum requires an microcode update in order to make the usage of PSE safe. But during early boot, PSE is completely irrelevant and it is evaluated way later. Neither is it relevant for the AP on single core HT enabled CPUs as the microcode loading on the AP is not doing anything. On dual core CPUs there is a theoretical problem if a split of an executable large page between enabling paging including PSE and loading the microcode happens. But that's only theoretical, it's practically irrelevant because the affected dual core CPUs are 64bit enabled and therefore have paging and PSE enabled before loading the microcode on the second core. So why would it work on 64-bit but not on 32-bit? The erratum: "AAG38 Code Fetch May Occur to Incorrect Address After a Large Page is Split Into 4-Kbyte Pages Problem: If software clears the PS (page size) bit in a present PDE (page directory entry), that will cause linear addresses mapped through this PDE to use 4-KByte pages instead of using a large page after old TLB entries are invalidated. Due to this erratum, if a code fetch uses this PDE before the TLB entry for the large page is invalidated then it may fetch from a different physical address than specified by either the old large page translation or the new 4-KByte page translation. 
This erratum may also cause speculative code fetches from incorrect addresses." The practical relevance for this is exactly zero because there is no splitting of large text pages during early boot-time, i.e. between paging enable and microcode loading, and neither during CPU hotplug. IOW, this load microcode before paging enable is yet another voodoo programming solution in search of a problem. What's worse is that it causes at least two serious problems: 1) When stackprotector is enabled, the microcode loader code has the stackprotector mechanics enabled. The read from the per CPU variable __stack_chk_guard is always accessing the virtual address either directly on UP or via %fs on SMP. In physical address mode this results in an access to memory above 3GB. So this works by chance as the hardware returns the same value when there is no RAM at this physical address. When there is RAM populated above 3G then the read is by chance the same as nothing changes that memory during the very early boot stage. That's not necessarily true during runtime CPU hotplug. 2) When function tracing is enabled, the relevant microcode loader functions and the functions invoked from there will call into the tracing code and evaluate global and per CPU variables in physical address mode. What could potentially go wrong? Cure this and move the microcode loading after the early paging enable, use the new temporary initrd mapping and remove the gunk in the microcode loader which is required to handle physical address mode. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Link: https://lore.kernel.org/r/20231017211722.348298216@linutronix.de
1 parent 4c585af commit 0b62f6c

9 files changed

Lines changed: 71 additions & 270 deletions

File tree

arch/x86/include/asm/microcode.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,6 @@ static inline u32 intel_get_microcode_revision(void)
7070

7171
return rev;
7272
}
73-
74-
void show_ucode_info_early(void);
75-
76-
#else /* CONFIG_CPU_SUP_INTEL */
77-
static inline void show_ucode_info_early(void) { }
7873
#endif /* !CONFIG_CPU_SUP_INTEL */
7974

8075
#endif /* _ASM_X86_MICROCODE_H */

arch/x86/kernel/cpu/common.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2166,8 +2166,6 @@ static inline void setup_getcpu(int cpu)
21662166
}
21672167

21682168
#ifdef CONFIG_X86_64
2169-
static inline void ucode_cpu_init(int cpu) { }
2170-
21712169
static inline void tss_setup_ist(struct tss_struct *tss)
21722170
{
21732171
/* Set up the per-CPU TSS IST stacks */
@@ -2178,16 +2176,8 @@ static inline void tss_setup_ist(struct tss_struct *tss)
21782176
/* Only mapped when SEV-ES is active */
21792177
tss->x86_tss.ist[IST_INDEX_VC] = __this_cpu_ist_top_va(VC);
21802178
}
2181-
21822179
#else /* CONFIG_X86_64 */
2183-
2184-
static inline void ucode_cpu_init(int cpu)
2185-
{
2186-
show_ucode_info_early();
2187-
}
2188-
21892180
static inline void tss_setup_ist(struct tss_struct *tss) { }
2190-
21912181
#endif /* !CONFIG_X86_64 */
21922182

21932183
static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@ -2243,8 +2233,6 @@ void cpu_init(void)
22432233
struct task_struct *cur = current;
22442234
int cpu = raw_smp_processor_id();
22452235

2246-
ucode_cpu_init(cpu);
2247-
22482236
#ifdef CONFIG_NUMA
22492237
if (this_cpu_read(numa_node) == 0 &&
22502238
early_cpu_to_node(cpu) != NUMA_NO_NODE)

arch/x86/kernel/cpu/microcode/amd.c

Lines changed: 32 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -121,24 +121,20 @@ static u16 find_equiv_id(struct equiv_cpu_table *et, u32 sig)
121121

122122
/*
123123
* Check whether there is a valid microcode container file at the beginning
124-
* of @buf of size @buf_size. Set @early to use this function in the early path.
124+
* of @buf of size @buf_size.
125125
*/
126-
static bool verify_container(const u8 *buf, size_t buf_size, bool early)
126+
static bool verify_container(const u8 *buf, size_t buf_size)
127127
{
128128
u32 cont_magic;
129129

130130
if (buf_size <= CONTAINER_HDR_SZ) {
131-
if (!early)
132-
pr_debug("Truncated microcode container header.\n");
133-
131+
pr_debug("Truncated microcode container header.\n");
134132
return false;
135133
}
136134

137135
cont_magic = *(const u32 *)buf;
138136
if (cont_magic != UCODE_MAGIC) {
139-
if (!early)
140-
pr_debug("Invalid magic value (0x%08x).\n", cont_magic);
141-
137+
pr_debug("Invalid magic value (0x%08x).\n", cont_magic);
142138
return false;
143139
}
144140

@@ -147,23 +143,20 @@ static bool verify_container(const u8 *buf, size_t buf_size, bool early)
147143

148144
/*
149145
* Check whether there is a valid, non-truncated CPU equivalence table at the
150-
* beginning of @buf of size @buf_size. Set @early to use this function in the
151-
* early path.
146+
* beginning of @buf of size @buf_size.
152147
*/
153-
static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early)
148+
static bool verify_equivalence_table(const u8 *buf, size_t buf_size)
154149
{
155150
const u32 *hdr = (const u32 *)buf;
156151
u32 cont_type, equiv_tbl_len;
157152

158-
if (!verify_container(buf, buf_size, early))
153+
if (!verify_container(buf, buf_size))
159154
return false;
160155

161156
cont_type = hdr[1];
162157
if (cont_type != UCODE_EQUIV_CPU_TABLE_TYPE) {
163-
if (!early)
164-
pr_debug("Wrong microcode container equivalence table type: %u.\n",
165-
cont_type);
166-
158+
pr_debug("Wrong microcode container equivalence table type: %u.\n",
159+
cont_type);
167160
return false;
168161
}
169162

@@ -172,9 +165,7 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early)
172165
equiv_tbl_len = hdr[2];
173166
if (equiv_tbl_len < sizeof(struct equiv_cpu_entry) ||
174167
buf_size < equiv_tbl_len) {
175-
if (!early)
176-
pr_debug("Truncated equivalence table.\n");
177-
168+
pr_debug("Truncated equivalence table.\n");
178169
return false;
179170
}
180171

@@ -183,22 +174,19 @@ static bool verify_equivalence_table(const u8 *buf, size_t buf_size, bool early)
183174

184175
/*
185176
* Check whether there is a valid, non-truncated microcode patch section at the
186-
* beginning of @buf of size @buf_size. Set @early to use this function in the
187-
* early path.
177+
* beginning of @buf of size @buf_size.
188178
*
189179
* On success, @sh_psize returns the patch size according to the section header,
190180
* to the caller.
191181
*/
192182
static bool
193-
__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early)
183+
__verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize)
194184
{
195185
u32 p_type, p_size;
196186
const u32 *hdr;
197187

198188
if (buf_size < SECTION_HDR_SIZE) {
199-
if (!early)
200-
pr_debug("Truncated patch section.\n");
201-
189+
pr_debug("Truncated patch section.\n");
202190
return false;
203191
}
204192

@@ -207,17 +195,13 @@ __verify_patch_section(const u8 *buf, size_t buf_size, u32 *sh_psize, bool early
207195
p_size = hdr[1];
208196

209197
if (p_type != UCODE_UCODE_TYPE) {
210-
if (!early)
211-
pr_debug("Invalid type field (0x%x) in container file section header.\n",
212-
p_type);
213-
198+
pr_debug("Invalid type field (0x%x) in container file section header.\n",
199+
p_type);
214200
return false;
215201
}
216202

217203
if (p_size < sizeof(struct microcode_header_amd)) {
218-
if (!early)
219-
pr_debug("Patch of size %u too short.\n", p_size);
220-
204+
pr_debug("Patch of size %u too short.\n", p_size);
221205
return false;
222206
}
223207

@@ -269,15 +253,15 @@ static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size
269253
* 0: success
270254
*/
271255
static int
272-
verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool early)
256+
verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size)
273257
{
274258
struct microcode_header_amd *mc_hdr;
275259
unsigned int ret;
276260
u32 sh_psize;
277261
u16 proc_id;
278262
u8 patch_fam;
279263

280-
if (!__verify_patch_section(buf, buf_size, &sh_psize, early))
264+
if (!__verify_patch_section(buf, buf_size, &sh_psize))
281265
return -1;
282266

283267
/*
@@ -292,25 +276,21 @@ verify_patch(u8 family, const u8 *buf, size_t buf_size, u32 *patch_size, bool ea
292276
* size sh_psize, as the section claims.
293277
*/
294278
if (buf_size < sh_psize) {
295-
if (!early)
296-
pr_debug("Patch of size %u truncated.\n", sh_psize);
297-
279+
pr_debug("Patch of size %u truncated.\n", sh_psize);
298280
return -1;
299281
}
300282

301283
ret = __verify_patch_size(family, sh_psize, buf_size);
302284
if (!ret) {
303-
if (!early)
304-
pr_debug("Per-family patch size mismatch.\n");
285+
pr_debug("Per-family patch size mismatch.\n");
305286
return -1;
306287
}
307288

308289
*patch_size = sh_psize;
309290

310291
mc_hdr = (struct microcode_header_amd *)(buf + SECTION_HDR_SIZE);
311292
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
312-
if (!early)
313-
pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id);
293+
pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", mc_hdr->patch_id);
314294
return -1;
315295
}
316296

@@ -337,7 +317,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
337317
u16 eq_id;
338318
u8 *buf;
339319

340-
if (!verify_equivalence_table(ucode, size, true))
320+
if (!verify_equivalence_table(ucode, size))
341321
return 0;
342322

343323
buf = ucode;
@@ -364,7 +344,7 @@ static size_t parse_container(u8 *ucode, size_t size, struct cont_desc *desc)
364344
u32 patch_size;
365345
int ret;
366346

367-
ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size, true);
347+
ret = verify_patch(x86_family(desc->cpuid_1_eax), buf, size, &patch_size);
368348
if (ret < 0) {
369349
/*
370350
* Patch verification failed, skip to the next container, if
@@ -456,14 +436,8 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
456436
{
457437
struct cont_desc desc = { 0 };
458438
struct microcode_amd *mc;
459-
u32 rev, dummy, *new_rev;
460439
bool ret = false;
461-
462-
#ifdef CONFIG_X86_32
463-
new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
464-
#else
465-
new_rev = &ucode_new_rev;
466-
#endif
440+
u32 rev, dummy;
467441

468442
desc.cpuid_1_eax = cpuid_1_eax;
469443

@@ -484,8 +458,8 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
484458
return ret;
485459

486460
if (!__apply_microcode_amd(mc)) {
487-
*new_rev = mc->hdr.patch_id;
488-
ret = true;
461+
ucode_new_rev = mc->hdr.patch_id;
462+
ret = true;
489463
}
490464

491465
return ret;
@@ -514,26 +488,13 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
514488

515489
static void find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data *ret)
516490
{
517-
struct ucode_cpu_info *uci;
518491
struct cpio_data cp;
519-
const char *path;
520-
bool use_pa;
521-
522-
if (IS_ENABLED(CONFIG_X86_32)) {
523-
uci = (struct ucode_cpu_info *)__pa_nodebug(ucode_cpu_info);
524-
path = (const char *)__pa_nodebug(ucode_path);
525-
use_pa = true;
526-
} else {
527-
uci = ucode_cpu_info;
528-
path = ucode_path;
529-
use_pa = false;
530-
}
531492

532493
if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax)))
533-
cp = find_microcode_in_initrd(path, use_pa);
494+
cp = find_microcode_in_initrd(ucode_path);
534495

535496
/* Needed in load_microcode_amd() */
536-
uci->cpu_sig.sig = cpuid_1_eax;
497+
ucode_cpu_info->cpu_sig.sig = cpuid_1_eax;
537498

538499
*ret = cp;
539500
}
@@ -562,7 +523,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
562523
enum ucode_state ret;
563524
struct cpio_data cp;
564525

565-
cp = find_microcode_in_initrd(ucode_path, false);
526+
cp = find_microcode_in_initrd(ucode_path);
566527
if (!(cp.data && cp.size))
567528
return -EINVAL;
568529

@@ -738,7 +699,7 @@ static size_t install_equiv_cpu_table(const u8 *buf, size_t buf_size)
738699
u32 equiv_tbl_len;
739700
const u32 *hdr;
740701

741-
if (!verify_equivalence_table(buf, buf_size, false))
702+
if (!verify_equivalence_table(buf, buf_size))
742703
return 0;
743704

744705
hdr = (const u32 *)buf;
@@ -784,7 +745,7 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover,
784745
u16 proc_id;
785746
int ret;
786747

787-
ret = verify_patch(family, fw, leftover, patch_size, false);
748+
ret = verify_patch(family, fw, leftover, patch_size);
788749
if (ret)
789750
return ret;
790751

@@ -918,7 +879,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device)
918879
}
919880

920881
ret = UCODE_ERROR;
921-
if (!verify_container(fw->data, fw->size, false))
882+
if (!verify_container(fw->data, fw->size))
922883
goto fw_release;
923884

924885
ret = load_microcode_amd(c->x86, fw->data, fw->size);

0 commit comments

Comments
 (0)