@@ -272,8 +272,9 @@ struct microcode_ctrl {
272272
273273DEFINE_STATIC_KEY_FALSE (microcode_nmi_handler_enable );
274274static DEFINE_PER_CPU (struct microcode_ctrl , ucode_ctrl ) ;
275+ static atomic_t late_cpus_in , offline_in_nmi ;
275276static unsigned int loops_per_usec ;
276- static atomic_t late_cpus_in ;
277+ static cpumask_t cpu_offline_mask ;
277278
278279static noinstr bool wait_for_cpus (atomic_t * cnt )
279280{
@@ -381,7 +382,7 @@ static noinstr void load_secondary(unsigned int cpu)
381382 instrumentation_end ();
382383}
383384
384- static void load_primary (unsigned int cpu )
385+ static void __load_primary (unsigned int cpu )
385386{
386387 struct cpumask * secondaries = topology_sibling_cpumask (cpu );
387388 enum sibling_ctrl ctrl ;
@@ -416,6 +417,67 @@ static void load_primary(unsigned int cpu)
416417 }
417418}
418419
/*
 * Kick the soft-offlined SMT siblings into the NMI rendezvous.
 *
 * @nr_offl:	Number of CPUs in cpu_offline_mask which are expected to
 *		check in via the offline_in_nmi counter
 *
 * Returns true if all of them arrived in the stub NMI handler within
 * half a second, false otherwise.
 */
static bool kick_offline_cpus(unsigned int nr_offl)
{
	unsigned int cpu, timeout;

	for_each_cpu(cpu, &cpu_offline_mask) {
		/*
		 * Enable the rendezvous handler and send NMI. nmi_enabled
		 * must be set before the NMI is sent, otherwise the stub
		 * handler ignores the NMI as unrelated.
		 */
		per_cpu(ucode_ctrl.nmi_enabled, cpu) = true;
		apic_send_nmi_to_offline_cpu(cpu);
	}

	/* Wait for them to arrive (poll in 1us steps, up to 500ms total) */
	for (timeout = 0; timeout < (USEC_PER_SEC / 2); timeout++) {
		if (atomic_read(&offline_in_nmi) == nr_offl)
			return true;
		udelay(1);
	}
	/* Let the others time out */
	return false;
}
439+
/*
 * Release the soft-offlined CPUs from the rendezvous.
 *
 * Writing SCTRL_DONE into their per CPU control field lets the stub NMI
 * handler's wait_for_ctrl() loop terminate so the CPUs can return to
 * their offline idle state.
 */
static void release_offline_cpus(void)
{
	unsigned int cpu;

	for_each_cpu(cpu, &cpu_offline_mask)
		per_cpu(ucode_ctrl.ctrl, cpu) = SCTRL_DONE;
}
447+
448+ static void load_primary (unsigned int cpu )
449+ {
450+ unsigned int nr_offl = cpumask_weight (& cpu_offline_mask );
451+ bool proceed = true;
452+
453+ /* Kick soft-offlined SMT siblings if required */
454+ if (!cpu && nr_offl )
455+ proceed = kick_offline_cpus (nr_offl );
456+
457+ /* If the soft-offlined CPUs did not respond, abort */
458+ if (proceed )
459+ __load_primary (cpu );
460+
461+ /* Unconditionally release soft-offlined SMT siblings if required */
462+ if (!cpu && nr_offl )
463+ release_offline_cpus ();
464+ }
465+
/*
 * Minimal stub rendezvous handler for soft-offlined CPUs which participate
 * in the NMI rendezvous to protect against a concurrent NMI on affected
 * CPUs.
 *
 * noinstr: runs in NMI context on an offline CPU; no instrumentable code
 * and only raw_* accessors are allowed here.
 */
void noinstr microcode_offline_nmi_handler(void)
{
	/* Ignore NMIs which were not requested via kick_offline_cpus() */
	if (!raw_cpu_read(ucode_ctrl.nmi_enabled))
		return;
	/* Disarm, so that a later unrelated NMI is ignored again */
	raw_cpu_write(ucode_ctrl.nmi_enabled, false);
	/* Record this CPU's outcome for the result evaluation loop */
	raw_cpu_write(ucode_ctrl.result, UCODE_OFFLINE);
	/* Check in, so kick_offline_cpus() can observe the arrival */
	raw_atomic_inc(&offline_in_nmi);
	/* Park here until the control CPU sets ctrl to SCTRL_DONE */
	wait_for_ctrl();
}
480+
419481static noinstr bool microcode_update_handler (void )
420482{
421483 unsigned int cpu = raw_smp_processor_id ();
@@ -472,13 +534,15 @@ static int load_cpus_stopped(void *unused)
472534static int load_late_stop_cpus (void )
473535{
474536 unsigned int cpu , updated = 0 , failed = 0 , timedout = 0 , siblings = 0 ;
537+ unsigned int nr_offl , offline = 0 ;
475538 int old_rev = boot_cpu_data .microcode ;
476539 struct cpuinfo_x86 prev_info ;
477540
478541 pr_err ("Attempting late microcode loading - it is dangerous and taints the kernel.\n" );
479542 pr_err ("You should switch to early loading, if possible.\n" );
480543
481544 atomic_set (& late_cpus_in , num_online_cpus ());
545+ atomic_set (& offline_in_nmi , 0 );
482546 loops_per_usec = loops_per_jiffy / (TICK_NSEC / 1000 );
483547
484548 /*
@@ -501,6 +565,7 @@ static int load_late_stop_cpus(void)
501565 case UCODE_UPDATED : updated ++ ; break ;
502566 case UCODE_TIMEOUT : timedout ++ ; break ;
503567 case UCODE_OK : siblings ++ ; break ;
568+ case UCODE_OFFLINE : offline ++ ; break ;
504569 default : failed ++ ; break ;
505570 }
506571 }
@@ -512,6 +577,13 @@ static int load_late_stop_cpus(void)
512577 /* Nothing changed. */
513578 if (!failed && !timedout )
514579 return 0 ;
580+
581+ nr_offl = cpumask_weight (& cpu_offline_mask );
582+ if (offline < nr_offl ) {
583+ pr_warn ("%u offline siblings did not respond.\n" ,
584+ nr_offl - atomic_read (& offline_in_nmi ));
585+ return - EIO ;
586+ }
515587 pr_err ("update failed: %u CPUs failed %u CPUs timed out\n" ,
516588 failed , timedout );
517589 return - EIO ;
@@ -545,19 +617,49 @@ static int load_late_stop_cpus(void)
545617 * modern CPUs uses MWAIT, which is also not guaranteed to be safe
546618 * against a microcode update which affects MWAIT.
547619 *
548- * 2) Initialize the per CPU control structure
620+ * As soft-offlined CPUs still react on NMIs, the SMT sibling
621+ * restriction can be lifted when the vendor driver signals to use NMI
622+ * for rendezvous and the APIC provides a mechanism to send an NMI to a
623+ * soft-offlined CPU. The soft-offlined CPUs are then able to
624+ * participate in the rendezvous in a trivial stub handler.
625+ *
626+ * 2) Initialize the per CPU control structure and create a cpumask
627+  which contains the "offline" secondary threads, so they can be handled
628+ * correctly by a control CPU.
549629 */
550630static bool setup_cpus (void )
551631{
552632 struct microcode_ctrl ctrl = { .ctrl = SCTRL_WAIT , .result = -1 , };
633+ bool allow_smt_offline ;
553634 unsigned int cpu ;
554635
636+ allow_smt_offline = microcode_ops -> nmi_safe ||
637+ (microcode_ops -> use_nmi && apic -> nmi_to_offline_cpu );
638+
639+ cpumask_clear (& cpu_offline_mask );
640+
555641 for_each_cpu_and (cpu , cpu_present_mask , & cpus_booted_once_mask ) {
642+ /*
643+ * Offline CPUs sit in one of the play_dead() functions
644+ * with interrupts disabled, but they still react on NMIs
645+ * and execute arbitrary code. Also MWAIT being updated
646+ * while the offline CPU sits there is not necessarily safe
647+ * on all CPU variants.
648+ *
649+ * Mark them in the offline_cpus mask which will be handled
650+ * by CPU0 later in the update process.
651+ *
652+ * Ensure that the primary thread is online so that it is
653+ * guaranteed that all cores are updated.
654+ */
556655 if (!cpu_online (cpu )) {
557- if (topology_is_primary_thread (cpu ) || !microcode_ops -> nmi_safe ) {
558- pr_err ("CPU %u not online\n" , cpu );
656+ if (topology_is_primary_thread (cpu ) || !allow_smt_offline ) {
657+ pr_err ("CPU %u not online, loading aborted \n" , cpu );
559658 return false;
560659 }
660+ cpumask_set_cpu (cpu , & cpu_offline_mask );
661+ per_cpu (ucode_ctrl , cpu ) = ctrl ;
662+ continue ;
561663 }
562664
563665 /*
0 commit comments