@@ -1796,13 +1796,28 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
  */
 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 {
+	/* When not in the task's cpumask, no point in looking further. */
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 		return false;
 
-	if (is_per_cpu_kthread(p) || is_migration_disabled(p))
+	/* migrate_disabled() must be allowed to finish. */
+	if (is_migration_disabled(p))
 		return cpu_online(cpu);
 
-	return cpu_active(cpu);
+	/* Non kernel threads are not allowed during either online or offline. */
+	if (!(p->flags & PF_KTHREAD))
+		return cpu_active(cpu);
+
+	/* KTHREAD_IS_PER_CPU is always allowed. */
+	if (kthread_is_per_cpu(p))
+		return cpu_online(cpu);
+
+	/* Regular kernel threads don't get to stay during offline. */
+	if (cpu_rq(cpu)->balance_push)
+		return false;
+
+	/* But are allowed during online. */
+	return cpu_online(cpu);
 }
 
 /*
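
The rewritten is_cpu_allowed() is a strict top-to-bottom ladder, and the ordering of the checks is what carries the policy: migrate-disabled tasks and per-CPU kthreads may stay on any online CPU, ordinary user tasks need an active CPU, and ordinary kthreads are additionally refused once balance_push is set. As a rough illustration only (plain userspace C; every name in it is invented for the model, none of it is kernel code), the following mirrors the ladder and prints what each task class gets on an active, a going-down, and an offline CPU:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative task classes and CPU states; names are made up for the model. */
struct m_task {
	bool in_cpumask;         /* cpumask_test_cpu(cpu, p->cpus_ptr) */
	bool migration_disabled; /* is_migration_disabled(p)           */
	bool kthread;            /* p->flags & PF_KTHREAD              */
	bool per_cpu_kthread;    /* kthread_is_per_cpu(p)              */
};

struct m_cpu {
	bool online;             /* cpu_online(cpu)           */
	bool active;             /* cpu_active(cpu)           */
	bool balance_push;       /* cpu_rq(cpu)->balance_push */
};

/* Same decision ladder as the new is_cpu_allowed(), top to bottom. */
static bool m_is_cpu_allowed(const struct m_task *p, const struct m_cpu *c)
{
	if (!p->in_cpumask)
		return false;
	if (p->migration_disabled)
		return c->online;
	if (!p->kthread)
		return c->active;
	if (p->per_cpu_kthread)
		return c->online;
	if (c->balance_push)
		return false;
	return c->online;
}

int main(void)
{
	const struct m_cpu active  = { .online = true,  .active = true,  .balance_push = false };
	const struct m_cpu dying   = { .online = true,  .active = false, .balance_push = true  };
	const struct m_cpu offline = { .online = false, .active = false, .balance_push = false };

	const struct m_task user    = { .in_cpumask = true };
	const struct m_task kthread = { .in_cpumask = true, .kthread = true };
	const struct m_task percpu  = { .in_cpumask = true, .kthread = true, .per_cpu_kthread = true };
	const struct m_task nomigr  = { .in_cpumask = true, .migration_disabled = true };

	const struct { const char *name; const struct m_task *p; } tasks[] = {
		{ "user task       ", &user },   { "regular kthread ", &kthread },
		{ "per-CPU kthread ", &percpu }, { "migrate-disabled", &nomigr },
	};
	const struct { const char *name; const struct m_cpu *c; } cpus[] = {
		{ "active", &active }, { "going down", &dying }, { "offline", &offline },
	};

	for (unsigned i = 0; i < sizeof(tasks)/sizeof(tasks[0]); i++)
		for (unsigned j = 0; j < sizeof(cpus)/sizeof(cpus[0]); j++)
			printf("%s on %-10s CPU: %s\n", tasks[i].name, cpus[j].name,
			       m_is_cpu_allowed(tasks[i].p, cpus[j].c) ? "allowed" : "refused");
	return 0;
}

The model is only meant to make the precedence of the checks visible; the real function naturally also depends on p->cpus_ptr and the rq state under the proper locks.
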
@@ -2327,7 +2342,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	if (p->flags & PF_KTHREAD || is_migration_disabled(p)) {
 		/*
-		 * Kernel threads are allowed on online && !active CPUs.
+		 * Kernel threads are allowed on online && !active CPUs,
+		 * however, during cpu-hot-unplug, even these might get pushed
+		 * away if not KTHREAD_IS_PER_CPU.
 		 *
 		 * Specifically, migration_disabled() tasks must not fail the
 		 * cpumask_any_and_distribute() pick below, esp. so on
@@ -2371,16 +2388,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
 
 	__do_set_cpus_allowed(p, new_mask, flags);
 
-	if (p->flags & PF_KTHREAD) {
-		/*
-		 * For kernel threads that do indeed end up on online &&
-		 * !active we want to ensure they are strict per-CPU threads.
-		 */
-		WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) &&
-			!cpumask_intersects(new_mask, cpu_active_mask) &&
-			p->nr_cpus_allowed != 1);
-	}
-
 	return affine_move_task(rq, p, &rf, dest_cpu, flags);
 
 out:
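
For context on the KTHREAD_IS_PER_CPU condition referenced in the comment above: a kthread only gets to stay on an online && !active CPU if it has been explicitly marked per-CPU. A hedged sketch of how a subsystem might do that, assuming the kthread_set_per_cpu() helper from the companion kthread change in this series (its name, signature and placement here are assumptions to verify against your tree; kthread_create(), kthread_bind() and wake_up_process() are long-standing APIs):

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int my_worker_fn(void *data)
{
	/* Trivial loop; a real worker would do per-CPU work here. */
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static struct task_struct *start_my_percpu_worker(int cpu)
{
	struct task_struct *p;

	p = kthread_create(my_worker_fn, NULL, "my_worker/%d", cpu);
	if (IS_ERR(p))
		return p;

	kthread_bind(p, cpu);		/* pin to the CPU, sets PF_NO_SETAFFINITY   */
	kthread_set_per_cpu(p, cpu);	/* assumed helper: marks KTHREAD_IS_PER_CPU */
	wake_up_process(p);
	return p;
}
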
@@ -3121,6 +3128,13 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
 
 static inline bool ttwu_queue_cond(int cpu, int wake_flags)
 {
+	/*
+	 * Do not complicate things with the async wake_list while the CPU is
+	 * in hotplug state.
+	 */
+	if (!cpu_active(cpu))
+		return false;
+
 	/*
 	 * If the CPU does not share cache, then queue the task on the
 	 * remote rqs wakelist to avoid accessing remote data.
@@ -7276,8 +7290,14 @@ static void balance_push(struct rq *rq)
 	/*
 	 * Both the cpu-hotplug and stop task are in this case and are
 	 * required to complete the hotplug process.
+	 *
+	 * XXX: the idle task does not match kthread_is_per_cpu() due to
+	 * histerical raisins.
 	 */
-	if (is_per_cpu_kthread(push_task) || is_migration_disabled(push_task)) {
+	if (rq->idle == push_task ||
+	    ((push_task->flags & PF_KTHREAD) && kthread_is_per_cpu(push_task)) ||
+	    is_migration_disabled(push_task)) {
+
 		/*
 		 * If this is the idle task on the outgoing CPU try to wake
 		 * up the hotplug control thread which might wait for the
@@ -7309,7 +7329,7 @@ static void balance_push(struct rq *rq)
 	/*
 	 * At this point need_resched() is true and we'll take the loop in
 	 * schedule(). The next pick is obviously going to be the stop task
-	 * which is_per_cpu_kthread() and will push this task away.
+	 * which kthread_is_per_cpu() and will push this task away.
 	 */
 	raw_spin_lock(&rq->lock);
 }
@@ -7320,10 +7340,13 @@ static void balance_push_set(int cpu, bool on)
 	struct rq_flags rf;
 
 	rq_lock_irqsave(rq, &rf);
-	if (on)
+	rq->balance_push = on;
+	if (on) {
+		WARN_ON_ONCE(rq->balance_callback);
 		rq->balance_callback = &balance_push_callback;
-	else
+	} else if (rq->balance_callback == &balance_push_callback) {
 		rq->balance_callback = NULL;
+	}
 	rq_unlock_irqrestore(rq, &rf);
 }
 
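
Two details of the new balance_push_set() are easy to miss: enabling warns if some other balance callback is already installed, and disabling only clears the pointer while it still is balance_push_callback, so a callback installed by someone else in the meantime is left alone. A small userspace model of that "only remove what you installed" guard (all names are made up; this is not scheduler code):

#include <stdio.h>

struct m_rq {
	int balance_push;
	void (*balance_callback)(struct m_rq *rq);
};

static void m_push_callback(struct m_rq *rq)  { (void)rq; /* would push tasks away */ }
static void m_other_callback(struct m_rq *rq) { (void)rq; /* unrelated callback    */ }

/* Same shape as balance_push_set(): only ever remove *our* callback. */
static void m_balance_push_set(struct m_rq *rq, int on)
{
	rq->balance_push = on;
	if (on) {
		if (rq->balance_callback)	/* WARN_ON_ONCE() in the kernel */
			fprintf(stderr, "callback already installed\n");
		rq->balance_callback = m_push_callback;
	} else if (rq->balance_callback == m_push_callback) {
		rq->balance_callback = NULL;	/* leave foreign callbacks alone */
	}
}

int main(void)
{
	struct m_rq rq = { 0 };

	m_balance_push_set(&rq, 1);		/* CPU going down             */
	rq.balance_callback = m_other_callback;	/* someone else took the slot */
	m_balance_push_set(&rq, 0);		/* CPU back up: don't clobber */

	printf("callback %s\n",
	       rq.balance_callback == m_other_callback ? "preserved" : "lost");
	return 0;
}
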
@@ -7441,6 +7464,10 @@ int sched_cpu_activate(unsigned int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	struct rq_flags rf;
 
+	/*
+	 * Make sure that when the hotplug state machine does a roll-back
+	 * we clear balance_push. Ideally that would happen earlier...
+	 */
 	balance_push_set(cpu, false);
 
 #ifdef CONFIG_SCHED_SMT
@@ -7483,17 +7510,27 @@ int sched_cpu_deactivate(unsigned int cpu)
 	int ret;
 
 	set_cpu_active(cpu, false);
+
+	/*
+	 * From this point forward, this CPU will refuse to run any task that
+	 * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively
+	 * push those tasks away until this gets cleared, see
+	 * sched_cpu_dying().
+	 */
+	balance_push_set(cpu, true);
+
 	/*
-	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
-	 * users of this state to go away such that all new such users will
-	 * observe it.
+	 * We've cleared cpu_active_mask / set balance_push, wait for all
+	 * preempt-disabled and RCU users of this state to go away such that
+	 * all new such users will observe it.
+	 *
+	 * Specifically, we rely on ttwu to no longer target this CPU, see
+	 * ttwu_queue_cond() and is_cpu_allowed().
 	 *
 	 * Do sync before park smpboot threads to take care the rcu boost case.
 	 */
 	synchronize_rcu();
 
-	balance_push_set(cpu, true);
-
 	rq_lock_irqsave(rq, &rf);
 	if (rq->rd) {
 		update_rq_clock(rq);
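
The reordering in this hunk is the heart of the change: the refusal is published first (cpu_active cleared, balance_push set), and only then does synchronize_rcu() wait out every waker that could still have seen the old state, so that afterwards nothing new can land on the CPU. A rough userspace analogue of that publish-then-wait pattern, assuming the userspace-rcu library (liburcu) with its classic unprefixed API and built with -lurcu -pthread; the flag and counter are invented for the model:

#include <urcu.h>        /* liburcu, default flavour: rcu_read_lock(), synchronize_rcu() */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool cpu_accepts_work = true;  /* models cpu_active() / !balance_push */
static atomic_int  nr_queued;                /* models tasks queued on that CPU     */

/* Models a remote ttwu(): only queue onto the CPU while it still accepts work. */
static void *waker(void *arg)
{
	(void)arg;
	rcu_register_thread();
	for (int i = 0; i < 100000; i++) {
		rcu_read_lock();                 /* models the preempt/RCU section in ttwu */
		if (atomic_load(&cpu_accepts_work))
			atomic_fetch_add(&nr_queued, 1);
		rcu_read_unlock();
	}
	rcu_unregister_thread();
	return NULL;
}

int main(void)
{
	pthread_t t;

	rcu_register_thread();
	pthread_create(&t, NULL, waker, NULL);
	usleep(1000);

	/* "sched_cpu_deactivate": first publish the refusal ... */
	atomic_store(&cpu_accepts_work, false);

	/* ... then wait out everyone who might still have seen the old value. */
	synchronize_rcu();

	int settled = atomic_load(&nr_queued);
	usleep(1000);
	printf("queued after grace period: %d, later: %d\n",
	       settled, atomic_load(&nr_queued));

	pthread_join(t, NULL);
	rcu_unregister_thread();
	return 0;
}

After synchronize_rcu() returns, any waker has either already completed its increment or is guaranteed to observe the cleared flag, which is the property sched_cpu_deactivate() relies on via ttwu_queue_cond() and is_cpu_allowed().
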
@@ -7574,6 +7611,25 @@ static void calc_load_migrate(struct rq *rq)
 		atomic_long_add(delta, &calc_load_tasks);
 }
 
+static void dump_rq_tasks(struct rq *rq, const char *loglvl)
+{
+	struct task_struct *g, *p;
+	int cpu = cpu_of(rq);
+
+	lockdep_assert_held(&rq->lock);
+
+	printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running);
+	for_each_process_thread(g, p) {
+		if (task_cpu(p) != cpu)
+			continue;
+
+		if (!task_on_rq_queued(p))
+			continue;
+
+		printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm);
+	}
+}
+
 int sched_cpu_dying(unsigned int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -7583,9 +7639,18 @@ int sched_cpu_dying(unsigned int cpu)
 	sched_tick_stop(cpu);
 
 	rq_lock_irqsave(rq, &rf);
-	BUG_ON(rq->nr_running != 1 || rq_has_pinned_tasks(rq));
+	if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) {
+		WARN(true, "Dying CPU not properly vacated!");
+		dump_rq_tasks(rq, KERN_WARNING);
+	}
 	rq_unlock_irqrestore(rq, &rf);
 
+	/*
+	 * Now that the CPU is offline, make sure we're welcome
+	 * to new tasks once we come back up.
+	 */
+	balance_push_set(cpu, false);
+
 	calc_load_migrate(rq);
 	update_max_interval();
 	nohz_balance_exit_idle(rq);