Skip to content

Commit 82980b1

Browse files
dwmw2 authored and paulmckrcu committed
rcu: Kill rnp->ofl_seq and use only rcu_state.ofl_lock for exclusion
If we allow architectures to bring APs online in parallel, then we end up requiring rcu_cpu_starting() to be reentrant. But currently, the manipulation of rnp->ofl_seq is not thread-safe. However, rnp->ofl_seq is also fairly much pointless anyway since both rcu_cpu_starting() and rcu_report_dead() hold rcu_state.ofl_lock for fairly much the whole time that rnp->ofl_seq is set to an odd number to indicate that an operation is in progress. So drop rnp->ofl_seq completely, and use only rcu_state.ofl_lock. This has a couple of minor complexities: lockdep will complain when we take rcu_state.ofl_lock, and currently accepts the 'excuse' of having an odd value in rnp->ofl_seq. So switch it to an arch_spinlock_t to avoid that false positive complaint. Since we're killing rnp->ofl_seq of course that 'excuse' has to be changed too, so make it check for arch_spin_is_locked(rcu_state.ofl_lock). There's no arch_spin_lock_irqsave() so we have to manually save and restore local interrupts around the locking. At Paul's request based on Neeraj's analysis, make rcu_gp_init not just wait but *exclude* any CPU online/offline activity, which was fairly much true already by virtue of it holding rcu_state.ofl_lock. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
1 parent da12301 commit 82980b1

2 files changed

Lines changed: 37 additions & 38 deletions

File tree

kernel/rcu/tree.c

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ static struct rcu_state rcu_state = {
9191
.abbr = RCU_ABBR,
9292
.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
9393
.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
94-
.ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
94+
.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
9595
};
9696

9797
/* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -1175,7 +1175,15 @@ bool rcu_lockdep_current_cpu_online(void)
11751175
preempt_disable_notrace();
11761176
rdp = this_cpu_ptr(&rcu_data);
11771177
rnp = rdp->mynode;
1178-
if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || READ_ONCE(rnp->ofl_seq) & 0x1)
1178+
/*
1179+
* Strictly, we care here about the case where the current CPU is
1180+
* in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
1181+
* not being up to date. So arch_spin_is_locked() might have a
1182+
* false positive if it's held by some *other* CPU, but that's
1183+
* OK because that just means a false *negative* on the warning.
1184+
*/
1185+
if (rdp->grpmask & rcu_rnp_online_cpus(rnp) ||
1186+
arch_spin_is_locked(&rcu_state.ofl_lock))
11791187
ret = true;
11801188
preempt_enable_notrace();
11811189
return ret;
@@ -1739,7 +1747,6 @@ static void rcu_strict_gp_boundary(void *unused)
17391747
*/
17401748
static noinline_for_stack bool rcu_gp_init(void)
17411749
{
1742-
unsigned long firstseq;
17431750
unsigned long flags;
17441751
unsigned long oldmask;
17451752
unsigned long mask;
@@ -1782,22 +1789,17 @@ static noinline_for_stack bool rcu_gp_init(void)
17821789
* of RCU's Requirements documentation.
17831790
*/
17841791
WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
1792+
/* Exclude CPU hotplug operations. */
17851793
rcu_for_each_leaf_node(rnp) {
1786-
// Wait for CPU-hotplug operations that might have
1787-
// started before this grace period did.
1788-
smp_mb(); // Pair with barriers used when updating ->ofl_seq to odd values.
1789-
firstseq = READ_ONCE(rnp->ofl_seq);
1790-
if (firstseq & 0x1)
1791-
while (firstseq == READ_ONCE(rnp->ofl_seq))
1792-
schedule_timeout_idle(1); // Can't wake unless RCU is watching.
1793-
smp_mb(); // Pair with barriers used when updating ->ofl_seq to even values.
1794-
raw_spin_lock(&rcu_state.ofl_lock);
1795-
raw_spin_lock_irq_rcu_node(rnp);
1794+
local_irq_save(flags);
1795+
arch_spin_lock(&rcu_state.ofl_lock);
1796+
raw_spin_lock_rcu_node(rnp);
17961797
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
17971798
!rnp->wait_blkd_tasks) {
17981799
/* Nothing to do on this leaf rcu_node structure. */
1799-
raw_spin_unlock_irq_rcu_node(rnp);
1800-
raw_spin_unlock(&rcu_state.ofl_lock);
1800+
raw_spin_unlock_rcu_node(rnp);
1801+
arch_spin_unlock(&rcu_state.ofl_lock);
1802+
local_irq_restore(flags);
18011803
continue;
18021804
}
18031805

@@ -1832,8 +1834,9 @@ static noinline_for_stack bool rcu_gp_init(void)
18321834
rcu_cleanup_dead_rnp(rnp);
18331835
}
18341836

1835-
raw_spin_unlock_irq_rcu_node(rnp);
1836-
raw_spin_unlock(&rcu_state.ofl_lock);
1837+
raw_spin_unlock_rcu_node(rnp);
1838+
arch_spin_unlock(&rcu_state.ofl_lock);
1839+
local_irq_restore(flags);
18371840
}
18381841
rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
18391842

@@ -4287,11 +4290,10 @@ void rcu_cpu_starting(unsigned int cpu)
42874290

42884291
rnp = rdp->mynode;
42894292
mask = rdp->grpmask;
4290-
WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4291-
WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
4293+
local_irq_save(flags);
4294+
arch_spin_lock(&rcu_state.ofl_lock);
42924295
rcu_dynticks_eqs_online();
4293-
smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
4294-
raw_spin_lock_irqsave_rcu_node(rnp, flags);
4296+
raw_spin_lock_rcu_node(rnp);
42954297
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
42964298
newcpu = !(rnp->expmaskinitnext & mask);
42974299
rnp->expmaskinitnext |= mask;
@@ -4304,15 +4306,18 @@ void rcu_cpu_starting(unsigned int cpu)
43044306

43054307
/* An incoming CPU should never be blocking a grace period. */
43064308
if (WARN_ON_ONCE(rnp->qsmask & mask)) { /* RCU waiting on incoming CPU? */
4309+
/* rcu_report_qs_rnp() *really* wants some flags to restore */
4310+
unsigned long flags2;
4311+
4312+
local_irq_save(flags2);
43074313
rcu_disable_urgency_upon_qs(rdp);
43084314
/* Report QS -after- changing ->qsmaskinitnext! */
4309-
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
4315+
rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags2);
43104316
} else {
4311-
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4317+
raw_spin_unlock_rcu_node(rnp);
43124318
}
4313-
smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
4314-
WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4315-
WARN_ON_ONCE(rnp->ofl_seq & 0x1);
4319+
arch_spin_unlock(&rcu_state.ofl_lock);
4320+
local_irq_restore(flags);
43164321
smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
43174322
}
43184323

@@ -4326,7 +4331,7 @@ void rcu_cpu_starting(unsigned int cpu)
43264331
*/
43274332
void rcu_report_dead(unsigned int cpu)
43284333
{
4329-
unsigned long flags;
4334+
unsigned long flags, seq_flags;
43304335
unsigned long mask;
43314336
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
43324337
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
@@ -4340,10 +4345,8 @@ void rcu_report_dead(unsigned int cpu)
43404345

43414346
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
43424347
mask = rdp->grpmask;
4343-
WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4344-
WARN_ON_ONCE(!(rnp->ofl_seq & 0x1));
4345-
smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
4346-
raw_spin_lock(&rcu_state.ofl_lock);
4348+
local_irq_save(seq_flags);
4349+
arch_spin_lock(&rcu_state.ofl_lock);
43474350
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
43484351
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
43494352
rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
@@ -4354,10 +4357,8 @@ void rcu_report_dead(unsigned int cpu)
43544357
}
43554358
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
43564359
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
4357-
raw_spin_unlock(&rcu_state.ofl_lock);
4358-
smp_mb(); // Pair with rcu_gp_cleanup()'s ->ofl_seq barrier().
4359-
WRITE_ONCE(rnp->ofl_seq, rnp->ofl_seq + 1);
4360-
WARN_ON_ONCE(rnp->ofl_seq & 0x1);
4360+
arch_spin_unlock(&rcu_state.ofl_lock);
4361+
local_irq_restore(seq_flags);
43614362

43624363
rdp->cpu_started = false;
43634364
}

kernel/rcu/tree.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,6 @@ struct rcu_node {
5656
/* Initialized from ->qsmaskinitnext at the */
5757
/* beginning of each grace period. */
5858
unsigned long qsmaskinitnext;
59-
unsigned long ofl_seq; /* CPU-hotplug operation sequence count. */
60-
/* Online CPUs for next grace period. */
6159
unsigned long expmask; /* CPUs or groups that need to check in */
6260
/* to allow the current expedited GP */
6361
/* to complete. */
@@ -355,7 +353,7 @@ struct rcu_state {
355353
const char *name; /* Name of structure. */
356354
char abbr; /* Abbreviated name. */
357355

358-
raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
356+
arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
359357
/* Synchronize offline with */
360358
/* GP pre-initialization. */
361359
};

0 commit comments

Comments (0)