Commit 2cee078
rcu-tasks: Use separate ->percpu_dequeue_lim for callback dequeueing
Decreasing the number of callback queues is a bit tricky because it is
necessary to handle callbacks that were queued before the number of queues
decreased, but which were not ready to invoke until afterwards. This commit
takes a first step in this direction by maintaining a separate
->percpu_dequeue_lim to control callback dequeueing, in addition to the
existing ->percpu_enqueue_lim, which now controls only enqueueing.

Reported-by: Martin Lau <kafai@fb.com>
Cc: Neeraj Upadhyay <neeraj.iitr10@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
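For intuition, here is a minimal standalone C sketch of the idea. It is not
from the commit; the names enqueue_lim, dequeue_lim, and queue_len are
illustrative stand-ins for the fields above, and modulo stands in for the
shift-based queue selection in tasks.h. It shows why the limit used for
dequeueing must keep covering queues that no longer accept new callbacks:

#include <stdio.h>

#define NQUEUES 4

static int enqueue_lim = NQUEUES; /* queues accepting new callbacks */
static int dequeue_lim = NQUEUES; /* queues that may still hold callbacks */
static int queue_len[NQUEUES];

static void enqueue(int cpu)
{
        /* New callbacks land only on the first enqueue_lim queues. */
        queue_len[cpu % enqueue_lim]++;
}

static void shrink_to_one(void)
{
        /* New callbacks now go to queue 0 only, but dequeue_lim is left
         * alone so callbacks already on queues 1..3 are not stranded. */
        enqueue_lim = 1;
}

static void invoke_all(void)
{
        /* Scan up to dequeue_lim, not enqueue_lim. */
        for (int q = 0; q < dequeue_lim; q++) {
                printf("queue %d: %d callback(s)\n", q, queue_len[q]);
                queue_len[q] = 0;
        }
}

int main(void)
{
        for (int cpu = 0; cpu < NQUEUES; cpu++)
                enqueue(cpu);   /* one callback per queue */
        shrink_to_one();
        enqueue(3);             /* lands on queue 0 despite cpu == 3 */
        invoke_all();           /* still drains all four queues */
        return 0;
}

Shrinking enqueue_lim immediately is safe only because dequeue_lim keeps the
drain loop visiting the retired queues until they are empty; this commit adds
the field and its dequeue-side users, leaving the actual shrinking for later
work.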
1 parent ab97152 commit 2cee078

1 file changed: 10 additions & 5 deletions

kernel/rcu/tasks.h
@@ -66,7 +66,8 @@ struct rcu_tasks_percpu {
  * @call_func: This flavor's call_rcu()-equivalent function.
  * @rtpcpu: This flavor's rcu_tasks_percpu structure.
  * @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
- * @percpu_enqueue_lim: Number of per-CPU callback queues in use.
+ * @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
+ * @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing.
  * @barrier_q_mutex: Serialize barrier operations.
  * @barrier_q_count: Number of queues being waited on.
  * @barrier_q_completion: Barrier wait/wakeup mechanism.
@@ -96,6 +97,7 @@ struct rcu_tasks {
         struct rcu_tasks_percpu __percpu *rtpcpu;
         int percpu_enqueue_shift;
         int percpu_enqueue_lim;
+        int percpu_dequeue_lim;
         struct mutex barrier_q_mutex;
         atomic_t barrier_q_count;
         struct completion barrier_q_completion;
@@ -121,6 +123,7 @@ static struct rcu_tasks rt_name = \
         .name = n, \
         .percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
         .percpu_enqueue_lim = 1, \
+        .percpu_dequeue_lim = 1, \
         .barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
         .barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT, \
         .kname = #rt_name, \
@@ -223,6 +226,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
         if (lim > nr_cpu_ids)
                 lim = nr_cpu_ids;
         WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
+        WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
         smp_store_release(&rtp->percpu_enqueue_lim, lim);
         for_each_possible_cpu(cpu) {
                 struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
@@ -290,6 +294,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
                 raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
                 if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
                         WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+                        WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
                         smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
                         pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
                 }
@@ -342,7 +347,7 @@ static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp)
         init_completion(&rtp->barrier_q_completion);
         atomic_set(&rtp->barrier_q_count, 2);
         for_each_possible_cpu(cpu) {
-                if (cpu >= smp_load_acquire(&rtp->percpu_enqueue_lim))
+                if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim))
                         break;
                 rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
                 rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb;
@@ -366,7 +371,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
         unsigned long flags;
         int needgpcb = 0;

-        for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_enqueue_lim); cpu++) {
+        for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) {
                 struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);

                 /* Advance and accelerate any new callbacks. */
@@ -397,11 +402,11 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu

         cpu = rtpcp->cpu;
         cpunext = cpu * 2 + 1;
-        if (cpunext < smp_load_acquire(&rtp->percpu_enqueue_lim)) {
+        if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
                 rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
                 queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
                 cpunext++;
-                if (cpunext < smp_load_acquire(&rtp->percpu_enqueue_lim)) {
+                if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
                         rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
                         queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
                 }
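A note on the memory ordering visible above: the writers update
percpu_dequeue_lim before the smp_store_release() of percpu_enqueue_lim, and
the new readers all use smp_load_acquire(). The sketch below is a rough
userspace analogue of that publish-then-read idiom using C11 atomics rather
than the kernel primitives; all names here are illustrative, not from
tasks.h.

#include <stdatomic.h>
#include <stdbool.h>

#define NQUEUES 4

static bool queue_initialized[NQUEUES]; /* stand-in for per-CPU queue state */
static _Atomic int lim = 1;             /* stand-in for the published limit */

/* Single writer assumed, mirroring updates made under ->cbs_gbl_lock in the
 * commit. Initialize the newly exposed queues first, then publish the new
 * limit with a release store, so a reader that observes the larger limit
 * also observes the initialization that preceded it.
 * Caller guarantees new_lim <= NQUEUES. */
static void grow_queues(int new_lim)
{
        int old = atomic_load_explicit(&lim, memory_order_relaxed);

        for (int q = old; q < new_lim; q++)
                queue_initialized[q] = true;
        atomic_store_explicit(&lim, new_lim, memory_order_release);
}

/* Reader: the acquire load pairs with the release store above, playing the
 * role the smp_load_acquire(&rtp->percpu_dequeue_lim) calls play in the
 * diff. */
static int count_initialized(void)
{
        int n = atomic_load_explicit(&lim, memory_order_acquire);
        int ready = 0;

        for (int q = 0; q < n; q++)
                if (queue_initialized[q])
                        ready++;
        return ready;
}

int main(void)
{
        grow_queues(NQUEUES); /* e.g., switching to per-CPU queueing */
        return count_initialized() == NQUEUES ? 0 : 1;
}

In the commit itself the release store and the acquire loads are on different
fields, so the real guarantee is subtler than this single-variable sketch;
the point is only the general pattern that a reader observing a larger limit
must also observe the queue state published before it.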
