Skip to content

Commit a504138

Browse files
author
Frederic Weisbecker
committed
Merge branch 'rcu/refscale' into next
Add performance testing for common context-synchronization mechanisms (preemption disabling, IRQ disabling, softirq disabling) and per-CPU increments. These provide relevant baseline comparisons against the SRCU-fast read-side APIs, especially since SRCU-fast is planned to synchronize further tracing fast-path code.
2 parents 6fcc739 + 204ab51 commit a504138

1 file changed

Lines changed: 321 additions & 9 deletions

File tree

kernel/rcu/refscale.c

Lines changed: 321 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ struct ref_scale_ops {
136136
void (*cleanup)(void);
137137
void (*readsection)(const int nloops);
138138
void (*delaysection)(const int nloops, const int udl, const int ndl);
139+
bool enable_irqs;
139140
const char *name;
140141
};
141142

@@ -367,6 +368,9 @@ static const struct ref_scale_ops rcu_trace_ops = {
367368
// Definitions for reference count
368369
static atomic_t refcnt;
369370

371+
// Definitions for acquire-release.
372+
static DEFINE_PER_CPU(unsigned long, test_acqrel);
373+
370374
static void ref_refcnt_section(const int nloops)
371375
{
372376
int i;
@@ -395,6 +399,184 @@ static const struct ref_scale_ops refcnt_ops = {
395399
.name = "refcnt"
396400
};
397401

402+
static void ref_percpuinc_section(const int nloops)
403+
{
404+
int i;
405+
406+
for (i = nloops; i >= 0; i--) {
407+
this_cpu_inc(test_acqrel);
408+
this_cpu_dec(test_acqrel);
409+
}
410+
}
411+
412+
static void ref_percpuinc_delay_section(const int nloops, const int udl, const int ndl)
413+
{
414+
int i;
415+
416+
for (i = nloops; i >= 0; i--) {
417+
this_cpu_inc(test_acqrel);
418+
un_delay(udl, ndl);
419+
this_cpu_dec(test_acqrel);
420+
}
421+
}
422+
423+
static const struct ref_scale_ops percpuinc_ops = {
424+
.init = rcu_sync_scale_init,
425+
.readsection = ref_percpuinc_section,
426+
.delaysection = ref_percpuinc_delay_section,
427+
.name = "percpuinc"
428+
};
429+
430+
// Note that this can lose counts in preemptible kernels.
431+
static void ref_incpercpu_section(const int nloops)
432+
{
433+
int i;
434+
435+
for (i = nloops; i >= 0; i--) {
436+
unsigned long *tap = this_cpu_ptr(&test_acqrel);
437+
438+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
439+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
440+
}
441+
}
442+
443+
static void ref_incpercpu_delay_section(const int nloops, const int udl, const int ndl)
444+
{
445+
int i;
446+
447+
for (i = nloops; i >= 0; i--) {
448+
unsigned long *tap = this_cpu_ptr(&test_acqrel);
449+
450+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
451+
un_delay(udl, ndl);
452+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
453+
}
454+
}
455+
456+
static const struct ref_scale_ops incpercpu_ops = {
457+
.init = rcu_sync_scale_init,
458+
.readsection = ref_incpercpu_section,
459+
.delaysection = ref_incpercpu_delay_section,
460+
.name = "incpercpu"
461+
};
462+
463+
static void ref_incpercpupreempt_section(const int nloops)
464+
{
465+
int i;
466+
467+
for (i = nloops; i >= 0; i--) {
468+
unsigned long *tap;
469+
470+
preempt_disable();
471+
tap = this_cpu_ptr(&test_acqrel);
472+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
473+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
474+
preempt_enable();
475+
}
476+
}
477+
478+
static void ref_incpercpupreempt_delay_section(const int nloops, const int udl, const int ndl)
479+
{
480+
int i;
481+
482+
for (i = nloops; i >= 0; i--) {
483+
unsigned long *tap;
484+
485+
preempt_disable();
486+
tap = this_cpu_ptr(&test_acqrel);
487+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
488+
un_delay(udl, ndl);
489+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
490+
preempt_enable();
491+
}
492+
}
493+
494+
static const struct ref_scale_ops incpercpupreempt_ops = {
495+
.init = rcu_sync_scale_init,
496+
.readsection = ref_incpercpupreempt_section,
497+
.delaysection = ref_incpercpupreempt_delay_section,
498+
.name = "incpercpupreempt"
499+
};
500+
501+
static void ref_incpercpubh_section(const int nloops)
502+
{
503+
int i;
504+
505+
for (i = nloops; i >= 0; i--) {
506+
unsigned long *tap;
507+
508+
local_bh_disable();
509+
tap = this_cpu_ptr(&test_acqrel);
510+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
511+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
512+
local_bh_enable();
513+
}
514+
}
515+
516+
static void ref_incpercpubh_delay_section(const int nloops, const int udl, const int ndl)
517+
{
518+
int i;
519+
520+
for (i = nloops; i >= 0; i--) {
521+
unsigned long *tap;
522+
523+
local_bh_disable();
524+
tap = this_cpu_ptr(&test_acqrel);
525+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
526+
un_delay(udl, ndl);
527+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
528+
local_bh_enable();
529+
}
530+
}
531+
532+
static const struct ref_scale_ops incpercpubh_ops = {
533+
.init = rcu_sync_scale_init,
534+
.readsection = ref_incpercpubh_section,
535+
.delaysection = ref_incpercpubh_delay_section,
536+
.enable_irqs = true,
537+
.name = "incpercpubh"
538+
};
539+
540+
static void ref_incpercpuirqsave_section(const int nloops)
541+
{
542+
int i;
543+
unsigned long flags;
544+
545+
for (i = nloops; i >= 0; i--) {
546+
unsigned long *tap;
547+
548+
local_irq_save(flags);
549+
tap = this_cpu_ptr(&test_acqrel);
550+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
551+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
552+
local_irq_restore(flags);
553+
}
554+
}
555+
556+
static void ref_incpercpuirqsave_delay_section(const int nloops, const int udl, const int ndl)
557+
{
558+
int i;
559+
unsigned long flags;
560+
561+
for (i = nloops; i >= 0; i--) {
562+
unsigned long *tap;
563+
564+
local_irq_save(flags);
565+
tap = this_cpu_ptr(&test_acqrel);
566+
WRITE_ONCE(*tap, READ_ONCE(*tap) + 1);
567+
un_delay(udl, ndl);
568+
WRITE_ONCE(*tap, READ_ONCE(*tap) - 1);
569+
local_irq_restore(flags);
570+
}
571+
}
572+
573+
static const struct ref_scale_ops incpercpuirqsave_ops = {
574+
.init = rcu_sync_scale_init,
575+
.readsection = ref_incpercpuirqsave_section,
576+
.delaysection = ref_incpercpuirqsave_delay_section,
577+
.name = "incpercpuirqsave"
578+
};
579+
398580
// Definitions for rwlock
399581
static rwlock_t test_rwlock;
400582

@@ -538,9 +720,6 @@ static const struct ref_scale_ops lock_irq_ops = {
538720
.name = "lock-irq"
539721
};
540722

541-
// Definitions acquire-release.
542-
static DEFINE_PER_CPU(unsigned long, test_acqrel);
543-
544723
static void ref_acqrel_section(const int nloops)
545724
{
546725
unsigned long x;
@@ -673,6 +852,133 @@ static const struct ref_scale_ops jiffies_ops = {
673852
.name = "jiffies"
674853
};
675854

855+
static void ref_preempt_section(const int nloops)
856+
{
857+
int i;
858+
859+
migrate_disable();
860+
for (i = nloops; i >= 0; i--) {
861+
preempt_disable();
862+
preempt_enable();
863+
}
864+
migrate_enable();
865+
}
866+
867+
static void ref_preempt_delay_section(const int nloops, const int udl, const int ndl)
868+
{
869+
int i;
870+
871+
migrate_disable();
872+
for (i = nloops; i >= 0; i--) {
873+
preempt_disable();
874+
un_delay(udl, ndl);
875+
preempt_enable();
876+
}
877+
migrate_enable();
878+
}
879+
880+
static const struct ref_scale_ops preempt_ops = {
881+
.readsection = ref_preempt_section,
882+
.delaysection = ref_preempt_delay_section,
883+
.name = "preempt"
884+
};
885+
886+
static void ref_bh_section(const int nloops)
887+
{
888+
int i;
889+
890+
preempt_disable();
891+
for (i = nloops; i >= 0; i--) {
892+
local_bh_disable();
893+
local_bh_enable();
894+
}
895+
preempt_enable();
896+
}
897+
898+
static void ref_bh_delay_section(const int nloops, const int udl, const int ndl)
899+
{
900+
int i;
901+
902+
preempt_disable();
903+
for (i = nloops; i >= 0; i--) {
904+
local_bh_disable();
905+
un_delay(udl, ndl);
906+
local_bh_enable();
907+
}
908+
preempt_enable();
909+
}
910+
911+
static const struct ref_scale_ops bh_ops = {
912+
.readsection = ref_bh_section,
913+
.delaysection = ref_bh_delay_section,
914+
.enable_irqs = true,
915+
.name = "bh"
916+
};
917+
918+
static void ref_irq_section(const int nloops)
919+
{
920+
int i;
921+
922+
preempt_disable();
923+
for (i = nloops; i >= 0; i--) {
924+
local_irq_disable();
925+
local_irq_enable();
926+
}
927+
preempt_enable();
928+
}
929+
930+
static void ref_irq_delay_section(const int nloops, const int udl, const int ndl)
931+
{
932+
int i;
933+
934+
preempt_disable();
935+
for (i = nloops; i >= 0; i--) {
936+
local_irq_disable();
937+
un_delay(udl, ndl);
938+
local_irq_enable();
939+
}
940+
preempt_enable();
941+
}
942+
943+
static const struct ref_scale_ops irq_ops = {
944+
.readsection = ref_irq_section,
945+
.delaysection = ref_irq_delay_section,
946+
.name = "irq"
947+
};
948+
949+
static void ref_irqsave_section(const int nloops)
950+
{
951+
unsigned long flags;
952+
int i;
953+
954+
preempt_disable();
955+
for (i = nloops; i >= 0; i--) {
956+
local_irq_save(flags);
957+
local_irq_restore(flags);
958+
}
959+
preempt_enable();
960+
}
961+
962+
static void ref_irqsave_delay_section(const int nloops, const int udl, const int ndl)
963+
{
964+
unsigned long flags;
965+
int i;
966+
967+
preempt_disable();
968+
for (i = nloops; i >= 0; i--) {
969+
local_irq_save(flags);
970+
un_delay(udl, ndl);
971+
local_irq_restore(flags);
972+
}
973+
preempt_enable();
974+
}
975+
976+
static const struct ref_scale_ops irqsave_ops = {
977+
.readsection = ref_irqsave_section,
978+
.delaysection = ref_irqsave_delay_section,
979+
.name = "irqsave"
980+
};
981+
676982
////////////////////////////////////////////////////////////////////////
677983
//
678984
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
@@ -968,15 +1274,18 @@ ref_scale_reader(void *arg)
9681274
if (!atomic_dec_return(&n_warmedup))
9691275
while (atomic_read_acquire(&n_warmedup))
9701276
rcu_scale_one_reader();
971-
// Also keep interrupts disabled. This also has the effect
972-
// of preventing entries into slow path for rcu_read_unlock().
973-
local_irq_save(flags);
1277+
// Also keep interrupts disabled when it is safe to do so, which
1278+
// it is not for local_bh_enable(). This also has the effect of
1279+
// preventing entries into slow path for rcu_read_unlock().
1280+
if (!cur_ops->enable_irqs)
1281+
local_irq_save(flags);
9741282
start = ktime_get_mono_fast_ns();
9751283

9761284
rcu_scale_one_reader();
9771285

9781286
duration = ktime_get_mono_fast_ns() - start;
979-
local_irq_restore(flags);
1287+
if (!cur_ops->enable_irqs)
1288+
local_irq_restore(flags);
9801289

9811290
rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
9821291
// To reduce runtime-skew noise, do maintain-load invocations until
@@ -1209,8 +1518,11 @@ ref_scale_init(void)
12091518
static const struct ref_scale_ops *scale_ops[] = {
12101519
&rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_fast_updown_ops,
12111520
RCU_TRACE_OPS RCU_TASKS_OPS
1212-
&refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops,
1213-
&acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops,
1521+
&refcnt_ops, &percpuinc_ops, &incpercpu_ops, &incpercpupreempt_ops,
1522+
&incpercpubh_ops, &incpercpuirqsave_ops,
1523+
&rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops,
1524+
&sched_clock_ops, &clock_ops, &jiffies_ops,
1525+
&preempt_ops, &bh_ops, &irq_ops, &irqsave_ops,
12141526
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
12151527
};
12161528

0 commit comments

Comments
 (0)