Skip to content

Commit a590b67

Browse files
committed
Merge branch 'srcu-next.2025.08.21a' into HEAD
SRCU updates: * Create srcu_read_{,un}lock_fast_notrace() * Add srcu_read_lock_fast_notrace() and srcu_read_unlock_fast_notrace() * Add guards for notrace variants of SRCU-fast readers * Document srcu_read_{,un}lock_fast() use of implicit RCU readers * Document srcu_flip() memory-barrier D relation to SRCU-fast * Remove preempt_disable/enable() in Tiny SRCU srcu_gp_start_if_needed()
2 parents ccd0256 + e6a43ae commit a590b67

4 files changed

Lines changed: 75 additions & 22 deletions

File tree

include/linux/srcu.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,27 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *
275275
{
276276
struct srcu_ctr __percpu *retval;
277277

278+
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
278279
srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
279280
retval = __srcu_read_lock_fast(ssp);
280281
rcu_try_lock_acquire(&ssp->dep_map);
281282
return retval;
282283
}
283284

285+
/*
286+
* Used by tracing, cannot be traced and cannot call lockdep.
287+
* See srcu_read_lock_fast() for more information.
288+
*/
289+
static inline struct srcu_ctr __percpu *srcu_read_lock_fast_notrace(struct srcu_struct *ssp)
290+
__acquires(ssp)
291+
{
292+
struct srcu_ctr __percpu *retval;
293+
294+
srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
295+
retval = __srcu_read_lock_fast(ssp);
296+
return retval;
297+
}
298+
284299
/**
285300
* srcu_down_read_fast - register a new reader for an SRCU-protected structure.
286301
* @ssp: srcu_struct in which to register the new reader.
@@ -295,6 +310,7 @@ static inline struct srcu_ctr __percpu *srcu_read_lock_fast(struct srcu_struct *
295310
static inline struct srcu_ctr __percpu *srcu_down_read_fast(struct srcu_struct *ssp) __acquires(ssp)
296311
{
297312
WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
313+
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_down_read_fast().");
298314
srcu_check_read_flavor_force(ssp, SRCU_READ_FLAVOR_FAST);
299315
return __srcu_read_lock_fast(ssp);
300316
}
@@ -389,6 +405,18 @@ static inline void srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ct
389405
srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
390406
srcu_lock_release(&ssp->dep_map);
391407
__srcu_read_unlock_fast(ssp, scp);
408+
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
409+
}
410+
411+
/*
412+
* Used by tracing, cannot be traced and cannot call lockdep.
413+
* See srcu_read_unlock_fast() for more information.
414+
*/
415+
static inline void srcu_read_unlock_fast_notrace(struct srcu_struct *ssp,
416+
struct srcu_ctr __percpu *scp) __releases(ssp)
417+
{
418+
srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
419+
__srcu_read_unlock_fast(ssp, scp);
392420
}
393421

394422
/**
@@ -405,6 +433,7 @@ static inline void srcu_up_read_fast(struct srcu_struct *ssp, struct srcu_ctr __
405433
WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && in_nmi());
406434
srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_FAST);
407435
__srcu_read_unlock_fast(ssp, scp);
436+
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_up_read_fast().");
408437
}
409438

410439
/**
@@ -486,4 +515,9 @@ DEFINE_LOCK_GUARD_1(srcu_fast, struct srcu_struct,
486515
srcu_read_unlock_fast(_T->lock, _T->scp),
487516
struct srcu_ctr __percpu *scp)
488517

518+
DEFINE_LOCK_GUARD_1(srcu_fast_notrace, struct srcu_struct,
519+
_T->scp = srcu_read_lock_fast_notrace(_T->lock),
520+
srcu_read_unlock_fast_notrace(_T->lock, _T->scp),
521+
struct srcu_ctr __percpu *scp)
522+
489523
#endif

include/linux/srcutree.h

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -232,23 +232,40 @@ static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ss
232232
* srcu_read_unlock_fast().
233233
*
234234
* Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
235-
* critical sections either because they disables interrupts, because they
236-
* are a single instruction, or because they are a read-modify-write atomic
237-
* operation, depending on the whims of the architecture.
235+
* critical sections either because they disable interrupts, because
236+
* they are a single instruction, or because they are read-modify-write
237+
* atomic operations, depending on the whims of the architecture.
238+
* This matters because the SRCU-fast grace-period mechanism uses either
239+
* synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU,
240+
* *not* SRCU, in order to eliminate the need for the read-side smp_mb()
241+
* invocations that are used by srcu_read_lock() and srcu_read_unlock().
242+
* The __srcu_read_unlock_fast() function also relies on this same RCU
243+
* (again, *not* SRCU) trick to eliminate the need for smp_mb().
244+
*
245+
* The key point behind this RCU trick is that if any part of a given
246+
* RCU reader precedes the beginning of a given RCU grace period, then
247+
* the entirety of that RCU reader and everything preceding it happens
248+
* before the end of that same RCU grace period. Similarly, if any part
249+
* of a given RCU reader follows the end of a given RCU grace period,
250+
* then the entirety of that RCU reader and everything following it
251+
* happens after the beginning of that same RCU grace period. Therefore,
252+
* the operations labeled Y in __srcu_read_lock_fast() and those labeled Z
253+
* in __srcu_read_unlock_fast() are ordered against the corresponding SRCU
254+
* read-side critical section from the viewpoint of the SRCU grace period.
255+
* This is all the ordering that is required, hence no calls to smp_mb().
238256
*
239257
* This means that __srcu_read_lock_fast() is not all that fast
240258
* on architectures that support NMIs but do not supply NMI-safe
241259
* implementations of this_cpu_inc().
242260
*/
243-
static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct *ssp)
261+
static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp)
244262
{
245263
struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
246264

247-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_fast().");
248265
if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
249-
this_cpu_inc(scp->srcu_locks.counter); /* Y */
266+
this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
250267
else
251-
atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); /* Z */
268+
atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader.
252269
barrier(); /* Avoid leaking the critical section. */
253270
return scp;
254271
}
@@ -259,23 +276,17 @@ static inline struct srcu_ctr __percpu *__srcu_read_lock_fast(struct srcu_struct
259276
* different CPU than that which was incremented by the corresponding
260277
* srcu_read_lock_fast(), but it must be within the same task.
261278
*
262-
* Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
263-
* critical sections either because they disables interrupts, because they
264-
* are a single instruction, or because they are a read-modify-write atomic
265-
* operation, depending on the whims of the architecture.
266-
*
267-
* This means that __srcu_read_unlock_fast() is not all that fast
268-
* on architectures that support NMIs but do not supply NMI-safe
269-
* implementations of this_cpu_inc().
279+
* Please see the __srcu_read_lock_fast() function's header comment for
280+
* information on implicit RCU readers and NMI safety.
270281
*/
271-
static inline void __srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
282+
static inline void notrace
283+
__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
272284
{
273285
barrier(); /* Avoid leaking the critical section. */
274286
if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
275-
this_cpu_inc(scp->srcu_unlocks.counter); /* Z */
287+
this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader.
276288
else
277-
atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); /* Z */
278-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_fast().");
289+
atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader.
279290
}
280291

281292
void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);

kernel/rcu/srcutiny.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,9 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
176176
{
177177
unsigned long cookie;
178178

179-
preempt_disable(); // Needed for PREEMPT_LAZY
179+
lockdep_assert_preemption_disabled(); // Needed for PREEMPT_LAZY
180180
cookie = get_state_synchronize_srcu(ssp);
181181
if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) {
182-
preempt_enable();
183182
return;
184183
}
185184
WRITE_ONCE(ssp->srcu_idx_max, cookie);
@@ -189,7 +188,6 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
189188
else if (list_empty(&ssp->srcu_work.entry))
190189
list_add(&ssp->srcu_work.entry, &srcu_boot_list);
191190
}
192-
preempt_enable();
193191
}
194192

195193
/*

kernel/rcu/srcutree.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,6 +1168,16 @@ static void srcu_flip(struct srcu_struct *ssp)
11681168
* counter update. Note that both this memory barrier and the
11691169
* one in srcu_readers_active_idx_check() provide the guarantee
11701170
* for __srcu_read_lock().
1171+
*
1172+
* Note that this is a performance optimization, in which we spend
1173+
* an otherwise unnecessary smp_mb() in order to reduce the number
1174+
* of full per-CPU-variable scans in srcu_readers_lock_idx() and
1175+
* srcu_readers_unlock_idx(). But this performance optimization
1176+
* is not so optimal for SRCU-fast, where we would be spending
1177+
* not smp_mb(), but rather synchronize_rcu(). At the same time,
1178+
* the overhead of the smp_mb() is in the noise, so there is no
1179+
* point in omitting it in the SRCU-fast case. So the same code
1180+
* is executed either way.
11711181
*/
11721182
smp_mb(); /* D */ /* Pairs with C. */
11731183
}

0 commit comments

Comments (0)