 #define __force_percpu_prefix	"%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset		this_cpu_read(this_cpu_off)
 
-#ifdef CONFIG_USE_X86_SEG_SUPPORT
-/*
- * Efficient implementation for cases in which the compiler supports
- * named address spaces. Allows the compiler to perform additional
- * optimizations that can save more instructions.
- */
-#define arch_raw_cpu_ptr(ptr)					\
-({									\
-	unsigned long tcp_ptr__;					\
-	tcp_ptr__ = __raw_cpu_read(, this_cpu_off);			\
-									\
-	tcp_ptr__ += (unsigned long)(ptr);				\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;			\
-})
-#else /* CONFIG_USE_X86_SEG_SUPPORT */
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
+ *
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becomes 4.
  */
-#define arch_raw_cpu_ptr(ptr)					\
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr)					\
 ({									\
-	unsigned long tcp_ptr__;					\
-	asm ("mov " __percpu_arg(1) ", %0"				\
-	     : "=r" (tcp_ptr__)						\
-	     : "m" (__my_cpu_var(this_cpu_off)));			\
-									\
-	tcp_ptr__ += (unsigned long)(ptr);				\
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;			\
+	unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off);	\
+	tcp_ptr__ += (__force unsigned long)(_ptr);			\
+	(typeof(*(_ptr)) __kernel __force *)tcp_ptr__;			\
 })
-#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+#else
+#define arch_raw_cpu_ptr(_ptr)	({ BUILD_BUG(); (typeof(_ptr))0; })
+#endif
 
 #define PER_CPU_VAR(var)	%__percpu_seg:(var)__percpu_rel
 
@@ -102,8 +90,8 @@
 #endif	/* CONFIG_SMP */
 
 #define __my_cpu_type(var)	typeof(var) __percpu_seg_override
-#define __my_cpu_ptr(ptr)	(__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
-#define __my_cpu_var(var)	(*__my_cpu_ptr(&var))
+#define __my_cpu_ptr(ptr)	(__my_cpu_type(*(ptr)) *)(__force uintptr_t)(ptr)
+#define __my_cpu_var(var)	(*__my_cpu_ptr(&(var)))
 #define __percpu_arg(x)		__percpu_prefix "%" #x
 #define __force_percpu_arg(x)	__force_percpu_prefix "%" #x
 
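
Editor's illustration (not part of the patch; demo_var is hypothetical): with the named-address-space special case removed, arch_raw_cpu_ptr() always forms a plain kernel pointer by adding this CPU's percpu base, loaded from this_cpu_off with a word-sized read, to the variable's offset. For a DEFINE_PER_CPU(int, demo_var), arch_raw_cpu_ptr(&demo_var) expands roughly to:

	unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); /* this CPU's percpu base   */
	tcp_ptr__ += (__force unsigned long)(&demo_var);           /* plus the member's offset */
	(int __kernel __force *)tcp_ptr__;                         /* usable kernel pointer    */

raw_cpu_read_long() is introduced later in this same patch as raw_cpu_read_8() on 64-bit and raw_cpu_read_4() on 32-bit, which is also why the 32-bit VDSO build must be excluded above.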
@@ -230,25 +218,26 @@ do { \
 })
 
 /*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix. The processor cannot prefetch
- * cachelines if xchg is used.
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
  */
-#define percpu_xchg_op(size, qual, _var, _nval)				\
+#define raw_percpu_xchg_op(_var, _nval)					\
 ({									\
-	__pcpu_type_##size pxo_old__;					\
-	__pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval);	\
-	asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]),		\
-				    "%[oval]")				\
-		  "\n1:\t"						\
-		  __pcpu_op2_##size("cmpxchg", "%[nval]",		\
-				    __percpu_arg([var]))		\
-		  "\n\tjnz 1b"						\
-		  : [oval] "=&a" (pxo_old__),				\
-		    [var] "+m" (__my_cpu_var(_var))			\
-		  : [nval] __pcpu_reg_##size(, pxo_new__)		\
-		  : "memory");						\
-	(typeof(_var))(unsigned long) pxo_old__;			\
+	typeof(_var) pxo_old__ = raw_cpu_read(_var);			\
+	raw_cpu_write(_var, _nval);					\
+	pxo_old__;							\
+})
+
+/*
+ * this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
+ * xchg is expensive due to the implied lock prefix. The processor
+ * cannot prefetch cachelines if xchg is used.
+ */
+#define this_percpu_xchg_op(_var, _nval)				\
+({									\
+	typeof(_var) pxo_old__ = this_cpu_read(_var);			\
+	do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval));	\
+	pxo_old__;							\
 })
 
 /*
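
Usage sketch (editor's illustration; demo_count is a hypothetical percpu variable): the split makes the trade-off explicit. raw_percpu_xchg_op() is a percpu load followed by a store, so it is only correct when the caller already excludes interrupts and migration; this_percpu_xchg_op() retries a single-instruction cmpxchg, which an IRQ on the local CPU cannot tear:

	DEFINE_PER_CPU(int, demo_count);
	int old;

	old = raw_cpu_xchg(demo_count, 0);	/* two moves; caller guarantees exclusion */
	old = this_cpu_xchg(demo_count, 0);	/* read + cmpxchg retry loop; IRQ-safe    */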
@@ -428,10 +417,6 @@ do { \
  * actually per-thread variables implemented as per-CPU variables and
  * thus stable for the duration of the respective task.
  */
-#define this_cpu_read_stable_1(pcp)	percpu_stable_op(1, "mov", pcp)
-#define this_cpu_read_stable_2(pcp)	percpu_stable_op(2, "mov", pcp)
-#define this_cpu_read_stable_4(pcp)	percpu_stable_op(4, "mov", pcp)
-#define this_cpu_read_stable_8(pcp)	percpu_stable_op(8, "mov", pcp)
 #define this_cpu_read_stable(pcp)	__pcpu_size_call_return(this_cpu_read_stable_, pcp)
 
 #ifdef CONFIG_USE_X86_SEG_SUPPORT
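
Context for this hunk (editor's note): the canonical this_cpu_read_stable() user is the current task pointer, which lives in per-CPU storage but never changes from the viewpoint of the thread reading it, so the compiler is allowed to cache and hoist the load. A sketch modeled on x86's get_current(); the exact percpu field name varies across kernel versions:

	static __always_inline struct task_struct *get_current(void)
	{
		return this_cpu_read_stable(pcpu_hot.current_task);
	}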
@@ -500,6 +485,10 @@ do { \
 #define this_cpu_read_const(pcp)	({ BUILD_BUG(); (typeof(pcp))0; })
 #endif /* CONFIG_USE_X86_SEG_SUPPORT */
 
+#define this_cpu_read_stable_1(pcp)	percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp)	percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp)	percpu_stable_op(4, "mov", pcp)
+
 #define raw_cpu_add_1(pcp, val)		percpu_add_op(1, , (pcp), val)
 #define raw_cpu_add_2(pcp, val)		percpu_add_op(2, , (pcp), val)
 #define raw_cpu_add_4(pcp, val)		percpu_add_op(4, , (pcp), val)
@@ -509,18 +498,6 @@ do { \
 #define raw_cpu_or_1(pcp, val)		percpu_to_op(1, , "or", (pcp), val)
 #define raw_cpu_or_2(pcp, val)		percpu_to_op(2, , "or", (pcp), val)
 #define raw_cpu_or_4(pcp, val)		percpu_to_op(4, , "or", (pcp), val)
-
-/*
- * raw_cpu_xchg() can use a load-store since it is not required to be
- * IRQ-safe.
- */
-#define raw_percpu_xchg_op(var, nval)					\
-({									\
-	typeof(var) pxo_ret__ = raw_cpu_read(var);			\
-	raw_cpu_write(var, (nval));					\
-	pxo_ret__;							\
-})
-
 #define raw_cpu_xchg_1(pcp, val)	raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_2(pcp, val)	raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val)	raw_percpu_xchg_op(pcp, val)
@@ -534,9 +511,9 @@ do { \
 #define this_cpu_or_1(pcp, val)		percpu_to_op(1, volatile, "or", (pcp), val)
 #define this_cpu_or_2(pcp, val)		percpu_to_op(2, volatile, "or", (pcp), val)
 #define this_cpu_or_4(pcp, val)		percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(4, volatile, pcp, nval)
+#define this_cpu_xchg_1(pcp, nval)	this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval)	this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval)	this_percpu_xchg_op(pcp, nval)
 
 #define raw_cpu_add_return_1(pcp, val)	percpu_add_return_op(1, , pcp, val)
 #define raw_cpu_add_return_2(pcp, val)	percpu_add_return_op(2, , pcp, val)
@@ -563,6 +540,8 @@ do { \
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
+#define this_cpu_read_stable_8(pcp)	percpu_stable_op(8, "mov", pcp)
+
 #define raw_cpu_add_8(pcp, val)			percpu_add_op(8, , (pcp), val)
 #define raw_cpu_and_8(pcp, val)			percpu_to_op(8, , "and", (pcp), val)
 #define raw_cpu_or_8(pcp, val)			percpu_to_op(8, , "or", (pcp), val)
@@ -575,41 +554,41 @@ do { \
 #define this_cpu_and_8(pcp, val)		percpu_to_op(8, volatile, "and", (pcp), val)
 #define this_cpu_or_8(pcp, val)			percpu_to_op(8, volatile, "or", (pcp), val)
 #define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_xchg_8(pcp, nval)		this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval)	percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
-#endif
-
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
-					const unsigned long __percpu *addr)
-{
-	unsigned long __percpu *a =
-		(unsigned long __percpu *)addr + nr / BITS_PER_LONG;
 
-#ifdef CONFIG_X86_64
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
+#define raw_cpu_read_long(pcp)		raw_cpu_read_8(pcp)
 #else
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
+/* There is no generic 64 bit read stable operation for 32 bit targets. */
+#define this_cpu_read_stable_8(pcp)	({ BUILD_BUG(); (typeof(pcp))0; })
 
-static inline bool x86_this_cpu_variable_test_bit(int nr,
-					const unsigned long __percpu *addr)
-{
-	bool oldbit;
+#define raw_cpu_read_long(pcp)		raw_cpu_read_4(pcp)
+#endif
 
-	asm volatile("btl " __percpu_arg(2)",%1"
-			CC_SET(c)
-			: CC_OUT(c) (oldbit)
-			: "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
+#define x86_this_cpu_constant_test_bit(_nr, _var)			\
+({									\
+	unsigned long __percpu *addr__ =				\
+		(unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+	!!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__));	\
+})
 
-	return oldbit;
-}
+#define x86_this_cpu_variable_test_bit(_nr, _var)			\
+({									\
+	bool oldbit;							\
+									\
+	asm volatile("btl %[nr], " __percpu_arg([var])			\
+		     CC_SET(c)						\
+		     : CC_OUT(c) (oldbit)				\
+		     : [var] "m" (__my_cpu_var(_var)),			\
+		       [nr] "rI" (_nr));				\
+	oldbit;								\
+})
 
-#define x86_this_cpu_test_bit(nr, addr)				\
-	(__builtin_constant_p((nr))				\
-	 ? x86_this_cpu_constant_test_bit((nr), (addr))		\
-	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+#define x86_this_cpu_test_bit(_nr, _var)			\
+	(__builtin_constant_p(_nr)				\
+	 ? x86_this_cpu_constant_test_bit(_nr, _var)		\
+	 : x86_this_cpu_variable_test_bit(_nr, _var))
 
 
 #include <asm-generic/percpu.h>
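
Illustrative note (not part of the patch): the macro form of x86_this_cpu_test_bit() dispatches at compile time. A literal bit number satisfies __builtin_constant_p() and folds into a percpu load plus a constant mask; a runtime value emits a single btl against the percpu word. With a hypothetical DECLARE_PER_CPU(unsigned long, demo_flags):

	bool a = x86_this_cpu_test_bit(3, demo_flags);	/* constant: load + mask */
	bool b = x86_this_cpu_test_bit(nr, demo_flags);	/* variable: one btl     */

Converting the helpers from functions to macros is also what lets them take the percpu variable itself (_var) rather than an address, matching the rest of the this_cpu API.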