Skip to content

Commit 7c097ca

Browse files
ubizjak authored and ingomolnar committed
x86/percpu: Do not clobber %rsi in percpu_{try_,}cmpxchg{64,128}_op
The fallback alternative uses %rsi register to manually load pointer to the percpu variable before the call to the emulation function. This is suboptimal, because the load is hidden from the compiler.

Move the load of %rsi outside inline asm, so the compiler can reuse the value. The code in slub.o improves from:

    55ac: 49 8b 3c 24       mov    (%r12),%rdi
    55b0: 48 8d 4a 40       lea    0x40(%rdx),%rcx
    55b4: 49 8b 1c 07       mov    (%r15,%rax,1),%rbx
    55b8: 4c 89 f8          mov    %r15,%rax
    55bb: 48 8d 37          lea    (%rdi),%rsi
    55be: e8 00 00 00 00    callq  55c3 <...>
              55bf: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    55c3: 75 a3             jne    5568 <...>
    55c5: ...

    0000000000000000 <.altinstr_replacement>:
       5: 65 48 0f c7 0f    cmpxchg16b %gs:(%rdi)

to:

    55ac: 49 8b 34 24       mov    (%r12),%rsi
    55b0: 48 8d 4a 40       lea    0x40(%rdx),%rcx
    55b4: 49 8b 1c 07       mov    (%r15,%rax,1),%rbx
    55b8: 4c 89 f8          mov    %r15,%rax
    55bb: e8 00 00 00 00    callq  55c0 <...>
              55bc: R_X86_64_PLT32 this_cpu_cmpxchg16b_emu-0x4
    55c0: 75 a6             jne    5568 <...>
    55c2: ...

Where the alternative replacement instruction now uses %rsi:

    0000000000000000 <.altinstr_replacement>:
       5: 65 48 0f c7 0e    cmpxchg16b %gs:(%rsi)

The instruction (effectively a reg-reg move) at 55bb: in the original assembly is removed. Also, both the CALL and replacement CMPXCHG16B are 5 bytes long, removing the need for NOPs in the asm code.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230918151452.62344-1-ubizjak@gmail.com
1 parent b8e3dfa commit 7c097ca

1 file changed

Lines changed: 16 additions & 12 deletions

File tree

arch/x86/include/asm/percpu.h

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -242,14 +242,15 @@ do { \
242242
old__.var = _oval; \
243243
new__.var = _nval; \
244244
\
245-
asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
245+
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
246246
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
247247
: [var] "+m" (_var), \
248248
"+a" (old__.low), \
249249
"+d" (old__.high) \
250250
: "b" (new__.low), \
251-
"c" (new__.high) \
252-
: "memory", "esi"); \
251+
"c" (new__.high), \
252+
"S" (&(_var)) \
253+
: "memory"); \
253254
\
254255
old__.var; \
255256
})
@@ -271,16 +272,17 @@ do { \
271272
old__.var = *_oval; \
272273
new__.var = _nval; \
273274
\
274-
asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
275+
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
275276
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
276277
CC_SET(z) \
277278
: CC_OUT(z) (success), \
278279
[var] "+m" (_var), \
279280
"+a" (old__.low), \
280281
"+d" (old__.high) \
281282
: "b" (new__.low), \
282-
"c" (new__.high) \
283-
: "memory", "esi"); \
283+
"c" (new__.high), \
284+
"S" (&(_var)) \
285+
: "memory"); \
284286
if (unlikely(!success)) \
285287
*_oval = old__.var; \
286288
likely(success); \
@@ -309,14 +311,15 @@ do { \
309311
old__.var = _oval; \
310312
new__.var = _nval; \
311313
\
312-
asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
314+
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
313315
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
314316
: [var] "+m" (_var), \
315317
"+a" (old__.low), \
316318
"+d" (old__.high) \
317319
: "b" (new__.low), \
318-
"c" (new__.high) \
319-
: "memory", "rsi"); \
320+
"c" (new__.high), \
321+
"S" (&(_var)) \
322+
: "memory"); \
320323
\
321324
old__.var; \
322325
})
@@ -338,16 +341,17 @@ do { \
338341
old__.var = *_oval; \
339342
new__.var = _nval; \
340343
\
341-
asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
344+
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
342345
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
343346
CC_SET(z) \
344347
: CC_OUT(z) (success), \
345348
[var] "+m" (_var), \
346349
"+a" (old__.low), \
347350
"+d" (old__.high) \
348351
: "b" (new__.low), \
349-
"c" (new__.high) \
350-
: "memory", "rsi"); \
352+
"c" (new__.high), \
353+
"S" (&(_var)) \
354+
: "memory"); \
351355
if (unlikely(!success)) \
352356
*_oval = old__.var; \
353357
likely(success); \

0 commit comments

Comments
 (0)