Skip to content

Commit 669bc57

Browse files
juergenchrist authored and Alexander Gordeev committed
s390/bitops: Optimize inlining
GCC inlining heuristics prevent code growth due to inlining into cold paths. This causes GCC to emit a partially specialized version of __flogr for non-constant input for all occurrences on cold paths. This happens since the overhead seen during inlining includes setting up a union register_pair, calling flogr, and extracting and casting the result. This overhead is not removed until the function is lowered into RTL. But this happens after inlining. For -ftrivial-auto-var-init=zero builds, an additional initialization of the union register_pair adds another statement to be inlined. This is unneeded since the even register is initialized anyway and the odd register is not an input register. It is only marked as such since the whole pair has to be marked as a read/write output register. Mark the union register_pair as uninitialized to get rid of this statement. This, however, does not change the code since the initialization happens when part of the register pair is written. Nevertheless, GCC function size approximation during inlining is reduced by one statement. Force inlining of flogr and also flatten some other functions that should be leaf functions but are called in cold context, e.g., __init functions. Acked-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Juergen Christ <jchrist@linux.ibm.com> Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
1 parent de88e74 commit 669bc57

1 file changed

Lines changed: 7 additions & 7 deletions

File tree

arch/s390/include/asm/bitops.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ static inline bool test_bit_inv(unsigned long nr,
130130
* where the most significant bit has bit number 0.
131131
* If no bit is set this function returns 64.
132132
*/
133-
static inline unsigned char __flogr(unsigned long word)
133+
static __always_inline unsigned char __flogr(unsigned long word)
134134
{
135135
if (__builtin_constant_p(word)) {
136136
unsigned long bit = 0;
@@ -163,7 +163,7 @@ static inline unsigned char __flogr(unsigned long word)
163163
}
164164
return bit;
165165
} else {
166-
union register_pair rp;
166+
union register_pair rp __uninitialized;
167167

168168
rp.even = word;
169169
asm volatile(
@@ -179,7 +179,7 @@ static inline unsigned char __flogr(unsigned long word)
179179
*
180180
* Undefined if no bit exists, so code should check against 0 first.
181181
*/
182-
static inline unsigned long __ffs(unsigned long word)
182+
static __always_inline __flatten unsigned long __ffs(unsigned long word)
183183
{
184184
return __flogr(-word & word) ^ (BITS_PER_LONG - 1);
185185
}
@@ -191,7 +191,7 @@ static inline unsigned long __ffs(unsigned long word)
191191
* This is defined the same way as the libc and
192192
* compiler builtin ffs routines (man ffs).
193193
*/
194-
static inline int ffs(int word)
194+
static __always_inline __flatten int ffs(int word)
195195
{
196196
unsigned int val = (unsigned int)word;
197197

@@ -204,7 +204,7 @@ static inline int ffs(int word)
204204
*
205205
* Undefined if no set bit exists, so code should check against 0 first.
206206
*/
207-
static inline unsigned long __fls(unsigned long word)
207+
static __always_inline __flatten unsigned long __fls(unsigned long word)
208208
{
209209
return __flogr(word) ^ (BITS_PER_LONG - 1);
210210
}
@@ -220,7 +220,7 @@ static inline unsigned long __fls(unsigned long word)
220220
* set bit if value is nonzero. The last (most significant) bit is
221221
* at position 64.
222222
*/
223-
static inline int fls64(unsigned long word)
223+
static __always_inline __flatten int fls64(unsigned long word)
224224
{
225225
return BITS_PER_LONG - __flogr(word);
226226
}
@@ -232,7 +232,7 @@ static inline int fls64(unsigned long word)
232232
* This is defined the same way as ffs.
233233
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
234234
*/
235-
static inline int fls(unsigned int word)
235+
static __always_inline __flatten int fls(unsigned int word)
236236
{
237237
return fls64(word);
238238
}

0 commit comments

Comments
 (0)