Skip to content

Commit b6c881b

Browse files
Peter Zijlstra authored and bp3tk0v committed
x86/alternative: Complicate optimize_nops() some more
Because:

  SMP alternatives: ffffffff810026dc: [2:44) optimized NOPs: eb 2a eb 28 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc

is quite daft, make things more complicated and have the NOP runlength detection eat the preceding JMP if they both end at the same target.

  SMP alternatives: ffffffff810026dc: [0:44) optimized NOPs: eb 2a cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230208171431.433132442@infradead.org
1 parent 6c480f2 commit b6c881b

1 file changed

Lines changed: 50 additions & 10 deletions

File tree

arch/x86/kernel/alternative.c

Lines changed: 50 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,8 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
114114
};
115115

116116
/*
117+
* Fill the buffer with a single effective instruction of size @len.
118+
*
117119
* In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
118120
* for every single-byte NOP, try to generate the maximally available NOP of
119121
* size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
@@ -152,6 +154,9 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
152154
extern s32 __smp_locks[], __smp_locks_end[];
153155
void text_poke_early(void *addr, const void *opcode, size_t len);
154156

157+
/*
158+
* Matches NOP and NOPL, not any of the other possible NOPs.
159+
*/
155160
static bool insn_is_nop(struct insn *insn)
156161
{
157162
if (insn->opcode.bytes[0] == 0x90)
@@ -165,6 +170,10 @@ static bool insn_is_nop(struct insn *insn)
165170
return false;
166171
}
167172

173+
/*
174+
* Find the offset of the first non-NOP instruction starting at @offset
175+
* but no further than @len.
176+
*/
168177
static int skip_nops(u8 *instr, int offset, int len)
169178
{
170179
struct insn insn;
@@ -180,12 +189,47 @@ static int skip_nops(u8 *instr, int offset, int len)
180189
return offset;
181190
}
182191

192+
/*
193+
* Optimize a sequence of NOPs, possibly preceded by an unconditional jump
194+
* to the end of the NOP sequence into a single NOP.
195+
*/
196+
static bool __optimize_nops(u8 *instr, size_t len, struct insn *insn,
197+
int *next, int *prev, int *target)
198+
{
199+
int i = *next - insn->length;
200+
201+
switch (insn->opcode.bytes[0]) {
202+
case JMP8_INSN_OPCODE:
203+
case JMP32_INSN_OPCODE:
204+
*prev = i;
205+
*target = *next + insn->immediate.value;
206+
return false;
207+
}
208+
209+
if (insn_is_nop(insn)) {
210+
int nop = i;
211+
212+
*next = skip_nops(instr, *next, len);
213+
if (*target && *next == *target)
214+
nop = *prev;
215+
216+
add_nop(instr + nop, *next - nop);
217+
DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
218+
return true;
219+
}
220+
221+
*target = 0;
222+
return false;
223+
}
224+
183225
/*
184226
* "noinline" to cause control flow change and thus invalidate I$ and
185227
* cause refetch after modification.
186228
*/
187229
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
188230
{
231+
int prev, target = 0;
232+
189233
for (int next, i = 0; i < len; i = next) {
190234
struct insn insn;
191235

@@ -194,11 +238,7 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
194238

195239
next = i + insn.length;
196240

197-
if (insn_is_nop(&insn)) {
198-
next = skip_nops(instr, next, len);
199-
add_nop(instr + i, next - i);
200-
DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, i, next);
201-
}
241+
__optimize_nops(instr, len, &insn, &next, &prev, &target);
202242
}
203243
}
204244

@@ -275,6 +315,8 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
275315
static void __init_or_module noinline
276316
apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
277317
{
318+
int prev, target = 0;
319+
278320
for (int next, i = 0; i < len; i = next) {
279321
struct insn insn;
280322

@@ -283,6 +325,9 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
283325

284326
next = i + insn.length;
285327

328+
if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
329+
continue;
330+
286331
switch (insn.opcode.bytes[0]) {
287332
case 0x0f:
288333
if (insn.opcode.bytes[1] < 0x80 ||
@@ -324,11 +369,6 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
324369
src - dest);
325370
}
326371
}
327-
328-
if (insn_is_nop(&insn)) {
329-
next = skip_nops(buf, next, len);
330-
add_nop(buf + i, next - i);
331-
}
332372
}
333373
}
334374

0 commit comments

Comments
 (0)