Skip to content

Commit 6c480f2

Browse files
Peter Zijlstra authored and bp3tk0v committed
x86/alternative: Rewrite optimize_nops() some
Address two issues: - it no longer hard requires single byte NOP runs - now it accepts any NOP and NOPL encoded instruction (but not the more complicated 32bit NOPs). - it writes a single 'instruction' replacement. Specifically, ORC unwinder relies on the tail NOP of an alternative to be a single instruction. In particular, it relies on the inner bytes not being executed. Once the max supported NOP length has been reached (currently 8, could easily be extended to 11 on x86_64), switch to JMP.d8 and INT3 padding to achieve the same result. Objtool uses this guarantee in the analysis of alternative/overlapping CFI state for the ORC unwinder data. Every instruction edge gets a CFI state and the more instructions the larger the chance of conflicts. [ bp: - Add a comment over add_nop() to explain why it does it this way - Make add_nops() PARAVIRT only as it is used solely there now ] Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Link: https://lore.kernel.org/r/20230208171431.373412974@infradead.org
1 parent 14e4ec9 commit 6c480f2

1 file changed

Lines changed: 71 additions & 58 deletions

File tree

arch/x86/kernel/alternative.c

Lines changed: 71 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -113,17 +113,35 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
113113
x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
114114
};
115115

116-
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
117-
static void __init_or_module add_nops(void *insns, unsigned int len)
116+
/*
117+
* In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
118+
* for every single-byte NOP, try to generate the maximally available NOP of
119+
* size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
120+
* each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
121+
* *jump* over instead of executing long and daft NOPs.
122+
*/
123+
static void __init_or_module add_nop(u8 *instr, unsigned int len)
118124
{
119-
while (len > 0) {
120-
unsigned int noplen = len;
121-
if (noplen > ASM_NOP_MAX)
122-
noplen = ASM_NOP_MAX;
123-
memcpy(insns, x86_nops[noplen], noplen);
124-
insns += noplen;
125-
len -= noplen;
125+
u8 *target = instr + len;
126+
127+
if (!len)
128+
return;
129+
130+
if (len <= ASM_NOP_MAX) {
131+
memcpy(instr, x86_nops[len], len);
132+
return;
126133
}
134+
135+
if (len < 128) {
136+
__text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
137+
instr += JMP8_INSN_SIZE;
138+
} else {
139+
__text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
140+
instr += JMP32_INSN_SIZE;
141+
}
142+
143+
for (;instr < target; instr++)
144+
*instr = INT3_INSN_OPCODE;
127145
}
128146

129147
extern s32 __retpoline_sites[], __retpoline_sites_end[];
@@ -134,39 +152,32 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
134152
extern s32 __smp_locks[], __smp_locks_end[];
135153
void text_poke_early(void *addr, const void *opcode, size_t len);
136154

137-
/*
138-
* optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
139-
*
140-
* @instr: instruction byte stream
141-
* @instrlen: length of the above
142-
* @off: offset within @instr where the first NOP has been detected
143-
*
144-
* Return: number of NOPs found (and replaced).
145-
*/
146-
static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
155+
static bool insn_is_nop(struct insn *insn)
147156
{
148-
unsigned long flags;
149-
int i = off, nnops;
157+
if (insn->opcode.bytes[0] == 0x90)
158+
return true;
150159

151-
while (i < instrlen) {
152-
if (instr[i] != 0x90)
153-
break;
160+
if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
161+
return true;
154162

155-
i++;
156-
}
163+
/* TODO: more nops */
157164

158-
nnops = i - off;
165+
return false;
166+
}
159167

160-
if (nnops <= 1)
161-
return nnops;
168+
/*
 * Starting at @offset, decode instructions in @instr and advance over any
 * run of NOPs. Returns the offset of the first non-NOP (or undecodable)
 * instruction, or @len when NOPs run to the end of the buffer.
 */
static int skip_nops(u8 *instr, int offset, int len)
{
	struct insn insn;

	for (; offset < len; offset += insn.length) {
		if (insn_decode_kernel(&insn, &instr[offset]))
			break;

		if (!insn_is_nop(&insn))
			break;
	}

	return offset;
}
171182

172183
/*
@@ -175,28 +186,19 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
175186
*/
176187
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
	/* Walk the buffer one decoded instruction at a time. */
	for (int next, i = 0; i < len; i = next) {
		struct insn insn;

		if (insn_decode_kernel(&insn, &instr[i]))
			return;

		next = i + insn.length;

		/*
		 * Collapse a whole run of NOPs into a single replacement
		 * "instruction" (one big NOP, or JMP + INT3 padding) so the
		 * ORC unwinder sees only one instruction edge.
		 */
		if (insn_is_nop(&insn)) {
			next = skip_nops(instr, next, len);
			add_nop(instr + i, next - i);
			DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, i, next);
		}
	}
}
202204

@@ -323,13 +325,10 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
323325
}
324326
}
325327

326-
327-
/*
328-
* See if this and any potentially following NOPs can be
329-
* optimized.
330-
*/
331-
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
332-
next = i + optimize_nops_range(buf, len, i);
328+
if (insn_is_nop(&insn)) {
329+
next = skip_nops(buf, next, len);
330+
add_nop(buf + i, next - i);
331+
}
333332
}
334333
}
335334

@@ -1289,6 +1288,20 @@ int alternatives_text_reserved(void *start, void *end)
12891288
#endif /* CONFIG_SMP */
12901289

12911290
#ifdef CONFIG_PARAVIRT
1291+
1292+
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
1293+
static void __init_or_module add_nops(void *insns, unsigned int len)
1294+
{
1295+
while (len > 0) {
1296+
unsigned int noplen = len;
1297+
if (noplen > ASM_NOP_MAX)
1298+
noplen = ASM_NOP_MAX;
1299+
memcpy(insns, x86_nops[noplen], noplen);
1300+
insns += noplen;
1301+
len -= noplen;
1302+
}
1303+
}
1304+
12921305
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
12931306
struct paravirt_patch_site *end)
12941307
{

0 commit comments

Comments
 (0)