Skip to content

Commit b0574ba

Browse files
mrathor99 authored and liuw committed
x86/hyperv: Add trampoline asm code to transition from hypervisor
Introduce a small asm stub to transition from the hypervisor to Linux after devirtualization. Devirtualization means disabling the hypervisor on the fly, so after it is done, the code is running on a physical processor instead of a virtual one, and the hypervisor is gone. This can be done by a root VM only. At a high level, during a panic of either the hypervisor or the root, the NMI handler asks the hypervisor to devirtualize. As part of that, the arguments include an entry point to return back to Linux. This asm stub implements that entry point. The stub is entered in protected mode, uses a temporary GDT and page table to enable long mode and get to the kernel entry point, which then restores the full kernel context to resume execution to kexec. Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com> Signed-off-by: Wei Liu <wei.liu@kernel.org>
1 parent e0a975e commit b0574ba

1 file changed

Lines changed: 101 additions & 0 deletions

File tree

arch/x86/hyperv/hv_trampoline.S

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* X86 specific Hyper-V kdump/crash related code.
4+
*
5+
* Copyright (C) 2025, Microsoft, Inc.
6+
*
7+
*/
8+
#include <linux/linkage.h>
9+
#include <asm/alternative.h>
10+
#include <asm/msr.h>
11+
#include <asm/processor-flags.h>
12+
#include <asm/nospec-branch.h>
13+
14+
/*
15+
* void noreturn hv_crash_asm32(arg1)
16+
* arg1 == edi == 32bit PA of struct hv_crash_tramp_data
17+
*
18+
* The hypervisor jumps here, in protected mode, upon devirtualization. This
19+
* code gets copied to a page in the low 4G, i.e. 32-bit space, so that it can
20+
* run in protected mode. Hence we cannot use any compile/link time offsets or
21+
* addresses. It restores long mode via a temporary GDT and page tables and
22+
* eventually jumps to the kernel entry stored at HV_CRASHDATA_OFFS_C_entry.
23+
*
24+
* Preconditions (i.e., the hypervisor callback ABI):
25+
* o CR0 is set to 0x0021: PE(prot mode) and NE are set, paging is disabled
26+
* o CR4 is set to 0x0
27+
* o IA32_EFER is set to 0x901 (SCE and NXE are set)
28+
* o EDI is set to the Arg passed to HVCALL_DISABLE_HYP_EX.
29+
* o CS, DS, ES, FS, GS are all initialized with a base of 0 and limit 0xFFFF
30+
* o IDTR, TR and GDTR are initialized with a base of 0 and limit of 0xFFFF
31+
* o LDTR is initialized as invalid (limit of 0)
32+
* o MSR PAT is power on default.
33+
* o Other state/registers are cleared. All TLBs flushed.
34+
*/
35+
36+
/*
 * Byte offsets into the crash trampoline data whose 32-bit physical address
 * arrives in %edi. The stubs below add these to %edi to locate each field.
 * NOTE(review): presumably these must match the layout of struct
 * hv_crash_tramp_data on the C side - confirm whenever that struct changes.
 */
#define HV_CRASHDATA_OFFS_TRAMPCR3 0x0 /* 0 */
37+
#define HV_CRASHDATA_OFFS_KERNCR3 0x8 /* 8 */
38+
#define HV_CRASHDATA_OFFS_GDTRLIMIT 0x12 /* 18 */
39+
#define HV_CRASHDATA_OFFS_CS_JMPTGT 0x28 /* 40 */
40+
#define HV_CRASHDATA_OFFS_C_entry 0x30 /* 48 */
41+
42+
.text
43+
/* The entry stub is assembled as 32-bit code. */
.code32
44+
45+
SYM_CODE_START(hv_crash_asm32)
46+
UNWIND_HINT_UNDEFINED
47+
ENDBR
48+
/*
 * Register use in this stub: %edi (base address of the trampoline data) is
 * only read, never written. %eax, %ebx and %ecx are scratch, and rdmsr
 * additionally overwrites %edx. No stack is used.
 *
 * Step 1: load CR4 with only the PAE bit set.
 */
movl $X86_CR4_PAE, %ecx
49+
movl %ecx, %cr4
50+
51+
/*
 * Step 2: load CR3 from the 32-bit value at offset
 * HV_CRASHDATA_OFFS_TRAMPCR3 of the trampoline data. All data reads in
 * this file use an explicit %cs segment override.
 */
movl %edi, %ebx
52+
add $HV_CRASHDATA_OFFS_TRAMPCR3, %ebx
53+
movl %cs:(%ebx), %eax
54+
movl %eax, %cr3
55+
56+
/* Step 3: set up EFER for long mode now. */
57+
/*
 * Read-modify-write: rdmsr returns EFER in %edx:%eax, btsl sets the LME
 * bit in the low half, and wrmsr writes the pair back.
 * NOTE(review): the precondition comment at the top of the file gives EFER
 * as 0x901, a value that already has bit 8 set; if _EFER_LME is 8 this
 * btsl is a no-op safeguard - confirm against the hypervisor ABI.
 */
movl $MSR_EFER, %ecx
58+
rdmsr
59+
btsl $_EFER_LME, %eax
60+
wrmsr
61+
62+
/*
 * Step 4: turn paging on using the temp 32bit trampoline page table.
 * With CR4.PAE and EFER.LME already set, setting CR0.PG activates long
 * mode; CS is not reloaded until the far jump at the end of this stub.
 */
63+
movl %cr0, %eax
64+
orl $(X86_CR0_PG), %eax
65+
movl %eax, %cr0
66+
67+
/* Step 5: since the kernel cr3 could be above 4G, we need to be in long mode
68+
* before we can load all 64 bits of it. We use a temp gdt for that, with
69+
* CS.L=1 and CS.D=0; lgdtl reads its limit and base from the trampoline
* data at offset HV_CRASHDATA_OFFS_GDTRLIMIT. */
70+
mov %edi, %eax
71+
add $HV_CRASHDATA_OFFS_GDTRLIMIT, %eax
72+
lgdtl %cs:(%eax)
73+
74+
/*
 * Step 6: not done yet, reload CS now to switch to CS.L=1. The indirect far
 * jump takes its target (offset and selector) from the trampoline data at
 * offset HV_CRASHDATA_OFFS_CS_JMPTGT and does not return.
 * NOTE(review): presumably the target is the relocated copy of
 * hv_crash_asm64 - confirm against the C code that fills in the data.
 */
75+
mov %edi, %eax
76+
add $HV_CRASHDATA_OFFS_CS_JMPTGT, %eax
77+
ljmp %cs:*(%eax)
78+
SYM_CODE_END(hv_crash_asm32)
79+
80+
/* We now run in full 64-bit long mode (IA-32e), with CS.L=1 and CS.D=0. */
81+
/* From here on, instructions are assembled as 64-bit code. */
.code64
82+
/* Start hv_crash_asm64 on an 8-byte boundary. */
.balign 8
83+
SYM_CODE_START(hv_crash_asm64)
84+
UNWIND_HINT_UNDEFINED
85+
ENDBR
86+
/*
 * Restore kernel page tables so we can jump to kernel code.
 * %edi still holds the 32-bit physical address of the trampoline data. The
 * 32-bit mov/add zero-extend into %rax and the load uses a 32-bit effective
 * address, then movq fetches the full 64-bit kernel CR3 value.
 * NOTE(review): the next instruction is fetched under the kernel page
 * tables, so presumably the page holding this stub is mapped at the same
 * address there - confirm against the C setup code.
 */
87+
mov %edi, %eax
88+
add $HV_CRASHDATA_OFFS_KERNCR3, %eax
89+
movq %cs:(%eax), %rbx
90+
movq %rbx, %cr3
91+
92+
/*
 * Load the 64-bit kernel entry address stored at offset
 * HV_CRASHDATA_OFFS_C_entry and jump to it. The jump target is taken from
 * memory at run time, so no link-time address is involved.
 */
mov %edi, %eax
93+
add $HV_CRASHDATA_OFFS_C_entry, %eax
94+
movq %cs:(%eax), %rbx
95+
/* Deliberate plain indirect jump; the annotation marks it as intended. */
ANNOTATE_RETPOLINE_SAFE
96+
jmp *%rbx
97+
98+
/*
 * Not reached in normal execution: the jmp above is unconditional.
 * NOTE(review): presumably a guard against falling through - confirm.
 */
int $3
99+
100+
/*
 * Global label at the end of the stub code.
 * NOTE(review): presumably used by the C side to compute how many bytes to
 * copy to the low-memory page - confirm against its users.
 */
SYM_INNER_LABEL(hv_crash_asm_end, SYM_L_GLOBAL)
101+
SYM_CODE_END(hv_crash_asm64)

0 commit comments

Comments
 (0)