 	.code32
 	.text
 #define _pa(x) ((x) - __START_KERNEL_map)
+#define rva(x) ((x) - pvh_start_xen)
 
 #include <linux/elfnote.h>
 #include <linux/init.h>
@@ -15,6 +16,7 @@
 #include <asm/segment.h>
 #include <asm/asm.h>
 #include <asm/boot.h>
+#include <asm/pgtable.h>
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
 #include <asm/nospec-branch.h>
@@ -54,22 +56,40 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
 	UNWIND_HINT_END_OF_STACK
 	cld
 
-	lgdt (_pa(gdt))
+	/*
+	 * See the comment for startup_32 for more details. We need to
+	 * execute a call to get the execution address to be position
+	 * independent, but we don't have a stack. Save and restore the
+	 * magic field of start_info in ebx, and use that as the stack.
+	 */
+	mov (%ebx), %eax
+	leal 4(%ebx), %esp
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call 1f
+1:	popl %ebp
+	mov %eax, (%ebx)
+	subl $rva(1b), %ebp
+	movl $0, %esp
+
+	leal rva(gdt)(%ebp), %eax
+	leal rva(gdt_start)(%ebp), %ecx
+	movl %ecx, 2(%eax)
+	lgdt (%eax)
 
 	mov $PVH_DS_SEL,%eax
 	mov %eax,%ds
 	mov %eax,%es
 	mov %eax,%ss
 
 	/* Stash hvm_start_info. */
-	mov $_pa(pvh_start_info), %edi
+	leal rva(pvh_start_info)(%ebp), %edi
 	mov %ebx, %esi
-	mov _pa(pvh_start_info_sz), %ecx
+	movl rva(pvh_start_info_sz)(%ebp), %ecx
 	shr $2,%ecx
 	rep
 	movsl
 
-	mov $_pa(early_stack_end), %esp
+	leal rva(early_stack_end)(%ebp), %esp
 
 	/* Enable PAE mode. */
 	mov %cr4, %eax
@@ -83,31 +103,86 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
 	btsl $_EFER_LME, %eax
 	wrmsr
 
+	mov %ebp, %ebx
+	subl $_pa(pvh_start_xen), %ebx /* offset */
+	jz .Lpagetable_done
+
+	/* Fixup page-tables for relocation. */
+	leal rva(pvh_init_top_pgt)(%ebp), %edi
+	movl $PTRS_PER_PGD, %ecx
+2:
+	testl $_PAGE_PRESENT, 0x00(%edi)
+	jz 1f
+	addl %ebx, 0x00(%edi)
+1:
+	addl $8, %edi
+	decl %ecx
+	jnz 2b
+
+	/* L3 ident has a single entry. */
+	leal rva(pvh_level3_ident_pgt)(%ebp), %edi
+	addl %ebx, 0x00(%edi)
+
+	leal rva(pvh_level3_kernel_pgt)(%ebp), %edi
+	addl %ebx, (PAGE_SIZE - 16)(%edi)
+	addl %ebx, (PAGE_SIZE - 8)(%edi)
+
+	/* pvh_level2_ident_pgt is fine - large pages */
+
+	/* pvh_level2_kernel_pgt needs adjustment - large pages */
+	leal rva(pvh_level2_kernel_pgt)(%ebp), %edi
+	movl $PTRS_PER_PMD, %ecx
+2:
+	testl $_PAGE_PRESENT, 0x00(%edi)
+	jz 1f
+	addl %ebx, 0x00(%edi)
+1:
+	addl $8, %edi
+	decl %ecx
+	jnz 2b
+
+.Lpagetable_done:
 	/* Enable pre-constructed page tables. */
-	mov $_pa(init_top_pgt), %eax
+	leal rva(pvh_init_top_pgt)(%ebp), %eax
 	mov %eax, %cr3
 	mov $(X86_CR0_PG | X86_CR0_PE), %eax
 	mov %eax, %cr0
 
 	/* Jump to 64-bit mode. */
-	ljmp $PVH_CS_SEL, $_pa(1f)
+	pushl $PVH_CS_SEL
+	leal rva(1f)(%ebp), %eax
+	pushl %eax
+	lretl
 
 	/* 64-bit entry point. */
 	.code64
 1:
+	UNWIND_HINT_END_OF_STACK
+
 	/* Set base address in stack canary descriptor. */
 	mov $MSR_GS_BASE,%ecx
-	mov $_pa(canary), %eax
+	leal canary(%rip), %eax
 	xor %edx, %edx
 	wrmsr
 
+	/*
+	 * Calculate load offset and store in phys_base. __pa() needs
+	 * phys_base set to calculate the hypercall page in xen_pvh_init().
+	 */
+	movq %rbp, %rbx
+	subq $_pa(pvh_start_xen), %rbx
+	movq %rbx, phys_base(%rip)
 	call xen_prepare_pvh
+	/*
+	 * Clear phys_base. __startup_64 will *add* to its value,
+	 * so reset to 0.
+	 */
+	xor %rbx, %rbx
+	movq %rbx, phys_base(%rip)
 
 	/* startup_64 expects boot_params in %rsi. */
-	mov $_pa(pvh_bootparams), %rsi
-	mov $_pa(startup_64), %rax
-	ANNOTATE_RETPOLINE_SAFE
-	jmp *%rax
+	lea pvh_bootparams(%rip), %rsi
+	jmp startup_64
 
 #else /* CONFIG_X86_64 */
 
@@ -143,7 +218,7 @@ SYM_CODE_END(pvh_start_xen)
 	.balign 8
 SYM_DATA_START_LOCAL(gdt)
 	.word gdt_end - gdt_start
-	.long _pa(gdt_start)
+	.long _pa(gdt_start) /* x86-64 will overwrite if relocated. */
 	.word 0
 SYM_DATA_END(gdt)
 SYM_DATA_START_LOCAL(gdt_start)
@@ -163,5 +238,67 @@ SYM_DATA_START_LOCAL(early_stack)
 	.fill BOOT_STACK_SIZE, 1, 0
 SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
 
+#ifdef CONFIG_X86_64
+/*
+ * Xen PVH needs a set of identity mapped and kernel high mapping
+ * page tables.  pvh_start_xen starts running on the identity mapped
+ * page tables, but xen_prepare_pvh calls into the high mapping.
+ * These page tables need to be relocatable and are only used until
+ * startup_64 transitions to init_top_pgt.
+ */
+SYM_DATA_START_PAGE_ALIGNED(pvh_init_top_pgt)
+	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.org pvh_init_top_pgt + L4_PAGE_OFFSET * 8, 0
+	.quad pvh_level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.org pvh_init_top_pgt + L4_START_KERNEL * 8, 0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad pvh_level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(pvh_init_top_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_ident_pgt)
+	.quad pvh_level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.fill 511, 8, 0
+SYM_DATA_END(pvh_level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_ident_pgt)
+	/*
+	 * Since I easily can, map the first 1G.
+	 * Don't set NX because code runs from these pages.
+	 *
+	 * Note: This sets _PAGE_GLOBAL despite whether
+	 * the CPU supports it or it is enabled. But,
+	 * the CPU should ignore the bit.
+	 */
+	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(pvh_level2_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(pvh_level3_kernel_pgt)
+	.fill L3_START_KERNEL, 8, 0
+	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+	.quad pvh_level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+	.quad 0 /* no fixmap */
+SYM_DATA_END(pvh_level3_kernel_pgt)
+
+SYM_DATA_START_PAGE_ALIGNED(pvh_level2_kernel_pgt)
+	/*
+	 * Kernel high mapping.
+	 *
+	 * The kernel code+data+bss must be located below KERNEL_IMAGE_SIZE in
+	 * virtual address space, which is 1 GiB if RANDOMIZE_BASE is enabled,
+	 * 512 MiB otherwise.
+	 *
+	 * (NOTE: after that starts the module area, see MODULES_VADDR.)
+	 *
+	 * This table is eventually used by the kernel during normal runtime.
+	 * Care must be taken to clear out undesired bits later, like _PAGE_RW
+	 * or _PAGE_GLOBAL in some cases.
+	 */
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE / PMD_SIZE)
+SYM_DATA_END(pvh_level2_kernel_pgt)
+
+ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_RELOC,
+	.long CONFIG_PHYSICAL_ALIGN;
+	.long LOAD_PHYSICAL_ADDR;
+	.long KERNEL_IMAGE_SIZE - 1)
+#endif
+
 ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
 	_ASM_PTR (pvh_start_xen - __START_KERNEL_map))
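The index arithmetic in the two page-table comments above ((2^48 - 2 GiB)/2^39 = 511 for the top-level slot, and 510 for the L3 slot) can be reproduced with a small standalone check. The snippet below is only an illustration, not kernel code: START_KERNEL_MAP is a local stand-in for the kernel's __START_KERNEL_map constant (0xffffffff80000000), and the shift/mask math mirrors what the L4_START_KERNEL and L3_START_KERNEL indices used in the .org/.fill directives evaluate to.

```c
/*
 * Standalone sanity check (not kernel code) for the 511/510 index
 * comments in pvh_init_top_pgt / pvh_level3_kernel_pgt above.
 */
#include <stdio.h>
#include <stdint.h>

/* Local stand-in for the kernel's __START_KERNEL_map. */
#define START_KERNEL_MAP 0xffffffff80000000ULL

int main(void)
{
	/* 9-bit index into the top-level (L4/PGD) table: bits 47..39 */
	unsigned int l4_index = (START_KERNEL_MAP >> 39) & 0x1ff;
	/* 9-bit index into the L3 (PUD) table: bits 38..30 */
	unsigned int l3_index = (START_KERNEL_MAP >> 30) & 0x1ff;

	printf("L4 index = %u, L3 index = %u\n", l4_index, l3_index);
	return 0;	/* prints "L4 index = 511, L3 index = 510" */
}
```

Because the kernel high mapping lands in the last two quads of pvh_level3_kernel_pgt (slots 510 and 511), the relocation fixup earlier in the diff patches them via (PAGE_SIZE - 16)(%edi) and (PAGE_SIZE - 8)(%edi) rather than walking the whole table.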