Skip to content

Commit 3335068

Browse files
Alexandre Ghiti authored and palmer-dabbelt committed
riscv: Use PUD/P4D/PGD pages for the linear mapping
During the early page table creation, we used to set the mapping for PAGE_OFFSET to the kernel load address: but the kernel load address is always offseted by PMD_SIZE which makes it impossible to use PUD/P4D/PGD pages as this physical address is not aligned on PUD/P4D/PGD size (whereas PAGE_OFFSET is). But actually we don't have to establish this mapping (ie set va_pa_offset) that early in the boot process because: - first, setup_vm installs a temporary kernel mapping and among other things, discovers the system memory, - then, setup_vm_final creates the final kernel mapping and takes advantage of the discovered system memory to create the linear mapping. During the first phase, we don't know the start of the system memory and then until the second phase is finished, we can't use the linear mapping at all and phys_to_virt/virt_to_phys translations must not be used because it would result in a different translation from the 'real' one once the final mapping is installed. So here we simply delay the initialization of va_pa_offset to after the system memory discovery. But to make sure noone uses the linear mapping before, we add some guard in the DEBUG_VIRTUAL config. Finally we can use PUD/P4D/PGD hugepages when possible, which will result in a better TLB utilization. Note that: - this does not apply to rv32 as the kernel mapping lies in the linear mapping. - we rely on the firmware to protect itself using PMP. Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Acked-by: Rob Herring <robh@kernel.org> # DT bits Reviewed-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Anup Patel <anup@brainfault.org> Tested-by: Anup Patel <anup@brainfault.org> Link: https://lore.kernel.org/r/20230324155421.271544-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent 8589e34 commit 3335068

4 files changed

Lines changed: 90 additions & 11 deletions

File tree

arch/riscv/include/asm/page.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ typedef struct page *pgtable_t;
9090
#define PTE_FMT "%08lx"
9191
#endif
9292

93+
#ifdef CONFIG_64BIT
94+
/*
95+
* We override this value as its generic definition uses __pa too early in
96+
* the boot process (before kernel_map.va_pa_offset is set).
97+
*/
98+
#define MIN_MEMBLOCK_ADDR 0
99+
#endif
100+
93101
#ifdef CONFIG_MMU
94102
#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
95103
#else
@@ -121,7 +129,11 @@ extern phys_addr_t phys_ram_base;
121129
#define is_linear_mapping(x) \
122130
((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
123131

132+
#ifndef CONFIG_DEBUG_VIRTUAL
124133
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
134+
#else
135+
void *linear_mapping_pa_to_va(unsigned long x);
136+
#endif
125137
#define kernel_mapping_pa_to_va(y) ({ \
126138
unsigned long _y = (unsigned long)(y); \
127139
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \
@@ -130,7 +142,11 @@ extern phys_addr_t phys_ram_base;
130142
})
131143
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
132144

145+
#ifndef CONFIG_DEBUG_VIRTUAL
133146
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
147+
#else
148+
phys_addr_t linear_mapping_va_to_pa(unsigned long x);
149+
#endif
134150
#define kernel_mapping_va_to_pa(y) ({ \
135151
unsigned long _y = (unsigned long)(y); \
136152
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \

arch/riscv/mm/init.c

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,14 @@ static void __init setup_bootmem(void)
213213
phys_ram_end = memblock_end_of_DRAM();
214214
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
215215
phys_ram_base = memblock_start_of_DRAM();
216+
217+
/*
218+
* In 64-bit, any use of __va/__pa before this point is wrong as we
219+
* did not know the start of DRAM before.
220+
*/
221+
if (IS_ENABLED(CONFIG_64BIT))
222+
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
223+
216224
/*
217225
* memblock allocator is not aware of the fact that last 4K bytes of
218226
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -667,9 +675,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
667675

668676
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
669677
{
670-
/* Upgrade to PMD_SIZE mappings whenever possible */
671-
base &= PMD_SIZE - 1;
672-
if (!base && size >= PMD_SIZE)
678+
if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
679+
return PGDIR_SIZE;
680+
681+
if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
682+
return P4D_SIZE;
683+
684+
if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
685+
return PUD_SIZE;
686+
687+
if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
673688
return PMD_SIZE;
674689

675690
return PAGE_SIZE;
@@ -978,11 +993,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
978993
set_satp_mode();
979994
#endif
980995

981-
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
996+
/*
997+
* In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
998+
* where we have the system memory layout: this allows us to align
999+
* the physical and virtual mappings and then make use of PUD/P4D/PGD
1000+
* for the linear mapping. This is only possible because the kernel
1001+
* mapping lies outside the linear mapping.
1002+
* In 32-bit however, as the kernel resides in the linear mapping,
1003+
* setup_vm_final can not change the mapping established here,
1004+
* otherwise the same kernel addresses would get mapped to different
1005+
* physical addresses (if the start of dram is different from the
1006+
* kernel physical address start).
1007+
*/
1008+
kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
1009+
0UL : PAGE_OFFSET - kernel_map.phys_addr;
9821010
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
9831011

984-
phys_ram_base = kernel_map.phys_addr;
985-
9861012
/*
9871013
* The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
9881014
* kernel, whereas for 64-bit kernel, the end of the virtual address
@@ -1106,6 +1132,17 @@ static void __init create_linear_mapping_page_table(void)
11061132
phys_addr_t start, end;
11071133
u64 i;
11081134

1135+
#ifdef CONFIG_STRICT_KERNEL_RWX
1136+
phys_addr_t ktext_start = __pa_symbol(_start);
1137+
phys_addr_t ktext_size = __init_data_begin - _start;
1138+
phys_addr_t krodata_start = __pa_symbol(__start_rodata);
1139+
phys_addr_t krodata_size = _data - __start_rodata;
1140+
1141+
/* Isolate kernel text and rodata so they don't get mapped with a PUD */
1142+
memblock_mark_nomap(ktext_start, ktext_size);
1143+
memblock_mark_nomap(krodata_start, krodata_size);
1144+
#endif
1145+
11091146
/* Map all memory banks in the linear mapping */
11101147
for_each_mem_range(i, &start, &end) {
11111148
if (start >= end)
@@ -1118,6 +1155,15 @@ static void __init create_linear_mapping_page_table(void)
11181155

11191156
create_linear_mapping_range(start, end);
11201157
}
1158+
1159+
#ifdef CONFIG_STRICT_KERNEL_RWX
1160+
create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
1161+
create_linear_mapping_range(krodata_start,
1162+
krodata_start + krodata_size);
1163+
1164+
memblock_clear_nomap(ktext_start, ktext_size);
1165+
memblock_clear_nomap(krodata_start, krodata_size);
1166+
#endif
11211167
}
11221168

11231169
static void __init setup_vm_final(void)

arch/riscv/mm/physaddr.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
3333
return __va_to_pa_nodebug(x);
3434
}
3535
EXPORT_SYMBOL(__phys_addr_symbol);
36+
37+
phys_addr_t linear_mapping_va_to_pa(unsigned long x)
38+
{
39+
BUG_ON(!kernel_map.va_pa_offset);
40+
41+
return ((unsigned long)(x) - kernel_map.va_pa_offset);
42+
}
43+
EXPORT_SYMBOL(linear_mapping_va_to_pa);
44+
45+
void *linear_mapping_pa_to_va(unsigned long x)
46+
{
47+
BUG_ON(!kernel_map.va_pa_offset);
48+
49+
return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
50+
}
51+
EXPORT_SYMBOL(linear_mapping_pa_to_va);

drivers/of/fdt.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
887887
static void __early_init_dt_declare_initrd(unsigned long start,
888888
unsigned long end)
889889
{
890-
/* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
891-
* enabled since __va() is called too early. ARM64 does make use
892-
* of phys_initrd_start/phys_initrd_size so we can skip this
893-
* conversion.
890+
/*
891+
* __va() is not yet available this early on some platforms. In that
892+
* case, the platform uses phys_initrd_start/phys_initrd_size instead
893+
* and does the VA conversion itself.
894894
*/
895-
if (!IS_ENABLED(CONFIG_ARM64)) {
895+
if (!IS_ENABLED(CONFIG_ARM64) &&
896+
!(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
896897
initrd_start = (unsigned long)__va(start);
897898
initrd_end = (unsigned long)__va(end);
898899
initrd_below_start_ok = 1;

0 commit comments

Comments
 (0)