Skip to content

Commit 2e75ab3

Browse files
Merge patch series "riscv: Use PUD/P4D/PGD pages for the linear mapping"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

This patchset intends to improve TLB utilization by using hugepages for the linear mapping.

As reported by Anup in v6, when STRICT_KERNEL_RWX is enabled, we must take care of isolating the kernel text and rodata so that they are not mapped with a PUD mapping, which would then assign wrong permissions to the whole region. This is achieved the same way as on arm64, by using the memblock nomap API, which isolates those regions and re-merges them afterwards, thus avoiding any issue with the system resources tree creation.

 arch/riscv/include/asm/page.h | 19 ++++++-
 arch/riscv/mm/init.c | 102 ++++++++++++++++++++++++++--------
 arch/riscv/mm/physaddr.c | 16 ++++++
 drivers/of/fdt.c | 11 ++--
 4 files changed, 118 insertions(+), 30 deletions(-)

* b4-shazam-merge:
  riscv: Use PUD/P4D/PGD pages for the linear mapping
  riscv: Move the linear mapping creation in its own function
  riscv: Get rid of riscv_pfn_base variable

Link: https://lore.kernel.org/r/20230324155421.271544-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 5464912 + 3335068 commit 2e75ab3

4 files changed

Lines changed: 118 additions & 30 deletions

File tree

arch/riscv/include/asm/page.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,16 @@ typedef struct page *pgtable_t;
8989
#define PTE_FMT "%08lx"
9090
#endif
9191

92+
#ifdef CONFIG_64BIT
93+
/*
94+
* We override this value as its generic definition uses __pa too early in
95+
* the boot process (before kernel_map.va_pa_offset is set).
96+
*/
97+
#define MIN_MEMBLOCK_ADDR 0
98+
#endif
99+
92100
#ifdef CONFIG_MMU
93-
extern unsigned long riscv_pfn_base;
94-
#define ARCH_PFN_OFFSET (riscv_pfn_base)
101+
#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
95102
#else
96103
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
97104
#endif /* CONFIG_MMU */
@@ -121,7 +128,11 @@ extern phys_addr_t phys_ram_base;
121128
#define is_linear_mapping(x) \
122129
((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
123130

131+
#ifndef CONFIG_DEBUG_VIRTUAL
124132
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
133+
#else
134+
void *linear_mapping_pa_to_va(unsigned long x);
135+
#endif
125136
#define kernel_mapping_pa_to_va(y) ({ \
126137
unsigned long _y = (unsigned long)(y); \
127138
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \
@@ -130,7 +141,11 @@ extern phys_addr_t phys_ram_base;
130141
})
131142
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
132143

144+
#ifndef CONFIG_DEBUG_VIRTUAL
133145
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
146+
#else
147+
phys_addr_t linear_mapping_va_to_pa(unsigned long x);
148+
#endif
134149
#define kernel_mapping_va_to_pa(y) ({ \
135150
unsigned long _y = (unsigned long)(y); \
136151
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \

arch/riscv/mm/init.c

Lines changed: 79 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,14 @@ static void __init setup_bootmem(void)
213213
phys_ram_end = memblock_end_of_DRAM();
214214
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
215215
phys_ram_base = memblock_start_of_DRAM();
216+
217+
/*
218+
* In 64-bit, any use of __va/__pa before this point is wrong as we
219+
* did not know the start of DRAM before.
220+
*/
221+
if (IS_ENABLED(CONFIG_64BIT))
222+
kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
223+
216224
/*
217225
* memblock allocator is not aware of the fact that last 4K bytes of
218226
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -271,9 +279,6 @@ static void __init setup_bootmem(void)
271279
#ifdef CONFIG_MMU
272280
struct pt_alloc_ops pt_ops __initdata;
273281

274-
unsigned long riscv_pfn_base __ro_after_init;
275-
EXPORT_SYMBOL(riscv_pfn_base);
276-
277282
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
278283
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
279284
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -285,7 +290,6 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG
285290

286291
#ifdef CONFIG_XIP_KERNEL
287292
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
288-
#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
289293
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
290294
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
291295
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -671,9 +675,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
671675

672676
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
673677
{
674-
/* Upgrade to PMD_SIZE mappings whenever possible */
675-
base &= PMD_SIZE - 1;
676-
if (!base && size >= PMD_SIZE)
678+
if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
679+
return PGDIR_SIZE;
680+
681+
if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
682+
return P4D_SIZE;
683+
684+
if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
685+
return PUD_SIZE;
686+
687+
if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
677688
return PMD_SIZE;
678689

679690
return PAGE_SIZE;
@@ -982,11 +993,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
982993
set_satp_mode();
983994
#endif
984995

985-
kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
996+
/*
997+
* In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
998+
* where we have the system memory layout: this allows us to align
999+
* the physical and virtual mappings and then make use of PUD/P4D/PGD
1000+
* for the linear mapping. This is only possible because the kernel
1001+
* mapping lies outside the linear mapping.
1002+
* In 32-bit however, as the kernel resides in the linear mapping,
1003+
* setup_vm_final can not change the mapping established here,
1004+
* otherwise the same kernel addresses would get mapped to different
1005+
* physical addresses (if the start of dram is different from the
1006+
* kernel physical address start).
1007+
*/
1008+
kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
1009+
0UL : PAGE_OFFSET - kernel_map.phys_addr;
9861010
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
9871011

988-
riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
989-
9901012
/*
9911013
* The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
9921014
* kernel, whereas for 64-bit kernel, the end of the virtual address
@@ -1090,16 +1112,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
10901112
pt_ops_set_fixmap();
10911113
}
10921114

1093-
static void __init setup_vm_final(void)
1115+
static void __init create_linear_mapping_range(phys_addr_t start,
1116+
phys_addr_t end)
10941117
{
1118+
phys_addr_t pa;
10951119
uintptr_t va, map_size;
1096-
phys_addr_t pa, start, end;
1120+
1121+
for (pa = start; pa < end; pa += map_size) {
1122+
va = (uintptr_t)__va(pa);
1123+
map_size = best_map_size(pa, end - pa);
1124+
1125+
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
1126+
pgprot_from_va(va));
1127+
}
1128+
}
1129+
1130+
static void __init create_linear_mapping_page_table(void)
1131+
{
1132+
phys_addr_t start, end;
10971133
u64 i;
10981134

1099-
/* Setup swapper PGD for fixmap */
1100-
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
1101-
__pa_symbol(fixmap_pgd_next),
1102-
PGDIR_SIZE, PAGE_TABLE);
1135+
#ifdef CONFIG_STRICT_KERNEL_RWX
1136+
phys_addr_t ktext_start = __pa_symbol(_start);
1137+
phys_addr_t ktext_size = __init_data_begin - _start;
1138+
phys_addr_t krodata_start = __pa_symbol(__start_rodata);
1139+
phys_addr_t krodata_size = _data - __start_rodata;
1140+
1141+
/* Isolate kernel text and rodata so they don't get mapped with a PUD */
1142+
memblock_mark_nomap(ktext_start, ktext_size);
1143+
memblock_mark_nomap(krodata_start, krodata_size);
1144+
#endif
11031145

11041146
/* Map all memory banks in the linear mapping */
11051147
for_each_mem_range(i, &start, &end) {
@@ -1111,15 +1153,29 @@ static void __init setup_vm_final(void)
11111153
if (end >= __pa(PAGE_OFFSET) + memory_limit)
11121154
end = __pa(PAGE_OFFSET) + memory_limit;
11131155

1114-
for (pa = start; pa < end; pa += map_size) {
1115-
va = (uintptr_t)__va(pa);
1116-
map_size = best_map_size(pa, end - pa);
1117-
1118-
create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
1119-
pgprot_from_va(va));
1120-
}
1156+
create_linear_mapping_range(start, end);
11211157
}
11221158

1159+
#ifdef CONFIG_STRICT_KERNEL_RWX
1160+
create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
1161+
create_linear_mapping_range(krodata_start,
1162+
krodata_start + krodata_size);
1163+
1164+
memblock_clear_nomap(ktext_start, ktext_size);
1165+
memblock_clear_nomap(krodata_start, krodata_size);
1166+
#endif
1167+
}
1168+
1169+
static void __init setup_vm_final(void)
1170+
{
1171+
/* Setup swapper PGD for fixmap */
1172+
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
1173+
__pa_symbol(fixmap_pgd_next),
1174+
PGDIR_SIZE, PAGE_TABLE);
1175+
1176+
/* Map the linear mapping */
1177+
create_linear_mapping_page_table();
1178+
11231179
/* Map the kernel */
11241180
if (IS_ENABLED(CONFIG_64BIT))
11251181
create_kernel_page_table(swapper_pg_dir, false);

arch/riscv/mm/physaddr.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
3333
return __va_to_pa_nodebug(x);
3434
}
3535
EXPORT_SYMBOL(__phys_addr_symbol);
36+
37+
phys_addr_t linear_mapping_va_to_pa(unsigned long x)
38+
{
39+
BUG_ON(!kernel_map.va_pa_offset);
40+
41+
return ((unsigned long)(x) - kernel_map.va_pa_offset);
42+
}
43+
EXPORT_SYMBOL(linear_mapping_va_to_pa);
44+
45+
void *linear_mapping_pa_to_va(unsigned long x)
46+
{
47+
BUG_ON(!kernel_map.va_pa_offset);
48+
49+
return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
50+
}
51+
EXPORT_SYMBOL(linear_mapping_pa_to_va);

drivers/of/fdt.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
887887
static void __early_init_dt_declare_initrd(unsigned long start,
888888
unsigned long end)
889889
{
890-
/* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
891-
* enabled since __va() is called too early. ARM64 does make use
892-
* of phys_initrd_start/phys_initrd_size so we can skip this
893-
* conversion.
890+
/*
891+
* __va() is not yet available this early on some platforms. In that
892+
* case, the platform uses phys_initrd_start/phys_initrd_size instead
893+
* and does the VA conversion itself.
894894
*/
895-
if (!IS_ENABLED(CONFIG_ARM64)) {
895+
if (!IS_ENABLED(CONFIG_ARM64) &&
896+
!(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
896897
initrd_start = (unsigned long)__va(start);
897898
initrd_end = (unsigned long)__va(end);
898899
initrd_below_start_ok = 1;

0 commit comments

Comments
 (0)