Skip to content

Commit e015eb6

Browse files
Merge patch series "riscv: Use READ_ONCE()/WRITE_ONCE() for pte accesses"
Alexandre Ghiti <alexghiti@rivosinc.com> says:

This series is a follow-up for riscv of a recent series from Ryan [1] which converts all direct dereferences of pte_t into a ptep_get() access. The goal here for riscv is to use READ_ONCE()/WRITE_ONCE() for all page table entry accesses to avoid any compiler transformation when the hardware can concurrently modify the page table entries (A/D bits for example). I went a bit further and added pudp/p4dp/pgdp_get() helpers as such concurrent modifications can happen too at those levels.

[1] https://lore.kernel.org/all/20230612151545.3317766-1-ryan.roberts@arm.com/

* b4-shazam-merge:
  riscv: Use accessors to page table entries instead of direct dereference
  riscv: mm: Only compile pgtable.c if MMU
  mm: Introduce pudp/p4dp/pgdp_get() functions
  riscv: Use WRITE_ONCE() when setting page table entries

Link: https://lore.kernel.org/r/20231213203001.179237-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 4a6b93f + edf9556 commit e015eb6

13 files changed

Lines changed: 157 additions & 120 deletions

File tree

arch/arm/include/asm/pgtable.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
151151

152152
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
153153

154+
#define pgdp_get(pgdp) READ_ONCE(*pgdp)
155+
154156
#define pud_page(pud) pmd_page(__pmd(pud_val(pud)))
155157
#define pud_write(pud) pmd_write(__pmd(pud_val(pud)))
156158

arch/riscv/include/asm/kfence.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
1818
pte_t *pte = virt_to_kpte(addr);
1919

2020
if (protect)
21-
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
21+
set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT));
2222
else
23-
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
23+
set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT));
2424

2525
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
2626

arch/riscv/include/asm/pgtable-64.h

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static inline int pud_user(pud_t pud)
202202

203203
static inline void set_pud(pud_t *pudp, pud_t pud)
204204
{
205-
*pudp = pud;
205+
WRITE_ONCE(*pudp, pud);
206206
}
207207

208208
static inline void pud_clear(pud_t *pudp)
@@ -278,7 +278,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
278278
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
279279
{
280280
if (pgtable_l4_enabled)
281-
*p4dp = p4d;
281+
WRITE_ONCE(*p4dp, p4d);
282282
else
283283
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
284284
}
@@ -340,18 +340,12 @@ static inline struct page *p4d_page(p4d_t p4d)
340340
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
341341

342342
#define pud_offset pud_offset
343-
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
344-
{
345-
if (pgtable_l4_enabled)
346-
return p4d_pgtable(*p4d) + pud_index(address);
347-
348-
return (pud_t *)p4d;
349-
}
343+
pud_t *pud_offset(p4d_t *p4d, unsigned long address);
350344

351345
static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
352346
{
353347
if (pgtable_l5_enabled)
354-
*pgdp = pgd;
348+
WRITE_ONCE(*pgdp, pgd);
355349
else
356350
set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
357351
}
@@ -404,12 +398,6 @@ static inline struct page *pgd_page(pgd_t pgd)
404398
#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
405399

406400
#define p4d_offset p4d_offset
407-
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
408-
{
409-
if (pgtable_l5_enabled)
410-
return pgd_pgtable(*pgd) + p4d_index(address);
411-
412-
return (p4d_t *)pgd;
413-
}
401+
p4d_t *p4d_offset(pgd_t *pgd, unsigned long address);
414402

415403
#endif /* _ASM_RISCV_PGTABLE_64_H */

arch/riscv/include/asm/pgtable.h

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ static inline int pmd_leaf(pmd_t pmd)
248248

249249
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
250250
{
251-
*pmdp = pmd;
251+
WRITE_ONCE(*pmdp, pmd);
252252
}
253253

254254
static inline void pmd_clear(pmd_t *pmdp)
@@ -510,7 +510,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
510510
*/
511511
static inline void set_pte(pte_t *ptep, pte_t pteval)
512512
{
513-
*ptep = pteval;
513+
WRITE_ONCE(*ptep, pteval);
514514
}
515515

516516
void flush_icache_pte(pte_t pte);
@@ -544,19 +544,12 @@ static inline void pte_clear(struct mm_struct *mm,
544544
__set_pte_at(ptep, __pte(0));
545545
}
546546

547-
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
548-
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
549-
unsigned long address, pte_t *ptep,
550-
pte_t entry, int dirty)
551-
{
552-
if (!pte_same(*ptep, entry))
553-
__set_pte_at(ptep, entry);
554-
/*
555-
* update_mmu_cache will unconditionally execute, handling both
556-
* the case that the PTE changed and the spurious fault case.
557-
*/
558-
return true;
559-
}
547+
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */
548+
extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
549+
pte_t *ptep, pte_t entry, int dirty);
550+
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */
551+
extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address,
552+
pte_t *ptep);
560553

561554
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
562555
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
@@ -569,16 +562,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
569562
return pte;
570563
}
571564

572-
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
573-
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
574-
unsigned long address,
575-
pte_t *ptep)
576-
{
577-
if (!pte_young(*ptep))
578-
return 0;
579-
return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep));
580-
}
581-
582565
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
583566
static inline void ptep_set_wrprotect(struct mm_struct *mm,
584567
unsigned long address, pte_t *ptep)

arch/riscv/kernel/efi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
6060
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
6161
{
6262
efi_memory_desc_t *md = data;
63-
pte_t pte = READ_ONCE(*ptep);
63+
pte_t pte = ptep_get(ptep);
6464
unsigned long val;
6565

6666
if (md->attribute & EFI_MEMORY_RO) {

arch/riscv/kvm/mmu.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
103103
*ptep_level = current_level;
104104
ptep = (pte_t *)kvm->arch.pgd;
105105
ptep = &ptep[gstage_pte_index(addr, current_level)];
106-
while (ptep && pte_val(*ptep)) {
106+
while (ptep && pte_val(ptep_get(ptep))) {
107107
if (gstage_pte_leaf(ptep)) {
108108
*ptep_level = current_level;
109109
*ptepp = ptep;
@@ -113,7 +113,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
113113
if (current_level) {
114114
current_level--;
115115
*ptep_level = current_level;
116-
ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
116+
ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
117117
ptep = &ptep[gstage_pte_index(addr, current_level)];
118118
} else {
119119
ptep = NULL;
@@ -149,25 +149,25 @@ static int gstage_set_pte(struct kvm *kvm, u32 level,
149149
if (gstage_pte_leaf(ptep))
150150
return -EEXIST;
151151

152-
if (!pte_val(*ptep)) {
152+
if (!pte_val(ptep_get(ptep))) {
153153
if (!pcache)
154154
return -ENOMEM;
155155
next_ptep = kvm_mmu_memory_cache_alloc(pcache);
156156
if (!next_ptep)
157157
return -ENOMEM;
158-
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
159-
__pgprot(_PAGE_TABLE));
158+
set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)),
159+
__pgprot(_PAGE_TABLE)));
160160
} else {
161161
if (gstage_pte_leaf(ptep))
162162
return -EEXIST;
163-
next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
163+
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
164164
}
165165

166166
current_level--;
167167
ptep = &next_ptep[gstage_pte_index(addr, current_level)];
168168
}
169169

170-
*ptep = *new_pte;
170+
set_pte(ptep, *new_pte);
171171
if (gstage_pte_leaf(ptep))
172172
gstage_remote_tlb_flush(kvm, current_level, addr);
173173

@@ -239,11 +239,11 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
239239

240240
BUG_ON(addr & (page_size - 1));
241241

242-
if (!pte_val(*ptep))
242+
if (!pte_val(ptep_get(ptep)))
243243
return;
244244

245245
if (ptep_level && !gstage_pte_leaf(ptep)) {
246-
next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
246+
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
247247
next_ptep_level = ptep_level - 1;
248248
ret = gstage_level_to_page_size(next_ptep_level,
249249
&next_page_size);
@@ -261,7 +261,7 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
261261
if (op == GSTAGE_OP_CLEAR)
262262
set_pte(ptep, __pte(0));
263263
else if (op == GSTAGE_OP_WP)
264-
set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
264+
set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE));
265265
gstage_remote_tlb_flush(kvm, ptep_level, addr);
266266
}
267267
}
@@ -603,7 +603,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
603603
&ptep, &ptep_level))
604604
return false;
605605

606-
return pte_young(*ptep);
606+
return pte_young(ptep_get(ptep));
607607
}
608608

609609
int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,

arch/riscv/mm/Makefile

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@ endif
1313
KCOV_INSTRUMENT_init.o := n
1414

1515
obj-y += init.o
16-
obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o
16+
obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o
1717
obj-y += cacheflush.o
1818
obj-y += context.o
19-
obj-y += pgtable.o
2019
obj-y += pmem.o
2120

2221
ifeq ($(CONFIG_MMU),y)

arch/riscv/mm/fault.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,36 +136,36 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
136136
pgd = (pgd_t *)pfn_to_virt(pfn) + index;
137137
pgd_k = init_mm.pgd + index;
138138

139-
if (!pgd_present(*pgd_k)) {
139+
if (!pgd_present(pgdp_get(pgd_k))) {
140140
no_context(regs, addr);
141141
return;
142142
}
143-
set_pgd(pgd, *pgd_k);
143+
set_pgd(pgd, pgdp_get(pgd_k));
144144

145145
p4d_k = p4d_offset(pgd_k, addr);
146-
if (!p4d_present(*p4d_k)) {
146+
if (!p4d_present(p4dp_get(p4d_k))) {
147147
no_context(regs, addr);
148148
return;
149149
}
150150

151151
pud_k = pud_offset(p4d_k, addr);
152-
if (!pud_present(*pud_k)) {
152+
if (!pud_present(pudp_get(pud_k))) {
153153
no_context(regs, addr);
154154
return;
155155
}
156-
if (pud_leaf(*pud_k))
156+
if (pud_leaf(pudp_get(pud_k)))
157157
goto flush_tlb;
158158

159159
/*
160160
* Since the vmalloc area is global, it is unnecessary
161161
* to copy individual PTEs
162162
*/
163163
pmd_k = pmd_offset(pud_k, addr);
164-
if (!pmd_present(*pmd_k)) {
164+
if (!pmd_present(pmdp_get(pmd_k))) {
165165
no_context(regs, addr);
166166
return;
167167
}
168-
if (pmd_leaf(*pmd_k))
168+
if (pmd_leaf(pmdp_get(pmd_k)))
169169
goto flush_tlb;
170170

171171
/*
@@ -175,7 +175,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
175175
* silently loop forever.
176176
*/
177177
pte_k = pte_offset_kernel(pmd_k, addr);
178-
if (!pte_present(*pte_k)) {
178+
if (!pte_present(ptep_get(pte_k))) {
179179
no_context(regs, addr);
180180
return;
181181
}

arch/riscv/mm/hugetlbpage.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
5454
}
5555

5656
if (sz == PMD_SIZE) {
57-
if (want_pmd_share(vma, addr) && pud_none(*pud))
57+
if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud)))
5858
pte = huge_pmd_share(mm, vma, addr, pud);
5959
else
6060
pte = (pte_t *)pmd_alloc(mm, pud, addr);
@@ -93,27 +93,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
9393
pmd_t *pmd;
9494

9595
pgd = pgd_offset(mm, addr);
96-
if (!pgd_present(*pgd))
96+
if (!pgd_present(pgdp_get(pgd)))
9797
return NULL;
9898

9999
p4d = p4d_offset(pgd, addr);
100-
if (!p4d_present(*p4d))
100+
if (!p4d_present(p4dp_get(p4d)))
101101
return NULL;
102102

103103
pud = pud_offset(p4d, addr);
104104
if (sz == PUD_SIZE)
105105
/* must be pud huge, non-present or none */
106106
return (pte_t *)pud;
107107

108-
if (!pud_present(*pud))
108+
if (!pud_present(pudp_get(pud)))
109109
return NULL;
110110

111111
pmd = pmd_offset(pud, addr);
112112
if (sz == PMD_SIZE)
113113
/* must be pmd huge, non-present or none */
114114
return (pte_t *)pmd;
115115

116-
if (!pmd_present(*pmd))
116+
if (!pmd_present(pmdp_get(pmd)))
117117
return NULL;
118118

119119
for_each_napot_order(order) {
@@ -293,7 +293,7 @@ void huge_pte_clear(struct mm_struct *mm,
293293
pte_t *ptep,
294294
unsigned long sz)
295295
{
296-
pte_t pte = READ_ONCE(*ptep);
296+
pte_t pte = ptep_get(ptep);
297297
int i, pte_num;
298298

299299
if (!pte_napot(pte)) {

0 commit comments

Comments (0)