Skip to content

Commit fdceddb

Browse files
committed
Merge branch 'for-next/mte' into for-next/core
KASAN optimisations for the hardware tagging (MTE) implementation.

* for-next/mte:
  kasan: disable freed user page poisoning with HW tags
  arm64: mte: handle tags zeroing at page allocation time
  kasan: use separate (un)poison implementation for integrated init
  mm: arch: remove indirection level in alloc_zeroed_user_highpage_movable()
  kasan: speed up mte_set_mem_tag_range
2 parents 81ad4bb + c275c5c commit fdceddb

20 files changed

Lines changed: 309 additions & 135 deletions

File tree

arch/alpha/include/asm/page.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@
1717
extern void clear_page(void *page);
1818
#define clear_user_page(page, vaddr, pg) clear_page(page)
1919

20-
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
21-
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vmaddr)
22-
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
20+
#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
21+
alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
22+
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
2323

2424
extern void copy_page(void * _to, void * _from);
2525
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)

arch/arm64/include/asm/mte-kasan.h

Lines changed: 67 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
4848
return mte_get_ptr_tag(addr);
4949
}
5050

51+
static inline u64 __stg_post(u64 p)
52+
{
53+
asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
54+
: "+r"(p)
55+
:
56+
: "memory");
57+
return p;
58+
}
59+
60+
static inline u64 __stzg_post(u64 p)
61+
{
62+
asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
63+
: "+r"(p)
64+
:
65+
: "memory");
66+
return p;
67+
}
68+
69+
static inline void __dc_gva(u64 p)
70+
{
71+
asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
72+
}
73+
74+
static inline void __dc_gzva(u64 p)
75+
{
76+
asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
77+
}
78+
5179
/*
5280
* Assign allocation tags for a region of memory based on the pointer tag.
5381
* Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
54-
* size must be non-zero and MTE_GRANULE_SIZE aligned.
82+
* size must be MTE_GRANULE_SIZE aligned.
5583
*/
56-
static inline void mte_set_mem_tag_range(void *addr, size_t size,
57-
u8 tag, bool init)
84+
static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
85+
bool init)
5886
{
59-
u64 curr, end;
87+
u64 curr, mask, dczid_bs, end1, end2, end3;
6088

61-
if (!size)
62-
return;
89+
/* Read DC G(Z)VA block size from the system register. */
90+
dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
6391

6492
curr = (u64)__tag_set(addr, tag);
65-
end = curr + size;
93+
mask = dczid_bs - 1;
94+
/* STG/STZG up to the end of the first block. */
95+
end1 = curr | mask;
96+
end3 = curr + size;
97+
/* DC GVA / GZVA in [end1, end2) */
98+
end2 = end3 & ~mask;
6699

67100
/*
68-
* 'asm volatile' is required to prevent the compiler to move
69-
* the statement outside of the loop.
101+
* The following code uses STG on the first DC GVA block even if the
102+
* start address is aligned - it appears to be faster than an alignment
103+
* check + conditional branch. Also, if the range size is at least 2 DC
104+
* GVA blocks, the first two loops can use post-condition to save one
105+
* branch each.
70106
*/
71-
if (init) {
72-
do {
73-
asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
74-
:
75-
: "r" (curr)
76-
: "memory");
77-
curr += MTE_GRANULE_SIZE;
78-
} while (curr != end);
79-
} else {
80-
do {
81-
asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
82-
:
83-
: "r" (curr)
84-
: "memory");
85-
curr += MTE_GRANULE_SIZE;
86-
} while (curr != end);
87-
}
107+
#define SET_MEMTAG_RANGE(stg_post, dc_gva) \
108+
do { \
109+
if (size >= 2 * dczid_bs) { \
110+
do { \
111+
curr = stg_post(curr); \
112+
} while (curr < end1); \
113+
\
114+
do { \
115+
dc_gva(curr); \
116+
curr += dczid_bs; \
117+
} while (curr < end2); \
118+
} \
119+
\
120+
while (curr < end3) \
121+
curr = stg_post(curr); \
122+
} while (0)
123+
124+
if (init)
125+
SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
126+
else
127+
SET_MEMTAG_RANGE(__stg_post, __dc_gva);
128+
#undef SET_MEMTAG_RANGE
88129
}
89130

90131
void mte_enable_kernel_sync(void);

arch/arm64/include/asm/mte.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ void mte_free_tag_storage(char *storage);
3737
/* track which pages have valid allocation tags */
3838
#define PG_mte_tagged PG_arch_2
3939

40+
void mte_zero_clear_page_tags(void *addr);
4041
void mte_sync_tags(pte_t *ptep, pte_t pte);
4142
void mte_copy_page_tags(void *kto, const void *kfrom);
4243
void mte_thread_init_user(void);
@@ -53,6 +54,9 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
5354
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
5455
#define PG_mte_tagged 0
5556

57+
static inline void mte_zero_clear_page_tags(void *addr)
58+
{
59+
}
5660
static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
5761
{
5862
}

arch/arm64/include/asm/page.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#ifndef __ASSEMBLY__
1414

1515
#include <linux/personality.h> /* for READ_IMPLIES_EXEC */
16+
#include <linux/types.h> /* for gfp_t */
1617
#include <asm/pgtable-types.h>
1718

1819
struct page;
@@ -28,9 +29,12 @@ void copy_user_highpage(struct page *to, struct page *from,
2829
void copy_highpage(struct page *to, struct page *from);
2930
#define __HAVE_ARCH_COPY_HIGHPAGE
3031

31-
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
32-
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
33-
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
32+
struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
33+
unsigned long vaddr);
34+
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
35+
36+
void tag_clear_highpage(struct page *to);
37+
#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
3438

3539
#define clear_user_page(page, vaddr, pg) clear_page(page)
3640
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)

arch/arm64/lib/mte.S

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,26 @@ SYM_FUNC_START(mte_clear_page_tags)
3636
ret
3737
SYM_FUNC_END(mte_clear_page_tags)
3838

39+
/*
40+
* Zero the page and tags at the same time
41+
*
42+
* Parameters:
43+
* x0 - address to the beginning of the page
44+
*/
45+
SYM_FUNC_START(mte_zero_clear_page_tags)
46+
mrs x1, dczid_el0
47+
and w1, w1, #0xf
48+
mov x2, #4
49+
lsl x1, x2, x1
50+
and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
51+
52+
1: dc gzva, x0
53+
add x0, x0, x1
54+
tst x0, #(PAGE_SIZE - 1)
55+
b.ne 1b
56+
ret
57+
SYM_FUNC_END(mte_zero_clear_page_tags)
58+
3959
/*
4060
* Copy the tags from the source page to the destination one
4161
* x0 - address of the destination page

arch/arm64/mm/fault.c

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -919,3 +919,29 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
919919
debug_exception_exit(regs);
920920
}
921921
NOKPROBE_SYMBOL(do_debug_exception);
922+
923+
/*
924+
* Used during anonymous page fault handling.
925+
*/
926+
struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
927+
unsigned long vaddr)
928+
{
929+
gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
930+
931+
/*
932+
* If the page is mapped with PROT_MTE, initialise the tags at the
933+
* point of allocation and page zeroing as this is usually faster than
934+
* separate DC ZVA and STGM.
935+
*/
936+
if (vma->vm_flags & VM_MTE)
937+
flags |= __GFP_ZEROTAGS;
938+
939+
return alloc_page_vma(flags, vma, vaddr);
940+
}
941+
942+
void tag_clear_highpage(struct page *page)
943+
{
944+
mte_zero_clear_page_tags(page_address(page));
945+
page_kasan_tag_reset(page);
946+
set_bit(PG_mte_tagged, &page->flags);
947+
}

arch/arm64/mm/proc.S

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,13 @@
4646
#endif
4747

4848
#ifdef CONFIG_KASAN_HW_TAGS
49-
#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
49+
#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
5050
#else
51-
#define TCR_KASAN_HW_FLAGS 0
51+
/*
52+
* The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
53+
* TBI being enabled at EL1.
54+
*/
55+
#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
5256
#endif
5357

5458
/*
@@ -454,7 +458,7 @@ SYM_FUNC_START(__cpu_setup)
454458
msr_s SYS_TFSRE0_EL1, xzr
455459

456460
/* set the TCR_EL1 bits */
457-
mov_q x10, TCR_KASAN_HW_FLAGS
461+
mov_q x10, TCR_MTE_FLAGS
458462
orr tcr, tcr, x10
459463
1:
460464
#endif

arch/ia64/include/asm/page.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,16 +82,16 @@ do { \
8282
} while (0)
8383

8484

85-
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
85+
#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
8686
({ \
8787
struct page *page = alloc_page_vma( \
88-
GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr); \
88+
GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr); \
8989
if (page) \
9090
flush_dcache_page(page); \
9191
page; \
9292
})
9393

94-
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
94+
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
9595

9696
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
9797

arch/m68k/include/asm/page_no.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ extern unsigned long memory_end;
1313
#define clear_user_page(page, vaddr, pg) clear_page(page)
1414
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
1515

16-
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
17-
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
18-
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
16+
#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
17+
alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
18+
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
1919

2020
#define __pa(vaddr) ((unsigned long)(vaddr))
2121
#define __va(paddr) ((void *)((unsigned long)(paddr)))

arch/s390/include/asm/page.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,9 +68,9 @@ static inline void copy_page(void *to, void *from)
6868
#define clear_user_page(page, vaddr, pg) clear_page(page)
6969
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
7070

71-
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
72-
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
73-
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
71+
#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
72+
alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
73+
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
7474

7575
/*
7676
* These are used to make use of C type-checking..

0 commit comments

Comments (0)