Commit 3d0cca0

eugenis authored and willdeacon committed
kasan: speed up mte_set_mem_tag_range
Use DC GVA / DC GZVA to speed up KASan memory tagging in HW tags mode.

The first cacheline is always tagged using STG/STZG even if the address is
cacheline-aligned, as benchmarks show it is faster than a conditional branch.

Signed-off-by: Evgenii Stepanov <eugenis@google.com>
Co-developed-by: Peter Collingbourne <pcc@google.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210521010023.3244784-1-eugenis@google.com
Signed-off-by: Will Deacon <will@kernel.org>
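For context, the DC GVA / DC GZVA instructions mentioned above tag (and, for
GZVA, zero) one whole block per instruction, with the block size advertised by
the DCZID_EL0 register. A minimal user-space sketch of that decoding, assuming
an AArch64 toolchain (the helper name is made up for illustration; the diff
below reads the register via read_cpuid(DCZID_EL0)):

#include <stdint.h>

/*
 * DCZID_EL0.BS (bits [3:0]) holds log2 of the DC (G)ZVA block size in
 * 4-byte words, so the size in bytes is 4 << BS; BS = 4 gives the
 * common 64-byte block. DCZID_EL0 is readable from EL0.
 */
static inline uint64_t dc_gzva_block_bytes(void)
{
	uint64_t dczid;

	__asm__ volatile("mrs %0, dczid_el0" : "=r"(dczid));
	return 4UL << (dczid & 0xf);
}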
1 parent c468154 commit 3d0cca0

1 file changed

Lines changed: 67 additions & 26 deletions

arch/arm64/include/asm/mte-kasan.h

@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
 	return mte_get_ptr_tag(addr);
 }
 
+static inline u64 __stg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
 /*
  * Assign allocation tags for a region of memory based on the pointer tag.
  * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ * size must be MTE_GRANULE_SIZE aligned.
  */
-static inline void mte_set_mem_tag_range(void *addr, size_t size,
-					 u8 tag, bool init)
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+					 bool init)
 {
-	u64 curr, end;
+	u64 curr, mask, dczid_bs, end1, end2, end3;
 
-	if (!size)
-		return;
+	/* Read DC G(Z)VA block size from the system register. */
+	dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
 
 	curr = (u64)__tag_set(addr, tag);
-	end = curr + size;
+	mask = dczid_bs - 1;
+	/* STG/STZG up to the end of the first block. */
+	end1 = curr | mask;
+	end3 = curr + size;
+	/* DC GVA / GZVA in [end1, end2) */
+	end2 = end3 & ~mask;
 
 	/*
-	 * 'asm volatile' is required to prevent the compiler to move
-	 * the statement outside of the loop.
+	 * The following code uses STG on the first DC GVA block even if the
+	 * start address is aligned - it appears to be faster than an alignment
+	 * check + conditional branch. Also, if the range size is at least 2 DC
+	 * GVA blocks, the first two loops can use post-condition to save one
+	 * branch each.
 	 */
-	if (init) {
-		do {
-			asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	} else {
-		do {
-			asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	}
+#define SET_MEMTAG_RANGE(stg_post, dc_gva)		\
+	do {						\
+		if (size >= 2 * dczid_bs) {		\
+			do {				\
+				curr = stg_post(curr);	\
+			} while (curr < end1);		\
+							\
+			do {				\
+				dc_gva(curr);		\
+				curr += dczid_bs;	\
+			} while (curr < end2);		\
+		}					\
+							\
+		while (curr < end3)			\
+			curr = stg_post(curr);		\
+	} while (0)
+
+	if (init)
+		SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+	else
+		SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
 }
 
 void mte_enable_kernel_sync(void);
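To see how the new end1/end2/end3 arithmetic splits a range, here is a small
self-contained demo of the same computations; the address, size, and 64-byte
block size are assumed sample values, not read from hardware:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed sample values; the kernel reads dczid_bs from DCZID_EL0. */
	uint64_t dczid_bs = 64;        /* DC G(Z)VA block size in bytes */
	uint64_t curr = 0x1010;        /* tagged start, MTE_GRANULE_SIZE (16) aligned */
	uint64_t size = 0x200;         /* range size, MTE_GRANULE_SIZE aligned */

	/* Same arithmetic as mte_set_mem_tag_range() above. */
	uint64_t mask = dczid_bs - 1;
	uint64_t end1 = curr | mask;   /* last byte of the first block */
	uint64_t end3 = curr + size;   /* end of the whole range */
	uint64_t end2 = end3 & ~mask;  /* block-aligned end of the middle part */

	/* The head loop (STG/STZG, 16 bytes per store) runs while curr < end1,
	 * so it stops at the first block boundary, end1 + 1. */
	printf("STG/STZG head : [0x%llx, 0x%llx)\n",
	       (unsigned long long)curr, (unsigned long long)(end1 + 1));
	printf("DC G(Z)VA body: [0x%llx, 0x%llx)\n",
	       (unsigned long long)(end1 + 1), (unsigned long long)end2);
	printf("STG/STZG tail : [0x%llx, 0x%llx)\n",
	       (unsigned long long)end2, (unsigned long long)end3);
	return 0;
}

With these values the head stores cover [0x1010, 0x1040), DC G(Z)VA handles
[0x1040, 0x1200) one 64-byte block at a time, and stores finish
[0x1200, 0x1210); a range smaller than two blocks fails the
size >= 2 * dczid_bs check and is tagged with stores alone.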
