@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
4848 return mte_get_ptr_tag (addr );
4949}
5050
51+ static inline u64 __stg_post (u64 p )
52+ {
53+ asm volatile (__MTE_PREAMBLE "stg %0, [%0], #16"
54+ : "+r" (p )
55+ :
56+ : "memory" );
57+ return p ;
58+ }
59+
60+ static inline u64 __stzg_post (u64 p )
61+ {
62+ asm volatile (__MTE_PREAMBLE "stzg %0, [%0], #16"
63+ : "+r" (p )
64+ :
65+ : "memory" );
66+ return p ;
67+ }
68+
69+ static inline void __dc_gva (u64 p )
70+ {
71+ asm volatile (__MTE_PREAMBLE "dc gva, %0" : : "r" (p ) : "memory" );
72+ }
73+
74+ static inline void __dc_gzva (u64 p )
75+ {
76+ asm volatile (__MTE_PREAMBLE "dc gzva, %0" : : "r" (p ) : "memory" );
77+ }
78+
5179/*
5280 * Assign allocation tags for a region of memory based on the pointer tag.
5381 * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
54- * size must be non-zero and MTE_GRANULE_SIZE aligned.
82+ * size must be MTE_GRANULE_SIZE aligned.
5583 */
56- static inline void mte_set_mem_tag_range (void * addr , size_t size ,
57- u8 tag , bool init )
84+ static inline void mte_set_mem_tag_range (void * addr , size_t size , u8 tag ,
85+ bool init )
5886{
59- u64 curr , end ;
87+ u64 curr , mask , dczid_bs , end1 , end2 , end3 ;
6088
61- if (! size )
62- return ;
89+ /* Read DC G(Z)VA block size from the system register. */
90+ dczid_bs = 4ul << ( read_cpuid ( DCZID_EL0 ) & 0xf ) ;
6391
6492 curr = (u64 )__tag_set (addr , tag );
65- end = curr + size ;
93+ mask = dczid_bs - 1 ;
94+ /* STG/STZG up to the end of the first block. */
95+ end1 = curr | mask ;
96+ end3 = curr + size ;
97+ /* DC GVA / GZVA in [end1, end2) */
98+ end2 = end3 & ~mask ;
6699
67100 /*
68- * 'asm volatile' is required to prevent the compiler to move
69- * the statement outside of the loop.
101+ * The following code uses STG on the first DC GVA block even if the
102+ * start address is aligned - it appears to be faster than an alignment
103+ * check + conditional branch. Also, if the range size is at least 2 DC
104+ * GVA blocks, the first two loops can use post-condition to save one
105+ * branch each.
70106 */
71- if (init ) {
72- do {
73- asm volatile (__MTE_PREAMBLE "stzg %0, [%0]"
74- :
75- : "r" (curr )
76- : "memory" );
77- curr += MTE_GRANULE_SIZE ;
78- } while (curr != end );
79- } else {
80- do {
81- asm volatile (__MTE_PREAMBLE "stg %0, [%0]"
82- :
83- : "r" (curr )
84- : "memory" );
85- curr += MTE_GRANULE_SIZE ;
86- } while (curr != end );
87- }
107+ #define SET_MEMTAG_RANGE (stg_post , dc_gva ) \
108+ do { \
109+ if (size >= 2 * dczid_bs) { \
110+ do { \
111+ curr = stg_post(curr); \
112+ } while (curr < end1); \
113+ \
114+ do { \
115+ dc_gva(curr); \
116+ curr += dczid_bs; \
117+ } while (curr < end2); \
118+ } \
119+ \
120+ while (curr < end3) \
121+ curr = stg_post(curr); \
122+ } while (0)
123+
124+ if (init )
125+ SET_MEMTAG_RANGE (__stzg_post , __dc_gzva );
126+ else
127+ SET_MEMTAG_RANGE (__stg_post , __dc_gva );
128+ #undef SET_MEMTAG_RANGE
88129}
89130
90131void mte_enable_kernel_sync (void );
0 commit comments