@@ -10,6 +10,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitfield.h>
 #include <linux/mm_types.h>
 #include <linux/sched.h>
 #include <asm/cputype.h>
@@ -59,6 +60,102 @@
 		__ta;						\
 	})
 
+/*
+ * Get the translation granule of the system, which is decided by
+ * PAGE_SIZE. Used by TTL.
+ *  - 4KB  : 1
+ *  - 16KB : 2
+ *  - 64KB : 3
+ */
+#define TLBI_TTL_TG_4K		1
+#define TLBI_TTL_TG_16K		2
+#define TLBI_TTL_TG_64K		3
+
+static inline unsigned long get_trans_granule(void)
+{
+	switch (PAGE_SIZE) {
+	case SZ_4K:
+		return TLBI_TTL_TG_4K;
+	case SZ_16K:
+		return TLBI_TTL_TG_16K;
+	case SZ_64K:
+		return TLBI_TTL_TG_64K;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Level-based TLBI operations.
+ *
+ * When ARMv8.4-TTL exists, TLBI operations take an additional hint for
+ * the level at which the invalidation must take place. If the level is
+ * wrong, no invalidation may take place. In the case where the level
+ * cannot be easily determined, a 0 value for the level parameter will
+ * perform a non-hinted invalidation.
+ *
+ * For Stage-2 invalidation, use the level values provided to that effect
+ * in asm/stage2_pgtable.h.
+ */
+#define TLBI_TTL_MASK		GENMASK_ULL(47, 44)
+
+#define __tlbi_level(op, addr, level) do {			\
+	u64 arg = addr;						\
+								\
+	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&	\
+	    level) {						\
+		u64 ttl = level & 3;				\
+		ttl |= get_trans_granule() << 2;		\
+		arg &= ~TLBI_TTL_MASK;				\
+		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);		\
+	}							\
+								\
+	__tlbi(op, arg);					\
+} while (0)
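+
+/*
+ * Worked example (illustrative): with 4KB pages, a level-3 (PTE)
+ * invalidation encodes ttl = (TLBI_TTL_TG_4K << 2) | 3 = 0b0111,
+ * which FIELD_PREP() then places in bits [47:44] of the operand.
+ */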
+
+#define __tlbi_user_level(op, arg, level) do {			\
+	if (arm64_kernel_unmapped_at_el0())			\
+		__tlbi_level(op, (arg | USER_ASID_FLAG), level);\
+} while (0)
+
+/*
+ * This macro creates a properly formatted VA operand for the TLB RANGE.
+ * The value bit assignments are:
+ *
+ *	+----------+------+-------+-------+-------+----------------------+
+ *	|   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
+ *	+----------+------+-------+-------+-------+----------------------+
+ *	|63      48|47  46|45   44|43   39|38   37|36                   0|
+ *	+----------+------+-------+-------+-------+----------------------+
+ *
+ * The address range is determined by the following formula:
+ *	[BADDR, BADDR + (NUM + 1) * 2^(5 * SCALE + 1) * PAGESIZE)
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
+	({							\
+		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
+		__ta &= GENMASK_ULL(36, 0);			\
+		__ta |= (unsigned long)(ttl) << 37;		\
+		__ta |= (unsigned long)(num) << 39;		\
+		__ta |= (unsigned long)(scale) << 44;		\
+		__ta |= get_trans_granule() << 46;		\
+		__ta |= (unsigned long)(asid) << 48;		\
+		__ta;						\
+	})
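+
+/*
+ * For example (illustrative): SCALE = 1 and NUM = 2 describe a range of
+ * (2 + 1) * 2^(5 * 1 + 1) = 192 pages, i.e. 768KB with 4KB pages.
+ */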
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale)	\
+	((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
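+/* i.e. (31 + 1) << (5 * 3 + 1) = 2^21 pages, or 8GB with 4KB pages. */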
+
+/*
+ * Generate 'num' values from -1 to 30 with -1 rejected by the
+ * __flush_tlb_range() loop below.
+ */
+#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
+#define __TLBI_RANGE_NUM(pages, scale)	\
+	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
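+/*
+ * For example (illustrative): pages = 1 at scale = 0 gives
+ * ((1 >> 1) & 0x1f) - 1 = -1 (no range op possible), while pages = 32
+ * gives ((32 >> 1) & 0x1f) - 1 = 15, i.e. (15 + 1) * 2 = 32 pages in a
+ * single range TLBI.
+ */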
+
 /*
  * TLB Invalidation
  * ================
@@ -179,34 +276,83 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
-				     unsigned long stride, bool last_level)
+				     unsigned long stride, bool last_level,
+				     int tlb_level)
 {
+	int num = 0;
+	int scale = 0;
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
+	unsigned long pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
+	pages = (end - start) >> PAGE_SHIFT;
 
-	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+	/*
+	 * Without TLB range ops, we can handle up to
+	 * (MAX_TLBI_OPS - 1) pages;
+	 * with TLB range ops, we can handle up to
+	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+	 */
+	if ((!system_supports_tlb_range() &&
+	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
+	    pages >= MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
 
-	/* Convert the stride into units of 4k */
-	stride >>= 12;
+	dsb(ishst);
 
-	start = __TLBI_VADDR(start, asid);
-	end = __TLBI_VADDR(end, asid);
+	/*
+	 * When the CPU does not support TLB range operations, flush the TLB
+	 * entries one by one at the granularity of 'stride'. If the TLB
+	 * range ops are supported, then:
+	 *
+	 * 1. If 'pages' is odd, flush the first page through non-range
+	 *    operations;
+	 *
+	 * 2. For the remaining pages: the minimum range granularity is
+	 *    decided by 'scale', so multiple range TLBI operations may be
+	 *    required. Start from scale = 0, flush the corresponding number
+	 *    of pages ((num + 1) * 2^(5 * scale + 1), starting from 'addr'),
+	 *    then increase it until no pages are left.
+	 *
+	 * Note that certain ranges can be represented by either num = 31 and
+	 * scale, or num = 0 and scale + 1. The loop below favours the latter
+	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+	 */
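+	/*
+	 * Worked example (illustrative): for pages = 19, the loop issues
+	 * one single-page TLBI for the odd page (leaving pages = 18), then
+	 * one range TLBI at scale = 0 with num = __TLBI_RANGE_NUM(18, 0) = 8,
+	 * covering the remaining (8 + 1) * 2 = 18 pages.
+	 */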
+	while (pages > 0) {
+		if (!system_supports_tlb_range() ||
+		    pages % 2 == 1) {
+			addr = __TLBI_VADDR(start, asid);
+			if (last_level) {
+				__tlbi_level(vale1is, addr, tlb_level);
+				__tlbi_user_level(vale1is, addr, tlb_level);
+			} else {
+				__tlbi_level(vae1is, addr, tlb_level);
+				__tlbi_user_level(vae1is, addr, tlb_level);
+			}
+			start += stride;
+			pages -= stride >> PAGE_SHIFT;
+			continue;
+		}
 
-	dsb(ishst);
-	for (addr = start; addr < end; addr += stride) {
-		if (last_level) {
-			__tlbi(vale1is, addr);
-			__tlbi_user(vale1is, addr);
-		} else {
-			__tlbi(vae1is, addr);
-			__tlbi_user(vae1is, addr);
+		num = __TLBI_RANGE_NUM(pages, scale);
+		if (num >= 0) {
+			addr = __TLBI_VADDR_RANGE(start, asid, scale,
+						  num, tlb_level);
+			if (last_level) {
+				__tlbi(rvale1is, addr);
+				__tlbi_user(rvale1is, addr);
+			} else {
+				__tlbi(rvae1is, addr);
+				__tlbi_user(rvae1is, addr);
+			}
+			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+			pages -= __TLBI_RANGE_PAGES(num, scale);
 		}
+		scale++;
 	}
 	dsb(ish);
 }
@@ -217,8 +363,9 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 	/*
 	 * We cannot use leaf-only invalidation here, since we may be invalidating
 	 * table entries as part of collapsing hugepages or moving page tables.
+	 * Set the tlb_level to 0 because we cannot get enough information here.
 	 */
-	__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
+	__flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)