Skip to content

Commit 92598ae

Browse files
committed
Merge tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Borislav Petkov:

 - Rename a PKRU macro to make more sense when reading the code

 - Update pkeys documentation

 - Avoid reading contended mm's TLB generation var if not absolutely
   necessary, along with fixing a case where arch_tlbbatch_flush()
   doesn't adhere to the generation scheme and thus violates the
   conditions for the above avoidance.

* tag 'x86_mm_for_v6.0_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/tlb: Ignore f->new_tlb_gen when zero
  x86/pkeys: Clarify PKRU_AD_KEY macro
  Documentation/protection-keys: Clean up documentation for User Space pkeys
  x86/mm/tlb: Avoid reading mm_tlb_gen when possible
2 parents 94e37e8 + 8f1d56f commit 92598ae

4 files changed

Lines changed: 59 additions & 32 deletions

File tree

Documentation/core-api/protection-keys.rst

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,29 @@
44
Memory Protection Keys
55
======================
66

7-
Memory Protection Keys for Userspace (PKU aka PKEYs) is a feature
8-
which is found on Intel's Skylake (and later) "Scalable Processor"
9-
Server CPUs. It will be available in future non-server Intel parts
10-
and future AMD processors.
11-
12-
For anyone wishing to test or use this feature, it is available in
13-
Amazon's EC2 C5 instances and is known to work there using an Ubuntu
14-
17.04 image.
15-
16-
Memory Protection Keys provides a mechanism for enforcing page-based
17-
protections, but without requiring modification of the page tables
18-
when an application changes protection domains. It works by
19-
dedicating 4 previously ignored bits in each page table entry to a
20-
"protection key", giving 16 possible keys.
21-
22-
There is also a new user-accessible register (PKRU) with two separate
23-
bits (Access Disable and Write Disable) for each key. Being a CPU
24-
register, PKRU is inherently thread-local, potentially giving each
7+
Memory Protection Keys provide a mechanism for enforcing page-based
8+
protections, but without requiring modification of the page tables when an
9+
application changes protection domains.
10+
11+
Pkeys Userspace (PKU) is a feature which can be found on:
12+
* Intel server CPUs, Skylake and later
13+
* Intel client CPUs, Tiger Lake (11th Gen Core) and later
14+
* Future AMD CPUs
15+
16+
Pkeys work by dedicating 4 previously Reserved bits in each page table entry to
17+
a "protection key", giving 16 possible keys.
18+
19+
Protections for each key are defined with a per-CPU user-accessible register
20+
(PKRU). Each of these is a 32-bit register storing two bits (Access Disable
21+
and Write Disable) for each of 16 keys.
22+
23+
Being a CPU register, PKRU is inherently thread-local, potentially giving each
2524
thread a different set of protections from every other thread.
2625

27-
There are two new instructions (RDPKRU/WRPKRU) for reading and writing
28-
to the new register. The feature is only available in 64-bit mode,
29-
even though there is theoretically space in the PAE PTEs. These
30-
permissions are enforced on data access only and have no effect on
31-
instruction fetches.
26+
There are two instructions (RDPKRU/WRPKRU) for reading and writing to the
27+
register. The feature is only available in 64-bit mode, even though there is
28+
theoretically space in the PAE PTEs. These permissions are enforced on data
29+
access only and have no effect on instruction fetches.
3230

3331
Syscalls
3432
========

arch/x86/include/asm/tlbflush.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
void __flush_tlb_all(void);
1717

1818
#define TLB_FLUSH_ALL -1UL
19+
#define TLB_GENERATION_INVALID 0
1920

2021
void cr4_update_irqsoff(unsigned long set, unsigned long clear);
2122
unsigned long cr4_read_shadow(void);

arch/x86/mm/pkeys.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,19 +110,22 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
110110
return vma_pkey(vma);
111111
}
112112

113-
#define PKRU_AD_KEY(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
113+
#define PKRU_AD_MASK(pkey) (PKRU_AD_BIT << ((pkey) * PKRU_BITS_PER_PKEY))
114114

115115
/*
116116
* Make the default PKRU value (at execve() time) as restrictive
117117
* as possible. This ensures that any threads clone()'d early
118118
* in the process's lifetime will not accidentally get access
119119
* to data which is pkey-protected later on.
120120
*/
121-
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
122-
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
123-
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
124-
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
125-
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
121+
u32 init_pkru_value = PKRU_AD_MASK( 1) | PKRU_AD_MASK( 2) |
122+
PKRU_AD_MASK( 3) | PKRU_AD_MASK( 4) |
123+
PKRU_AD_MASK( 5) | PKRU_AD_MASK( 6) |
124+
PKRU_AD_MASK( 7) | PKRU_AD_MASK( 8) |
125+
PKRU_AD_MASK( 9) | PKRU_AD_MASK(10) |
126+
PKRU_AD_MASK(11) | PKRU_AD_MASK(12) |
127+
PKRU_AD_MASK(13) | PKRU_AD_MASK(14) |
128+
PKRU_AD_MASK(15);
126129

127130
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
128131
size_t count, loff_t *ppos)

arch/x86/mm/tlb.c

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -734,10 +734,10 @@ static void flush_tlb_func(void *info)
734734
const struct flush_tlb_info *f = info;
735735
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
736736
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
737-
u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
738737
u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
739738
bool local = smp_processor_id() == f->initiating_cpu;
740739
unsigned long nr_invalidate = 0;
740+
u64 mm_tlb_gen;
741741

742742
/* This code cannot presently handle being reentered. */
743743
VM_WARN_ON(!irqs_disabled());
@@ -771,6 +771,23 @@ static void flush_tlb_func(void *info)
771771
return;
772772
}
773773

774+
if (unlikely(f->new_tlb_gen != TLB_GENERATION_INVALID &&
775+
f->new_tlb_gen <= local_tlb_gen)) {
776+
/*
777+
* The TLB is already up to date in respect to f->new_tlb_gen.
778+
* While the core might be still behind mm_tlb_gen, checking
779+
* mm_tlb_gen unnecessarily would have negative caching effects
780+
* so avoid it.
781+
*/
782+
return;
783+
}
784+
785+
/*
786+
* Defer mm_tlb_gen reading as long as possible to avoid cache
787+
* contention.
788+
*/
789+
mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
790+
774791
if (unlikely(local_tlb_gen == mm_tlb_gen)) {
775792
/*
776793
* There's nothing to do: we're already up to date. This can
@@ -827,6 +844,12 @@ static void flush_tlb_func(void *info)
827844
/* Partial flush */
828845
unsigned long addr = f->start;
829846

847+
/* Partial flush cannot have invalid generations */
848+
VM_WARN_ON(f->new_tlb_gen == TLB_GENERATION_INVALID);
849+
850+
/* Partial flush must have valid mm */
851+
VM_WARN_ON(f->mm == NULL);
852+
830853
nr_invalidate = (f->end - f->start) >> f->stride_shift;
831854

832855
while (addr < f->end) {
@@ -1029,7 +1052,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
10291052
struct flush_tlb_info *info;
10301053

10311054
preempt_disable();
1032-
info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
1055+
info = get_flush_tlb_info(NULL, start, end, 0, false,
1056+
TLB_GENERATION_INVALID);
10331057

10341058
on_each_cpu(do_kernel_range_flush, info, 1);
10351059

@@ -1198,7 +1222,8 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
11981222

11991223
int cpu = get_cpu();
12001224

1201-
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
1225+
info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
1226+
TLB_GENERATION_INVALID);
12021227
/*
12031228
* flush_tlb_multi() is not optimized for the common case in which only
12041229
* a local TLB flush is needed. Optimize this use-case by calling

0 commit comments

Comments
 (0)