Skip to content

Commit cc74404

Browse files
committed
Merge tag 'kvmarm-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 6.5 - Eager page splitting optimization for dirty logging, optionally allowing for a VM to avoid the cost of block splitting in the stage-2 fault path. - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with services that live in the Secure world. pKVM intervenes on FF-A calls to guarantee the host doesn't misuse memory donated to the hyp or a pKVM guest. - Support for running the split hypervisor with VHE enabled, known as 'hVHE' mode. This is extremely useful for testing the split hypervisor on VHE-only systems, and paves the way for new use cases that depend on having two TTBRs available at EL2. - Generalized framework for configurable ID registers from userspace. KVM/arm64 currently prevents arbitrary CPU feature set configuration from userspace, but the intent is to relax this limitation and allow userspace to select a feature set consistent with the CPU. - Enable the use of Branch Target Identification (FEAT_BTI) in the hypervisor. - Use a separate set of pointer authentication keys for the hypervisor when running in protected mode, as the host is untrusted at runtime. - Ensure timer IRQs are consistently released in the init failure paths. - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps (FEAT_EVT), as it is a register commonly read from userspace. - Erratum workaround for the upcoming AmpereOne part, which has broken hardware A/D state management. As a consequence of the hVHE series reworking the arm64 software features framework, the for-next/module-alloc branch from the arm64 tree comes along for the ride.
2 parents b539627 + 192df2a commit cc74404

61 files changed

Lines changed: 2631 additions & 619 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

Documentation/arm64/memory.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
3333
0000000000000000 0000ffffffffffff 256TB user
3434
ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
3535
[ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
36-
ffff800000000000 ffff800007ffffff 128MB modules
37-
ffff800008000000 fffffbffefffffff 124TB vmalloc
36+
ffff800000000000 ffff80007fffffff 2GB modules
37+
ffff800080000000 fffffbffefffffff 124TB vmalloc
3838
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
3939
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
4040
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
5050
0000000000000000 000fffffffffffff 4PB user
5151
fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
5252
[fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
53-
ffff800000000000 ffff800007ffffff 128MB modules
54-
ffff800008000000 fffffbffefffffff 124TB vmalloc
53+
ffff800000000000 ffff80007fffffff 2GB modules
54+
ffff800080000000 fffffbffefffffff 124TB vmalloc
5555
fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
5656
fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
5757
fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space

Documentation/arm64/silicon-errata.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ stable kernels.
5252
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
5353
+----------------+-----------------+-----------------+-----------------------------+
5454
+----------------+-----------------+-----------------+-----------------------------+
55+
| Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 |
56+
+----------------+-----------------+-----------------+-----------------------------+
57+
+----------------+-----------------+-----------------+-----------------------------+
5558
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
5659
+----------------+-----------------+-----------------+-----------------------------+
5760
| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |

Documentation/virt/kvm/api.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8445,6 +8445,33 @@ structure.
84458445
When getting the Modified Change Topology Report value, the attr->addr
84468446
must point to a byte where the value will be stored or retrieved from.
84478447

8448+
8.40 KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
8449+
---------------------------------------
8450+
8451+
:Capability: KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE
8452+
:Architectures: arm64
8453+
:Type: vm
8454+
:Parameters: arg[0] is the new split chunk size.
8455+
:Returns: 0 on success, -EINVAL if any memslot was already created.
8456+
8457+
This capability sets the chunk size used in Eager Page Splitting.
8458+
8459+
Eager Page Splitting improves the performance of dirty-logging (used
8460+
in live migrations) when guest memory is backed by huge-pages. It
8461+
avoids splitting huge-pages (into PAGE_SIZE pages) on fault, by doing
8462+
it eagerly when enabling dirty logging (with the
8463+
KVM_MEM_LOG_DIRTY_PAGES flag for a memory region), or when using
8464+
KVM_CLEAR_DIRTY_LOG.
8465+
8466+
The chunk size specifies how many pages to break at a time, using a
8467+
single allocation for each chunk. The bigger the chunk size, the more pages
8468+
need to be allocated ahead of time.
8469+
8470+
The chunk size needs to be a valid block size. The list of acceptable
8471+
block sizes is exposed in KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES as a
8472+
64-bit bitmap (each bit describing a block size). The default value is
8473+
0, to disable the eager page splitting.
8474+
84488475
9. Known KVM API problems
84498476
=========================
84508477

arch/arm64/Kconfig

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ config ARM64
207207
select HAVE_IOREMAP_PROT
208208
select HAVE_IRQ_TIME_ACCOUNTING
209209
select HAVE_KVM
210+
select HAVE_MOD_ARCH_SPECIFIC
210211
select HAVE_NMI
211212
select HAVE_PERF_EVENTS
212213
select HAVE_PERF_REGS
@@ -406,6 +407,25 @@ menu "Kernel Features"
406407

407408
menu "ARM errata workarounds via the alternatives framework"
408409

410+
config AMPERE_ERRATUM_AC03_CPU_38
411+
bool "AmpereOne: AC03_CPU_38: Certain bits in the Virtualization Translation Control Register and Translation Control Registers do not follow RES0 semantics"
412+
default y
413+
help
414+
This option adds an alternative code sequence to work around Ampere
415+
erratum AC03_CPU_38 on AmpereOne.
416+
417+
The affected design reports FEAT_HAFDBS as not implemented in
418+
ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0
419+
as required by the architecture. The unadvertised HAFDBS
420+
implementation suffers from an additional erratum where hardware
421+
A/D updates can occur after a PTE has been marked invalid.
422+
423+
The workaround forces KVM to explicitly set VTCR_EL2.HA to 0,
424+
which avoids enabling unadvertised hardware Access Flag management
425+
at stage-2.
426+
427+
If unsure, say Y.
428+
409429
config ARM64_WORKAROUND_CLEAN_CACHE
410430
bool
411431

@@ -577,7 +597,6 @@ config ARM64_ERRATUM_845719
577597
config ARM64_ERRATUM_843419
578598
bool "Cortex-A53: 843419: A load or store might access an incorrect address"
579599
default y
580-
select ARM64_MODULE_PLTS if MODULES
581600
help
582601
This option links the kernel with '--fix-cortex-a53-843419' and
583602
enables PLT support to replace certain ADRP instructions, which can
@@ -2107,26 +2126,6 @@ config ARM64_SME
21072126
register state capable of holding two dimensional matrix tiles to
21082127
enable various matrix operations.
21092128

2110-
config ARM64_MODULE_PLTS
2111-
bool "Use PLTs to allow module memory to spill over into vmalloc area"
2112-
depends on MODULES
2113-
select HAVE_MOD_ARCH_SPECIFIC
2114-
help
2115-
Allocate PLTs when loading modules so that jumps and calls whose
2116-
targets are too far away for their relative offsets to be encoded
2117-
in the instructions themselves can be bounced via veneers in the
2118-
module's PLT. This allows modules to be allocated in the generic
2119-
vmalloc area after the dedicated module memory area has been
2120-
exhausted.
2121-
2122-
When running with address space randomization (KASLR), the module
2123-
region itself may be too far away for ordinary relative jumps and
2124-
calls, and so in that case, module PLTs are required and cannot be
2125-
disabled.
2126-
2127-
Specific errata workaround(s) might also force module PLTs to be
2128-
enabled (ARM64_ERRATUM_843419).
2129-
21302129
config ARM64_PSEUDO_NMI
21312130
bool "Support for NMI-like interrupts"
21322131
select ARM_GIC_V3
@@ -2167,7 +2166,6 @@ config RELOCATABLE
21672166

21682167
config RANDOMIZE_BASE
21692168
bool "Randomize the address of the kernel image"
2170-
select ARM64_MODULE_PLTS if MODULES
21712169
select RELOCATABLE
21722170
help
21732171
Randomizes the virtual address at which the kernel image is
@@ -2198,9 +2196,8 @@ config RANDOMIZE_MODULE_REGION_FULL
21982196
When this option is not set, the module region will be randomized over
21992197
a limited range that contains the [_stext, _etext] interval of the
22002198
core kernel, so branch relocations are almost always in range unless
2201-
ARM64_MODULE_PLTS is enabled and the region is exhausted. In this
2202-
particular case of region exhaustion, modules might be able to fall
2203-
back to a larger 2GB area.
2199+
the region is exhausted. In this particular case of region
2200+
exhaustion, modules might be able to fall back to a larger 2GB area.
22042201

22052202
config CC_HAVE_STACKPROTECTOR_SYSREG
22062203
def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)

arch/arm64/include/asm/cpufeature.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
#define MAX_CPU_FEATURES 128
1616
#define cpu_feature(x) KERNEL_HWCAP_ ## x
1717

18+
#define ARM64_SW_FEATURE_OVERRIDE_NOKASLR 0
19+
#define ARM64_SW_FEATURE_OVERRIDE_HVHE 4
20+
1821
#ifndef __ASSEMBLY__
1922

2023
#include <linux/bug.h>
@@ -915,6 +918,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
915918
return 8;
916919
}
917920

921+
s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur);
918922
struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
919923

920924
extern struct arm64_ftr_override id_aa64mmfr1_override;
@@ -925,6 +929,8 @@ extern struct arm64_ftr_override id_aa64smfr0_override;
925929
extern struct arm64_ftr_override id_aa64isar1_override;
926930
extern struct arm64_ftr_override id_aa64isar2_override;
927931

932+
extern struct arm64_ftr_override arm64_sw_feature_override;
933+
928934
u32 get_kvm_ipa_limit(void);
929935
void dump_cpu_features(void);
930936

arch/arm64/include/asm/el2_setup.h

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434
*/
3535
.macro __init_el2_timers
3636
mov x0, #3 // Enable EL1 physical timers
37+
mrs x1, hcr_el2
38+
and x1, x1, #HCR_E2H
39+
cbz x1, .LnVHE_\@
40+
lsl x0, x0, #10
41+
.LnVHE_\@:
3742
msr cnthctl_el2, x0
3843
msr cntvoff_el2, xzr // Clear virtual offset
3944
.endm
@@ -124,8 +129,15 @@
124129
.endm
125130

126131
/* Coprocessor traps */
127-
.macro __init_el2_nvhe_cptr
132+
.macro __init_el2_cptr
133+
mrs x1, hcr_el2
134+
and x1, x1, #HCR_E2H
135+
cbz x1, .LnVHE_\@
136+
mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
137+
b .Lset_cptr_\@
138+
.LnVHE_\@:
128139
mov x0, #0x33ff
140+
.Lset_cptr_\@:
129141
msr cptr_el2, x0 // Disable copro. traps to EL2
130142
.endm
131143

@@ -191,9 +203,8 @@
191203
__init_el2_gicv3
192204
__init_el2_hstr
193205
__init_el2_nvhe_idregs
194-
__init_el2_nvhe_cptr
206+
__init_el2_cptr
195207
__init_el2_fgt
196-
__init_el2_nvhe_prepare_eret
197208
.endm
198209

199210
#ifndef __KVM_NVHE_HYPERVISOR__
@@ -239,7 +250,17 @@
239250

240251
.Linit_sve_\@: /* SVE register access */
241252
mrs x0, cptr_el2 // Disable SVE traps
253+
mrs x1, hcr_el2
254+
and x1, x1, #HCR_E2H
255+
cbz x1, .Lcptr_nvhe_\@
256+
257+
// VHE case
258+
orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
259+
b .Lset_cptr_\@
260+
261+
.Lcptr_nvhe_\@: // nVHE case
242262
bic x0, x0, #CPTR_EL2_TZ
263+
.Lset_cptr_\@:
243264
msr cptr_el2, x0
244265
isb
245266
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector

arch/arm64/include/asm/kvm_arm.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#define HCR_ATA_SHIFT 56
1919
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
2020
#define HCR_AMVOFFEN (UL(1) << 51)
21+
#define HCR_TID4 (UL(1) << 49)
2122
#define HCR_FIEN (UL(1) << 47)
2223
#define HCR_FWB (UL(1) << 46)
2324
#define HCR_API (UL(1) << 41)
@@ -86,7 +87,7 @@
8687
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
8788
HCR_BSU_IS | HCR_FB | HCR_TACR | \
8889
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
89-
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
90+
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
9091
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
9192
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
9293
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
@@ -285,7 +286,6 @@
285286
#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
286287
#define CPTR_EL2_TZ (1 << 8)
287288
#define CPTR_NVHE_EL2_RES1 0x000032ff /* known RES1 bits in CPTR_EL2 (nVHE) */
288-
#define CPTR_EL2_DEFAULT CPTR_NVHE_EL2_RES1
289289
#define CPTR_NVHE_EL2_RES0 (GENMASK(63, 32) | \
290290
GENMASK(29, 21) | \
291291
GENMASK(19, 14) | \
@@ -347,8 +347,7 @@
347347
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
348348
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
349349

350-
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
351-
CPACR_EL1_ZEN_EL1EN)
350+
#define CPACR_EL1_TTA (1 << 28)
352351

353352
#define kvm_mode_names \
354353
{ PSR_MODE_EL0t, "EL0t" }, \

arch/arm64/include/asm/kvm_asm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum __kvm_host_smccc_func {
6868
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
6969
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
7070
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
71+
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
7172
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
7273
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
7374
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
@@ -225,6 +226,9 @@ extern void __kvm_flush_vm_context(void);
225226
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
226227
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
227228
int level);
229+
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
230+
phys_addr_t ipa,
231+
int level);
228232
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
229233

230234
extern void __kvm_timer_set_cntvoff(u64 cntvoff);

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,14 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
6262
#else
6363
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
6464
{
65-
struct kvm *kvm = vcpu->kvm;
66-
67-
WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED,
68-
&kvm->arch.flags));
69-
70-
return test_bit(KVM_ARCH_FLAG_EL1_32BIT, &kvm->arch.flags);
65+
return test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features);
7166
}
7267
#endif
7368

7469
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
7570
{
7671
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
77-
if (is_kernel_in_hyp_mode())
72+
if (has_vhe() || has_hvhe())
7873
vcpu->arch.hcr_el2 |= HCR_E2H;
7974
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
8075
/* route synchronous external abort exceptions to EL2 */
@@ -95,6 +90,12 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
9590
vcpu->arch.hcr_el2 |= HCR_TVM;
9691
}
9792

93+
if (cpus_have_final_cap(ARM64_HAS_EVT) &&
94+
!cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
95+
vcpu->arch.hcr_el2 |= HCR_TID4;
96+
else
97+
vcpu->arch.hcr_el2 |= HCR_TID2;
98+
9899
if (vcpu_el1_is_32bit(vcpu))
99100
vcpu->arch.hcr_el2 &= ~HCR_RW;
100101

@@ -570,4 +571,35 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
570571
return test_bit(feature, vcpu->arch.features);
571572
}
572573

574+
static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
575+
{
576+
u64 val;
577+
578+
if (has_vhe()) {
579+
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
580+
CPACR_EL1_ZEN_EL1EN);
581+
} else if (has_hvhe()) {
582+
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
583+
} else {
584+
val = CPTR_NVHE_EL2_RES1;
585+
586+
if (vcpu_has_sve(vcpu) &&
587+
(vcpu->arch.fp_state == FP_STATE_GUEST_OWNED))
588+
val |= CPTR_EL2_TZ;
589+
if (cpus_have_final_cap(ARM64_SME))
590+
val &= ~CPTR_EL2_TSM;
591+
}
592+
593+
return val;
594+
}
595+
596+
static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
597+
{
598+
u64 val = kvm_get_reset_cptr_el2(vcpu);
599+
600+
if (has_vhe() || has_hvhe())
601+
write_sysreg(val, cpacr_el1);
602+
else
603+
write_sysreg(val, cptr_el2);
604+
}
573605
#endif /* __ARM64_KVM_EMULATE_H__ */

0 commit comments

Comments
 (0)