Skip to content

Commit 8ecb10b

Browse files
committed
Merge tag 'kvm-x86-lam-6.8' of https://github.com/kvm-x86/linux into HEAD
KVM x86 support for virtualizing Linear Address Masking (LAM) Add KVM support for Linear Address Masking (LAM). LAM tweaks the canonicality checks for most virtual address usage in 64-bit mode, such that only the most significant bit of the untranslated address bits must match the polarity of the last translated address bit. This allows software to use ignored, untranslated address bits for metadata, e.g. to efficiently tag pointers for address sanitization. LAM can be enabled separately for user pointers and supervisor pointers, and for userspace LAM can be selected between 48-bit and 57-bit masking: - 48-bit LAM: metadata bits 62:48, i.e. LAM width of 15. - 57-bit LAM: metadata bits 62:57, i.e. LAM width of 6. For user pointers, LAM enabling utilizes two previously-reserved high bits from CR3 (similar to how PCID_NOFLUSH uses bit 63): LAM_U48 and LAM_U57, bits 62 and 61 respectively. Note, if LAM_U57 is set, LAM_U48 is ignored, i.e.: - CR3.LAM_U48=0 && CR3.LAM_U57=0 == LAM disabled for user pointers - CR3.LAM_U48=1 && CR3.LAM_U57=0 == LAM-48 enabled for user pointers - CR3.LAM_U48=x && CR3.LAM_U57=1 == LAM-57 enabled for user pointers For supervisor pointers, LAM is controlled by a single bit, CR4.LAM_SUP, with the 48-bit versus 57-bit LAM behavior following the current paging mode, i.e.: - CR4.LAM_SUP=0 && CR4.LA57=x == LAM disabled for supervisor pointers - CR4.LAM_SUP=1 && CR4.LA57=0 == LAM-48 enabled for supervisor pointers - CR4.LAM_SUP=1 && CR4.LA57=1 == LAM-57 enabled for supervisor pointers The modified LAM canonicality checks: - LAM_S48 : [ 1 ][ metadata ][ 1 ] 63 47 - LAM_U48 : [ 0 ][ metadata ][ 0 ] 63 47 - LAM_S57 : [ 1 ][ metadata ][ 1 ] 63 56 - LAM_U57 + 5-lvl paging : [ 0 ][ metadata ][ 0 ] 63 56 - LAM_U57 + 4-lvl paging : [ 0 ][ metadata ][ 0...0 ] 63 56..47 The bulk of KVM support for LAM is to emulate LAM's modified canonicality checks. The approach taken by KVM is to "fill" the metadata bits using the highest bit of the translated address, e.g. 
for LAM-48, bit 47 is sign-extended to bits 62:48. The most significant bit, 63, is *not* modified, i.e. its value from the raw, untagged virtual address is kept for the canonicality check. Aside from emulating LAM's modified canonicality-check behavior, LAM has the usual KVM touchpoints for selectable features: enumeration (CPUID.7.1:EAX.LAM[bit 26]), enabling via CR3 and CR4 bits, etc.
2 parents 01edb1c + 183bdd1 commit 8ecb10b

18 files changed

Lines changed: 134 additions & 30 deletions

File tree

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ KVM_X86_OP(msr_filter_changed)
137137
KVM_X86_OP(complete_emulated_msr)
138138
KVM_X86_OP(vcpu_deliver_sipi_vector)
139139
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
140+
KVM_X86_OP_OPTIONAL(get_untagged_addr)
140141

141142
#undef KVM_X86_OP
142143
#undef KVM_X86_OP_OPTIONAL

arch/x86/include/asm/kvm_host.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@
133133
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
134134
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
135135
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
136-
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
136+
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
137+
| X86_CR4_LAM_SUP))
137138

138139
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
139140

@@ -1790,6 +1791,8 @@ struct kvm_x86_ops {
17901791
* Returns vCPU specific APICv inhibit reasons
17911792
*/
17921793
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
1794+
1795+
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
17931796
};
17941797

17951798
struct kvm_x86_nested_ops {

arch/x86/kvm/cpuid.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,7 +677,7 @@ void kvm_set_cpu_caps(void)
677677
kvm_cpu_cap_mask(CPUID_7_1_EAX,
678678
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
679679
F(FZRM) | F(FSRS) | F(FSRC) |
680-
F(AMX_FP16) | F(AVX_IFMA)
680+
F(AMX_FP16) | F(AVX_IFMA) | F(LAM)
681681
);
682682

683683
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,

arch/x86/kvm/cpuid.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,6 @@ static inline bool kvm_vcpu_is_legal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
4747
return !(gpa & vcpu->arch.reserved_gpa_bits);
4848
}
4949

50-
static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
51-
{
52-
return !kvm_vcpu_is_legal_gpa(vcpu, gpa);
53-
}
54-
5550
static inline bool kvm_vcpu_is_legal_aligned_gpa(struct kvm_vcpu *vcpu,
5651
gpa_t gpa, gpa_t alignment)
5752
{
@@ -279,4 +274,12 @@ static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
279274
vcpu->arch.governed_features.enabled);
280275
}
281276

277+
static inline bool kvm_vcpu_is_legal_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
278+
{
279+
if (guest_can_use(vcpu, X86_FEATURE_LAM))
280+
cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
281+
282+
return kvm_vcpu_is_legal_gpa(vcpu, cr3);
283+
}
284+
282285
#endif

arch/x86/kvm/emulate.c

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -687,8 +687,8 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
687687
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
688688
struct segmented_address addr,
689689
unsigned *max_size, unsigned size,
690-
bool write, bool fetch,
691-
enum x86emul_mode mode, ulong *linear)
690+
enum x86emul_mode mode, ulong *linear,
691+
unsigned int flags)
692692
{
693693
struct desc_struct desc;
694694
bool usable;
@@ -701,7 +701,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
701701
*max_size = 0;
702702
switch (mode) {
703703
case X86EMUL_MODE_PROT64:
704-
*linear = la;
704+
*linear = la = ctxt->ops->get_untagged_addr(ctxt, la, flags);
705705
va_bits = ctxt_virt_addr_bits(ctxt);
706706
if (!__is_canonical_address(la, va_bits))
707707
goto bad;
@@ -717,11 +717,11 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
717717
if (!usable)
718718
goto bad;
719719
/* code segment in protected mode or read-only data segment */
720-
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
721-
|| !(desc.type & 2)) && write)
720+
if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8)) || !(desc.type & 2)) &&
721+
(flags & X86EMUL_F_WRITE))
722722
goto bad;
723723
/* unreadable code segment */
724-
if (!fetch && (desc.type & 8) && !(desc.type & 2))
724+
if (!(flags & X86EMUL_F_FETCH) && (desc.type & 8) && !(desc.type & 2))
725725
goto bad;
726726
lim = desc_limit_scaled(&desc);
727727
if (!(desc.type & 8) && (desc.type & 4)) {
@@ -757,8 +757,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
757757
ulong *linear)
758758
{
759759
unsigned max_size;
760-
return __linearize(ctxt, addr, &max_size, size, write, false,
761-
ctxt->mode, linear);
760+
return __linearize(ctxt, addr, &max_size, size, ctxt->mode, linear,
761+
write ? X86EMUL_F_WRITE : 0);
762762
}
763763

764764
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
@@ -771,7 +771,8 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
771771

772772
if (ctxt->op_bytes != sizeof(unsigned long))
773773
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
774-
rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
774+
rc = __linearize(ctxt, addr, &max_size, 1, ctxt->mode, &linear,
775+
X86EMUL_F_FETCH);
775776
if (rc == X86EMUL_CONTINUE)
776777
ctxt->_eip = addr.ea;
777778
return rc;
@@ -907,8 +908,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
907908
* boundary check itself. Instead, we use max_size to check
908909
* against op_size.
909910
*/
910-
rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
911-
&linear);
911+
rc = __linearize(ctxt, addr, &max_size, 0, ctxt->mode, &linear,
912+
X86EMUL_F_FETCH);
912913
if (unlikely(rc != X86EMUL_CONTINUE))
913914
return rc;
914915

@@ -3439,8 +3440,10 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
34393440
{
34403441
int rc;
34413442
ulong linear;
3443+
unsigned int max_size;
34423444

3443-
rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
3445+
rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, ctxt->mode,
3446+
&linear, X86EMUL_F_INVLPG);
34443447
if (rc == X86EMUL_CONTINUE)
34453448
ctxt->ops->invlpg(ctxt, linear);
34463449
/* Disable writeback. */

arch/x86/kvm/governed_features.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
1616
KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
1717
KVM_GOVERNED_X86_FEATURE(VGIF)
1818
KVM_GOVERNED_X86_FEATURE(VNMI)
19+
KVM_GOVERNED_X86_FEATURE(LAM)
1920

2021
#undef KVM_GOVERNED_X86_FEATURE
2122
#undef KVM_GOVERNED_FEATURE

arch/x86/kvm/kvm_emulate.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ struct x86_instruction_info {
8888
#define X86EMUL_IO_NEEDED 5 /* IO is needed to complete emulation */
8989
#define X86EMUL_INTERCEPTED 6 /* Intercepted by nested VMCB/VMCS */
9090

91+
/* x86-specific emulation flags */
92+
#define X86EMUL_F_WRITE BIT(0)
93+
#define X86EMUL_F_FETCH BIT(1)
94+
#define X86EMUL_F_IMPLICIT BIT(2)
95+
#define X86EMUL_F_INVLPG BIT(3)
96+
9197
struct x86_emulate_ops {
9298
void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
9399
/*
@@ -224,6 +230,9 @@ struct x86_emulate_ops {
224230
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
225231
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
226232
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
233+
234+
gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
235+
unsigned int flags);
227236
};
228237

229238
/* Type, address-of, and value of an instruction's operand. */

arch/x86/kvm/mmu.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,14 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu)
146146
return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu));
147147
}
148148

149+
static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu)
150+
{
151+
if (!guest_can_use(vcpu, X86_FEATURE_LAM))
152+
return 0;
153+
154+
return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
155+
}
156+
149157
static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu)
150158
{
151159
u64 root_hpa = vcpu->arch.mmu->root.hpa;

arch/x86/kvm/mmu/mmu.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3802,7 +3802,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
38023802
hpa_t root;
38033803

38043804
root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
3805-
root_gfn = root_pgd >> PAGE_SHIFT;
3805+
root_gfn = (root_pgd & __PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
38063806

38073807
if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
38083808
mmu->root.hpa = kvm_mmu_get_dummy_root();

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#endif
1414

1515
/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
16+
#define __PT_BASE_ADDR_MASK GENMASK_ULL(51, 12)
1617
#define __PT_LEVEL_SHIFT(level, bits_per_level) \
1718
(PAGE_SHIFT + ((level) - 1) * (bits_per_level))
1819
#define __PT_INDEX(address, level, bits_per_level) \

0 commit comments

Comments
 (0)