Skip to content

Commit 9b54e24

Browse files
mdroth authored and bonzini committed
KVM: SEV: Add support to handle Page State Change VMGEXIT
SEV-SNP VMs can ask the hypervisor to change the page state in the RMP table to be private or shared using the Page State Change NAE event as defined in the GHCB specification version 2. Forward these requests to userspace as KVM_EXIT_VMGEXITs, similar to how it is done for requests that don't use a GHCB page. As with the MSR-based page-state changes, use the existing KVM_HC_MAP_GPA_RANGE hypercall format to deliver these requests to userspace via KVM_EXIT_HYPERCALL. Signed-off-by: Michael Roth <michael.roth@amd.com> Co-developed-by: Brijesh Singh <brijesh.singh@amd.com> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> Message-ID: <20240501085210.2213060-11-michael.roth@amd.com> Co-developed-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent d46b7b6 commit 9b54e24

3 files changed

Lines changed: 204 additions & 0 deletions

File tree

arch/x86/include/asm/sev-common.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,19 @@ enum psc_op {
129129
* The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
130130
* is a local stack variable in set_pages_state(). Do not increase this value
131131
* without evaluating the impact to stack usage.
132+
*
133+
* Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value
134+
* is needed, such as when processing GHCB requests on the hypervisor side.
132135
*/
133136
/* Guest-side limit: sizes the on-stack PSC buffer in set_pages_state(). */
#define VMGEXIT_PSC_MAX_ENTRY		64
/* GHCB-defined maximum entry count, for hypervisor-side request handling. */
#define VMGEXIT_PSC_MAX_COUNT		253

/* Error codes returned to the guest in SW_EXITINFO2 for a failed PSC. */
#define VMGEXIT_PSC_ERROR_GENERIC	(0x100UL << 32)
#define VMGEXIT_PSC_ERROR_INVALID_HDR	((1UL << 32) | 1)
#define VMGEXIT_PSC_ERROR_INVALID_ENTRY	((1UL << 32) | 2)

/* Page State Change operations defined by the GHCB specification. */
#define VMGEXIT_PSC_OP_PRIVATE		1
#define VMGEXIT_PSC_OP_SHARED		2

135146
struct psc_hdr {
136147
u16 cur_entry;

arch/x86/kvm/svm/sev.c

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3274,6 +3274,10 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
32743274
case SVM_VMGEXIT_HV_FEATURES:
32753275
case SVM_VMGEXIT_TERM_REQUEST:
32763276
break;
3277+
case SVM_VMGEXIT_PSC:
3278+
if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_sw_scratch_is_valid(svm))
3279+
goto vmgexit_err;
3280+
break;
32773281
default:
32783282
reason = GHCB_ERR_INVALID_EVENT;
32793283
goto vmgexit_err;
@@ -3503,6 +3507,183 @@ static int snp_begin_psc_msr(struct vcpu_svm *svm, u64 ghcb_msr)
35033507
return 0; /* forward request to userspace */
35043508
}
35053509

3510+
/*
 * Layout of the guest's Page State Change descriptor buffer as mapped via
 * the GHCB shared scratch area: a header followed by a flexible array of
 * entries (up to VMGEXIT_PSC_MAX_COUNT per the GHCB spec).
 */
struct psc_buffer {
	struct psc_hdr hdr;
	struct psc_entry entries[];
} __packed;

/* Forward declaration: snp_complete_one_psc() re-enters snp_begin_psc(). */
static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc);
3517+
/*
 * Finish the current Page State Change request: reset all per-vCPU PSC
 * tracking state and report the result code back to the guest via the
 * GHCB's SW_EXITINFO2 field (0 on success, VMGEXIT_PSC_ERROR_* otherwise).
 */
static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret)
{
	struct vcpu_sev_es_state *sev_es = &svm->sev_es;

	sev_es->psc_2m = false;
	sev_es->psc_idx = 0;
	sev_es->psc_inflight = 0;

	ghcb_set_sw_exit_info_2(sev_es->ghcb, psc_ret);
}
3524+
3525+
static void __snp_complete_one_psc(struct vcpu_svm *svm)
3526+
{
3527+
struct psc_buffer *psc = svm->sev_es.ghcb_sa;
3528+
struct psc_entry *entries = psc->entries;
3529+
struct psc_hdr *hdr = &psc->hdr;
3530+
__u16 idx;
3531+
3532+
/*
3533+
* Everything in-flight has been processed successfully. Update the
3534+
* corresponding entries in the guest's PSC buffer and zero out the
3535+
* count of in-flight PSC entries.
3536+
*/
3537+
for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight;
3538+
svm->sev_es.psc_inflight--, idx++) {
3539+
struct psc_entry *entry = &entries[idx];
3540+
3541+
entry->cur_page = entry->pagesize ? 512 : 1;
3542+
}
3543+
3544+
hdr->cur_entry = idx;
3545+
}
3546+
3547+
static int snp_complete_one_psc(struct kvm_vcpu *vcpu)
3548+
{
3549+
struct vcpu_svm *svm = to_svm(vcpu);
3550+
struct psc_buffer *psc = svm->sev_es.ghcb_sa;
3551+
3552+
if (vcpu->run->hypercall.ret) {
3553+
snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
3554+
return 1; /* resume guest */
3555+
}
3556+
3557+
__snp_complete_one_psc(svm);
3558+
3559+
/* Handle the next range (if any). */
3560+
return snp_begin_psc(svm, psc);
3561+
}
3562+
3563+
/*
 * Begin processing the guest's Page State Change buffer: validate the
 * header/entries, coalesce adjacent same-operation ranges, and forward the
 * resulting range to userspace as a KVM_HC_MAP_GPA_RANGE hypercall exit.
 * Returns 1 to resume the guest (request finished or failed), 0 to exit to
 * userspace with snp_complete_one_psc() as the completion callback.
 */
static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc)
{
	struct psc_entry *entries = psc->entries;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct psc_hdr *hdr = &psc->hdr;
	struct psc_entry entry_start;
	u16 idx, idx_start, idx_end;
	int npages;
	bool huge;
	u64 gfn;

	/* PSC requires userspace to opt in to KVM_HC_MAP_GPA_RANGE exits. */
	if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) {
		snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
		return 1;
	}

next_range:
	/* There should be no other PSCs in-flight at this point. */
	if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) {
		snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC);
		return 1;
	}

	/*
	 * The PSC descriptor buffer can be modified by a misbehaved guest after
	 * validation, so take care to only use validated copies of values used
	 * for things like array indexing.
	 */
	idx_start = hdr->cur_entry;
	idx_end = hdr->end_entry;

	if (idx_end >= VMGEXIT_PSC_MAX_COUNT) {
		snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR);
		return 1;
	}

	/* Find the start of the next range which needs processing. */
	for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) {
		/* Snapshot the entry; the guest-visible copy is untrusted. */
		entry_start = entries[idx];

		gfn = entry_start.gfn;
		huge = entry_start.pagesize;
		npages = huge ? 512 : 1;

		if (entry_start.cur_page > npages || !IS_ALIGNED(gfn, npages)) {
			snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_ENTRY);
			return 1;
		}

		if (entry_start.cur_page) {
			/*
			 * If this is a partially-completed 2M range, force 4K handling
			 * for the remaining pages since they're effectively split at
			 * this point. Subsequent code should ensure this doesn't get
			 * combined with adjacent PSC entries where 2M handling is still
			 * possible.
			 */
			npages -= entry_start.cur_page;
			gfn += entry_start.cur_page;
			huge = false;
		}

		/* npages == 0 means the entry is already fully processed. */
		if (npages)
			break;
	}

	if (idx > idx_end) {
		/* Nothing more to process. */
		snp_complete_psc(svm, 0);
		return 1;
	}

	svm->sev_es.psc_2m = huge;
	svm->sev_es.psc_idx = idx;
	svm->sev_es.psc_inflight = 1;

	/*
	 * Find all subsequent PSC entries that contain adjacent GPA
	 * ranges/operations and can be combined into a single
	 * KVM_HC_MAP_GPA_RANGE exit.
	 */
	while (++idx <= idx_end) {
		struct psc_entry entry = entries[idx];

		/*
		 * Only combine fresh (cur_page == 0), contiguous entries of the
		 * same operation and page size; a partial 2M start (huge forced
		 * to false above) thus never merges with 2M neighbors.
		 */
		if (entry.operation != entry_start.operation ||
		    entry.gfn != entry_start.gfn + npages ||
		    entry.cur_page || !!entry.pagesize != huge)
			break;

		svm->sev_es.psc_inflight++;
		npages += huge ? 512 : 1;
	}

	switch (entry_start.operation) {
	case VMGEXIT_PSC_OP_PRIVATE:
	case VMGEXIT_PSC_OP_SHARED:
		/* Describe the coalesced range using the hypercall ABI. */
		vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
		vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
		vcpu->run->hypercall.args[0] = gfn_to_gpa(gfn);
		vcpu->run->hypercall.args[1] = npages;
		vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE
					       ? KVM_MAP_GPA_RANGE_ENCRYPTED
					       : KVM_MAP_GPA_RANGE_DECRYPTED;
		vcpu->run->hypercall.args[2] |= entry_start.pagesize
						? KVM_MAP_GPA_RANGE_PAGE_SZ_2M
						: KVM_MAP_GPA_RANGE_PAGE_SZ_4K;
		vcpu->arch.complete_userspace_io = snp_complete_one_psc;
		return 0; /* forward request to userspace */
	default:
		/*
		 * Only shared/private PSC operations are currently supported, so if the
		 * entire range consists of unsupported operations (e.g. SMASH/UNSMASH),
		 * then consider the entire range completed and avoid exiting to
		 * userspace. In theory snp_complete_psc() can always be called directly
		 * at this point to complete the current range and start the next one,
		 * but that could lead to unexpected levels of recursion.
		 */
		__snp_complete_one_psc(svm);
		goto next_range;
	}

	unreachable();
}
3686+
35063687
static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
35073688
{
35083689
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -3761,6 +3942,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
37613942
vcpu->run->system_event.ndata = 1;
37623943
vcpu->run->system_event.data[0] = control->ghcb_gpa;
37633944
break;
3945+
case SVM_VMGEXIT_PSC:
3946+
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
3947+
if (ret)
3948+
break;
3949+
3950+
ret = snp_begin_psc(svm, svm->sev_es.ghcb_sa);
3951+
break;
37643952
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
37653953
vcpu_unimpl(vcpu,
37663954
"vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",

arch/x86/kvm/svm/svm.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,11 @@ struct vcpu_sev_es_state {
210210
bool ghcb_sa_sync;
211211
bool ghcb_sa_free;
212212

213+
/* SNP Page-State-Change buffer entries currently being processed */
214+
u16 psc_idx;
215+
u16 psc_inflight;
216+
bool psc_2m;
217+
213218
u64 ghcb_registered_gpa;
214219
};
215220

0 commit comments

Comments
 (0)