
Commit ef5f97e

Author: Marc Zyngier (authored and committed)
Merge branch kvm-arm64/lock-inversion into kvmarm-master/next

* kvm-arm64/lock-inversion:
  : .
  : vm/vcpu lock inversion fixes, courtesy of Oliver Upton, plus a few
  : extra fixes from both Oliver and Reiji Watanabe.
  :
  : From the initial cover letter:
  :
  : As it so happens, lock ordering in KVM/arm64 is completely backwards.
  : There's a significant amount of VM-wide state that needs to be accessed
  : from the context of a vCPU. Until now, this was accomplished by
  : acquiring the kvm->lock, but that cannot be nested within vcpu->mutex.
  :
  : This series fixes the issue with some fine-grained locking for MP state
  : and a new, dedicated mutex that can nest with both kvm->lock and
  : vcpu->mutex.
  : .
  KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state
  KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init()
  KVM: arm64: vgic: Don't acquire its_lock before config_lock
  KVM: arm64: Use config_lock to protect vgic state
  KVM: arm64: Use config_lock to protect data ordered against KVM_RUN
  KVM: arm64: Avoid lock inversion when setting the VM register width
  KVM: arm64: Avoid vcpu->mutex v. kvm->lock inversion in CPU_ON

Signed-off-by: Marc Zyngier <maz@kernel.org>
2 parents 197b6b6 + a189884 commit ef5f97e
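
For orientation (a note added here, not part of the commit): the series establishes the ordering that kvm->lock and vcpu->mutex are both taken before kvm->arch.config_lock, as primed into lockdep by the kvm_arch_init_vm() and kvm_arch_vcpu_create() hunks below, while vcpu->arch.mp_state_lock is a leaf spinlock guarding MP-state updates. A minimal, illustrative sketch of a vCPU-context path under the new scheme (the helper name is hypothetical; the individual calls mirror patterns from the diff):

/*
 * Illustrative only, not from the commit: VM-scoped configuration is now
 * reached via kvm->arch.config_lock (which nests inside both kvm->lock and
 * vcpu->mutex), and per-vCPU MP state is protected by the
 * vcpu->arch.mp_state_lock spinlock.
 */
static void example_vcpu_context_update(struct kvm_vcpu *vcpu)	/* hypothetical helper */
{
	/* VM-wide flag update: config_lock instead of kvm->lock */
	mutex_lock(&vcpu->kvm->arch.config_lock);
	set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &vcpu->kvm->arch.flags);
	mutex_unlock(&vcpu->kvm->arch.config_lock);

	/* MP-state update: dedicated leaf spinlock plus WRITE_ONCE() */
	spin_lock(&vcpu->arch.mp_state_lock);
	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
	spin_unlock(&vcpu->arch.mp_state_lock);
}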

15 files changed: 186 additions & 114 deletions

arch/arm64/include/asm/kvm_host.h

Lines changed: 4 additions & 0 deletions
@@ -199,6 +199,9 @@ struct kvm_arch {
 	/* Mandated version of PSCI */
 	u32 psci_version;
 
+	/* Protects VM-scoped configuration data */
+	struct mutex config_lock;
+
 	/*
 	 * If we encounter a data abort without valid instruction syndrome
 	 * information, report this to user space. User space can (and
@@ -522,6 +525,7 @@ struct kvm_vcpu_arch {
 
 	/* vcpu power state */
 	struct kvm_mp_state mp_state;
+	spinlock_t mp_state_lock;
 
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;

arch/arm64/kvm/arm.c

Lines changed: 47 additions & 12 deletions
@@ -128,6 +128,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	int ret;
 
+	mutex_init(&kvm->arch.config_lock);
+
+#ifdef CONFIG_LOCKDEP
+	/* Clue in lockdep that the config_lock must be taken inside kvm->lock */
+	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.config_lock);
+	mutex_unlock(&kvm->arch.config_lock);
+	mutex_unlock(&kvm->lock);
+#endif
+
 	ret = kvm_share_hyp(kvm, kvm + 1);
 	if (ret)
 		return ret;
@@ -326,6 +336,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	int err;
 
+	spin_lock_init(&vcpu->arch.mp_state_lock);
+
+#ifdef CONFIG_LOCKDEP
+	/* Inform lockdep that the config_lock is acquired after vcpu->mutex */
+	mutex_lock(&vcpu->mutex);
+	mutex_lock(&vcpu->kvm->arch.config_lock);
+	mutex_unlock(&vcpu->kvm->arch.config_lock);
+	mutex_unlock(&vcpu->mutex);
+#endif
+
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
@@ -443,34 +463,41 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	vcpu->cpu = -1;
 }
 
-void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
 	kvm_make_request(KVM_REQ_SLEEP, vcpu);
 	kvm_vcpu_kick(vcpu);
 }
 
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+	spin_lock(&vcpu->arch.mp_state_lock);
+	__kvm_arm_vcpu_power_off(vcpu);
+	spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
 bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
 }
 
 static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
 	kvm_make_request(KVM_REQ_SUSPEND, vcpu);
 	kvm_vcpu_kick(vcpu);
 }
 
 static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+	return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	*mp_state = vcpu->arch.mp_state;
+	*mp_state = READ_ONCE(vcpu->arch.mp_state);
 
 	return 0;
 }
@@ -480,12 +507,14 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 {
 	int ret = 0;
 
+	spin_lock(&vcpu->arch.mp_state_lock);
+
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_RUNNABLE:
-		vcpu->arch.mp_state = *mp_state;
+		WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
 		break;
 	case KVM_MP_STATE_STOPPED:
-		kvm_arm_vcpu_power_off(vcpu);
+		__kvm_arm_vcpu_power_off(vcpu);
 		break;
 	case KVM_MP_STATE_SUSPENDED:
 		kvm_arm_vcpu_suspend(vcpu);
@@ -494,6 +523,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 		ret = -EINVAL;
 	}
 
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
 	return ret;
 }
 
@@ -593,9 +624,9 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (kvm_vm_is_protected(kvm))
 		kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.config_lock);
 	set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.config_lock);
 
 	return ret;
 }
@@ -1210,10 +1241,14 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	/*
 	 * Handle the "start in power-off" case.
	 */
+	spin_lock(&vcpu->arch.mp_state_lock);
+
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
-		kvm_arm_vcpu_power_off(vcpu);
+		__kvm_arm_vcpu_power_off(vcpu);
 	else
-		vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+		WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
+
+	spin_unlock(&vcpu->arch.mp_state_lock);
 
 	return 0;
 }

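A side note, not part of the commit: the CONFIG_LOCKDEP blocks added to kvm_arch_init_vm() and kvm_arch_vcpu_create() above use a common idiom; taking the locks once, in the intended order, at creation time records the dependency with lockdep, so any later path that acquires them in the reverse order is flagged even if no deadlock actually occurs during the run. A generic sketch of that idiom (the function name is hypothetical):

static void prime_lock_order(struct mutex *outer, struct mutex *inner)
{
	mutex_lock(outer);
	mutex_lock(inner);	/* lockdep records: "inner" nests inside "outer" */
	mutex_unlock(inner);
	mutex_unlock(outer);
}
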
arch/arm64/kvm/guest.c

Lines changed: 2 additions & 0 deletions
@@ -957,7 +957,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
 
 	switch (attr->group) {
 	case KVM_ARM_VCPU_PMU_V3_CTRL:
+		mutex_lock(&vcpu->kvm->arch.config_lock);
 		ret = kvm_arm_pmu_v3_set_attr(vcpu, attr);
+		mutex_unlock(&vcpu->kvm->arch.config_lock);
 		break;
 	case KVM_ARM_VCPU_TIMER_CTRL:
 		ret = kvm_arm_timer_set_attr(vcpu, attr);

arch/arm64/kvm/hypercalls.c

Lines changed: 2 additions & 2 deletions
@@ -377,7 +377,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
 	if (val & ~fw_reg_features)
 		return -EINVAL;
 
-	mutex_lock(&kvm->lock);
+	mutex_lock(&kvm->arch.config_lock);
 
 	if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
 	    val != *fw_reg_bmap) {
@@ -387,7 +387,7 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val)
 
 	WRITE_ONCE(*fw_reg_bmap, val);
 out:
-	mutex_unlock(&kvm->lock);
+	mutex_unlock(&kvm->arch.config_lock);
 	return ret;
 }

arch/arm64/kvm/pmu-emul.c

Lines changed: 6 additions & 17 deletions
@@ -874,7 +874,7 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
 	struct arm_pmu *arm_pmu;
 	int ret = -ENXIO;
 
-	mutex_lock(&kvm->lock);
+	lockdep_assert_held(&kvm->arch.config_lock);
 	mutex_lock(&arm_pmus_lock);
 
 	list_for_each_entry(entry, &arm_pmus, entry) {
@@ -894,30 +894,27 @@ static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
 	}
 
 	mutex_unlock(&arm_pmus_lock);
-	mutex_unlock(&kvm->lock);
 	return ret;
 }
 
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
 	struct kvm *kvm = vcpu->kvm;
 
+	lockdep_assert_held(&kvm->arch.config_lock);
+
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return -ENODEV;
 
 	if (vcpu->arch.pmu.created)
 		return -EBUSY;
 
-	mutex_lock(&kvm->lock);
 	if (!kvm->arch.arm_pmu) {
 		/* No PMU set, get the default one */
 		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
-		if (!kvm->arch.arm_pmu) {
-			mutex_unlock(&kvm->lock);
+		if (!kvm->arch.arm_pmu)
 			return -ENODEV;
-		}
 	}
-	mutex_unlock(&kvm->lock);
 
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_PMU_V3_IRQ: {
@@ -961,19 +958,13 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 			     filter.action != KVM_PMU_EVENT_DENY))
 			return -EINVAL;
 
-		mutex_lock(&kvm->lock);
-
-		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
-			mutex_unlock(&kvm->lock);
+		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags))
 			return -EBUSY;
-		}
 
 		if (!kvm->arch.pmu_filter) {
 			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
-			if (!kvm->arch.pmu_filter) {
-				mutex_unlock(&kvm->lock);
+			if (!kvm->arch.pmu_filter)
 				return -ENOMEM;
-			}
 
 			/*
 			 * The default depends on the first applied filter.
@@ -992,8 +983,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		else
 			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
 
-		mutex_unlock(&kvm->lock);
-
 		return 0;
 	}
 	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {

arch/arm64/kvm/psci.c

Lines changed: 17 additions & 13 deletions
@@ -62,6 +62,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct vcpu_reset_state *reset_state;
 	struct kvm *kvm = source_vcpu->kvm;
 	struct kvm_vcpu *vcpu = NULL;
+	int ret = PSCI_RET_SUCCESS;
 	unsigned long cpu_id;
 
 	cpu_id = smccc_get_arg1(source_vcpu);
@@ -76,11 +77,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
+
+	spin_lock(&vcpu->arch.mp_state_lock);
 	if (!kvm_arm_vcpu_stopped(vcpu)) {
 		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
-			return PSCI_RET_ALREADY_ON;
+			ret = PSCI_RET_ALREADY_ON;
 		else
-			return PSCI_RET_INVALID_PARAMS;
+			ret = PSCI_RET_INVALID_PARAMS;
+
+		goto out_unlock;
 	}
 
 	reset_state = &vcpu->arch.reset_state;
@@ -96,7 +101,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	reset_state->r0 = smccc_get_arg3(source_vcpu);
 
-	WRITE_ONCE(reset_state->reset, true);
+	reset_state->reset = true;
 	kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
 
 	/*
@@ -105,10 +110,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 */
 	smp_wmb();
 
-	vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+	WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
 	kvm_vcpu_wake_up(vcpu);
 
-	return PSCI_RET_SUCCESS;
+out_unlock:
+	spin_unlock(&vcpu->arch.mp_state_lock);
+	return ret;
 }
 
 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
@@ -168,8 +175,11 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags)
 	 * after this call is handled and before the VCPUs have been
 	 * re-initialized.
 	 */
-	kvm_for_each_vcpu(i, tmp, vcpu->kvm)
-		tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		spin_lock(&tmp->arch.mp_state_lock);
+		WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+		spin_unlock(&tmp->arch.mp_state_lock);
+	}
 	kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
 
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -229,7 +239,6 @@ static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32
 
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
 	u32 psci_fn = smccc_get_function(vcpu);
 	unsigned long val;
 	int ret = 1;
@@ -254,9 +263,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		kvm_psci_narrow_to_32bit(vcpu);
 		fallthrough;
 	case PSCI_0_2_FN64_CPU_ON:
-		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
 		break;
 	case PSCI_0_2_FN_AFFINITY_INFO:
 		kvm_psci_narrow_to_32bit(vcpu);
@@ -395,7 +402,6 @@ static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor)
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
-	struct kvm *kvm = vcpu->kvm;
 	u32 psci_fn = smccc_get_function(vcpu);
 	unsigned long val;
 
@@ -405,9 +411,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_SUCCESS;
 		break;
 	case KVM_PSCI_FN_CPU_ON:
-		mutex_lock(&kvm->lock);
 		val = kvm_psci_vcpu_on(vcpu);
-		mutex_unlock(&kvm->lock);
 		break;
 	default:
 		val = PSCI_RET_NOT_SUPPORTED;

arch/arm64/kvm/reset.c

Lines changed: 8 additions & 7 deletions
@@ -205,7 +205,7 @@ static int kvm_set_vm_width(struct kvm_vcpu *vcpu)
 
 	is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
 
-	lockdep_assert_held(&kvm->lock);
+	lockdep_assert_held(&kvm->arch.config_lock);
 
 	if (test_bit(KVM_ARCH_FLAG_REG_WIDTH_CONFIGURED, &kvm->arch.flags)) {
 		/*
@@ -262,17 +262,18 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	bool loaded;
 	u32 pstate;
 
-	mutex_lock(&vcpu->kvm->lock);
+	mutex_lock(&vcpu->kvm->arch.config_lock);
 	ret = kvm_set_vm_width(vcpu);
-	if (!ret) {
-		reset_state = vcpu->arch.reset_state;
-		WRITE_ONCE(vcpu->arch.reset_state.reset, false);
-	}
-	mutex_unlock(&vcpu->kvm->lock);
+	mutex_unlock(&vcpu->kvm->arch.config_lock);
 
 	if (ret)
 		return ret;
 
+	spin_lock(&vcpu->arch.mp_state_lock);
+	reset_state = vcpu->arch.reset_state;
+	vcpu->arch.reset_state.reset = false;
+	spin_unlock(&vcpu->arch.mp_state_lock);
+
 	/* Reset PMU outside of the non-preemptible section */
 	kvm_pmu_vcpu_reset(vcpu);
 