Skip to content

Commit cf1d88b

Browse files
dwmw2 authored and bonzini committed
KVM: Remove dirty handling from gfn_to_pfn_cache completely
It isn't OK to cache the dirty status of a page in internal structures for an indefinite period of time. Any time a vCPU exits the run loop to userspace might be its last; the VMM might do its final check of the dirty log, flush the last remaining dirty pages to the destination and complete a live migration. If we have internal 'dirty' state which doesn't get flushed until the vCPU is finally destroyed on the source after migration is complete, then we have lost data because that will escape the final copy. This problem already exists with the use of kvm_vcpu_unmap() to mark pages dirty in e.g. VMX nesting. Note that the actual Linux MM already considers the page to be dirty since we have a writeable mapping of it. This is just about the KVM dirty logging. For the nesting-style use cases (KVM_GUEST_USES_PFN) we will need to track which gfn_to_pfn_caches have been used and explicitly mark the corresponding pages dirty before returning to userspace. But we would have needed external tracking of that anyway, rather than walking the full list of GPCs to find those belonging to this vCPU which are dirty. So let's rely *solely* on that external tracking, and keep it simple rather than laying a tempting trap for callers to fall into. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20220303154127.202856-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent d0d9612 commit cf1d88b

5 files changed

Lines changed: 19 additions & 46 deletions

File tree

Documentation/virt/kvm/api.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5293,6 +5293,10 @@ type values:
52935293

52945294
KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO
52955295
Sets the guest physical address of the vcpu_info for a given vCPU.
5296+
As with the shared_info page for the VM, the corresponding page may be
5297+
dirtied at any time if event channel interrupt delivery is enabled, so
5298+
userspace should always assume that the page is dirty without relying
5299+
on dirty logging.
52965300

52975301
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
52985302
Sets the guest physical address of an additional pvclock structure

arch/x86/kvm/xen.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
4040

4141
do {
4242
ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, KVM_HOST_USES_PFN,
43-
gpa, PAGE_SIZE, false);
43+
gpa, PAGE_SIZE);
4444
if (ret)
4545
goto out;
4646

@@ -1025,8 +1025,7 @@ static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm
10251025
break;
10261026

10271027
idx = srcu_read_lock(&kvm->srcu);
1028-
rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
1029-
PAGE_SIZE, false);
1028+
rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
10301029
srcu_read_unlock(&kvm->srcu, idx);
10311030
} while(!rc);
10321031

include/linux/kvm_host.h

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,7 +1239,6 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
12391239
* by KVM (and thus needs a kernel virtual mapping).
12401240
* @gpa: guest physical address to map.
12411241
* @len: sanity check; the range being access must fit a single page.
1242-
* @dirty: mark the cache dirty immediately.
12431242
*
12441243
* @return: 0 for success.
12451244
* -EINVAL for a mapping which would cross a page boundary.
@@ -1252,7 +1251,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
12521251
*/
12531252
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
12541253
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
1255-
gpa_t gpa, unsigned long len, bool dirty);
1254+
gpa_t gpa, unsigned long len);
12561255

12571256
/**
12581257
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
@@ -1261,7 +1260,6 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
12611260
* @gpc: struct gfn_to_pfn_cache object.
12621261
* @gpa: current guest physical address to map.
12631262
* @len: sanity check; the range being access must fit a single page.
1264-
* @dirty: mark the cache dirty immediately.
12651263
*
12661264
* @return: %true if the cache is still valid and the address matches.
12671265
* %false if the cache is not valid.
@@ -1283,7 +1281,6 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
12831281
* @gpc: struct gfn_to_pfn_cache object.
12841282
* @gpa: updated guest physical address to map.
12851283
* @len: sanity check; the range being access must fit a single page.
1286-
* @dirty: mark the cache dirty immediately.
12871284
*
12881285
* @return: 0 for success.
12891286
* -EINVAL for a mapping which would cross a page boundary.
@@ -1296,18 +1293,17 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
12961293
* with the lock still held to permit access.
12971294
*/
12981295
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
1299-
gpa_t gpa, unsigned long len, bool dirty);
1296+
gpa_t gpa, unsigned long len);
13001297

13011298
/**
13021299
* kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
13031300
*
13041301
* @kvm: pointer to kvm instance.
13051302
* @gpc: struct gfn_to_pfn_cache object.
13061303
*
1307-
* This unmaps the referenced page and marks it dirty, if appropriate. The
1308-
* cache is left in the invalid state but at least the mapping from GPA to
1309-
* userspace HVA will remain cached and can be reused on a subsequent
1310-
* refresh.
1304+
* This unmaps the referenced page. The cache is left in the invalid state
1305+
* but at least the mapping from GPA to userspace HVA will remain cached
1306+
* and can be reused on a subsequent refresh.
13111307
*/
13121308
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
13131309

include/linux/kvm_types.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ struct gfn_to_pfn_cache {
7474
enum pfn_cache_usage usage;
7575
bool active;
7676
bool valid;
77-
bool dirty;
7877
};
7978

8079
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE

virt/kvm/pfncache.c

Lines changed: 8 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,6 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
4949
}
5050
__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
5151
}
52-
53-
/*
54-
* We cannot call mark_page_dirty() from here because
55-
* this physical CPU might not have an active vCPU
56-
* with which to do the KVM dirty tracking.
57-
*
58-
* Neither is there any point in telling the kernel MM
59-
* that the underlying page is dirty. A vCPU in guest
60-
* mode might still be writing to it up to the point
61-
* where we wake them a few lines further down anyway.
62-
*
63-
* So all the dirty marking happens on the unmap.
64-
*/
6552
}
6653
write_unlock_irq(&gpc->lock);
6754
}
@@ -108,8 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
10895
}
10996
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
11097

111-
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
112-
gpa_t gpa, bool dirty)
98+
static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa)
11399
{
114100
/* Unmap the old page if it was mapped before, and release it */
115101
if (!is_error_noslot_pfn(pfn)) {
@@ -122,9 +108,7 @@ static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
122108
#endif
123109
}
124110

125-
kvm_release_pfn(pfn, dirty);
126-
if (dirty)
127-
mark_page_dirty(kvm, gpa);
111+
kvm_release_pfn(pfn, false);
128112
}
129113
}
130114

@@ -156,15 +140,15 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
156140
}
157141

158142
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
159-
gpa_t gpa, unsigned long len, bool dirty)
143+
gpa_t gpa, unsigned long len)
160144
{
161145
struct kvm_memslots *slots = kvm_memslots(kvm);
162146
unsigned long page_offset = gpa & ~PAGE_MASK;
163147
kvm_pfn_t old_pfn, new_pfn;
164148
unsigned long old_uhva;
165149
gpa_t old_gpa;
166150
void *old_khva;
167-
bool old_valid, old_dirty;
151+
bool old_valid;
168152
int ret = 0;
169153

170154
/*
@@ -181,14 +165,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
181165
old_khva = gpc->khva - offset_in_page(gpc->khva);
182166
old_uhva = gpc->uhva;
183167
old_valid = gpc->valid;
184-
old_dirty = gpc->dirty;
185168

186169
/* If the userspace HVA is invalid, refresh that first */
187170
if (gpc->gpa != gpa || gpc->generation != slots->generation ||
188171
kvm_is_error_hva(gpc->uhva)) {
189172
gfn_t gfn = gpa_to_gfn(gpa);
190173

191-
gpc->dirty = false;
192174
gpc->gpa = gpa;
193175
gpc->generation = slots->generation;
194176
gpc->memslot = __gfn_to_memslot(slots, gfn);
@@ -260,14 +242,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
260242
}
261243

262244
out:
263-
if (ret)
264-
gpc->dirty = false;
265-
else
266-
gpc->dirty = dirty;
267-
268245
write_unlock_irq(&gpc->lock);
269246

270-
__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
247+
__release_gpc(kvm, old_pfn, old_khva, old_gpa);
271248

272249
return ret;
273250
}
@@ -277,15 +254,13 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
277254
{
278255
void *old_khva;
279256
kvm_pfn_t old_pfn;
280-
bool old_dirty;
281257
gpa_t old_gpa;
282258

283259
write_lock_irq(&gpc->lock);
284260

285261
gpc->valid = false;
286262

287263
old_khva = gpc->khva - offset_in_page(gpc->khva);
288-
old_dirty = gpc->dirty;
289264
old_gpa = gpc->gpa;
290265
old_pfn = gpc->pfn;
291266

@@ -298,14 +273,14 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
298273

299274
write_unlock_irq(&gpc->lock);
300275

301-
__release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
276+
__release_gpc(kvm, old_pfn, old_khva, old_gpa);
302277
}
303278
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
304279

305280

306281
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
307282
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
308-
gpa_t gpa, unsigned long len, bool dirty)
283+
gpa_t gpa, unsigned long len)
309284
{
310285
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
311286

@@ -324,7 +299,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
324299
list_add(&gpc->list, &kvm->gpc_list);
325300
spin_unlock(&kvm->gpc_lock);
326301
}
327-
return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
302+
return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
328303
}
329304
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);
330305

0 commit comments

Comments (0)