
Commit cc5a102

Merge tag 'kvm-x86-dirty_ring-6.17' of https://github.com/kvm-x86/linux into HEAD

KVM Dirty Ring changes for 6.17

Fix issues with dirty ring harvesting where KVM doesn't bound the processing of entries in any way, which allows userspace to keep KVM in a tight loop indefinitely. Clean up code and comments along the way.

2 parents d284562 + 614fb9d

3 files changed: 88 additions & 48 deletions
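For context, the userspace side of the protocol this merge hardens works as follows: the VMM harvests dirty GFNs from each vCPU's mmap()ed ring, marks them with KVM_DIRTY_GFN_F_RESET, and then issues the KVM_RESET_DIRTY_RINGS vm ioctl, which lands in the kvm_dirty_ring_reset() changed below. The sketch that follows is illustrative and not part of this commit; struct kvm_dirty_gfn, the KVM_DIRTY_GFN_F_* flags, and the ioctl are the documented KVM UAPI, while harvest_ring() and fetch_index are hypothetical VMM bookkeeping.

/*
 * Illustrative harvesting loop (assumptions: "ring" was mmap()ed from a
 * vCPU fd at page offset KVM_DIRTY_LOG_PAGE_OFFSET, and ring_size is the
 * number of entries, a power of two).
 */
#include <linux/kvm.h>
#include <stdint.h>

static uint32_t fetch_index;	/* the VMM's private cursor into the ring */

static int harvest_ring(struct kvm_dirty_gfn *ring, uint32_t ring_size)
{
	int harvested = 0;

	for (;;) {
		struct kvm_dirty_gfn *e = &ring[fetch_index & (ring_size - 1)];

		/* Acquire pairs with KVM's release when publishing the entry. */
		if (!(__atomic_load_n(&e->flags, __ATOMIC_ACQUIRE) &
		      KVM_DIRTY_GFN_F_DIRTY))
			break;	/* no more published entries */

		/* e->slot and e->offset identify the dirty gfn; track it here. */

		/* Mark the entry harvested so KVM_RESET_DIRTY_RINGS recycles it. */
		__atomic_store_n(&e->flags, KVM_DIRTY_GFN_F_RESET,
				 __ATOMIC_RELEASE);
		fetch_index++;
		harvested++;
	}

	return harvested;
}

After harvesting, the VMM calls ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0); this merge bounds how much work that single call can do.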


include/linux/kvm_dirty_ring.h

Lines changed: 5 additions & 13 deletions
@@ -49,9 +49,10 @@ static inline int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *r
 }
 
 static inline int kvm_dirty_ring_reset(struct kvm *kvm,
-				       struct kvm_dirty_ring *ring)
+				       struct kvm_dirty_ring *ring,
+				       int *nr_entries_reset)
 {
-	return 0;
+	return -ENOENT;
 }
 
 static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
@@ -77,17 +78,8 @@ bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
 u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm);
 int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring,
			 int index, u32 size);
-
-/*
- * called with kvm->slots_lock held, returns the number of
- * processed pages.
- */
-int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
-
-/*
- * returns =0: successfully pushed
- * <0: unable to push, need to wait
- */
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
+			 int *nr_entries_reset);
 void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);
 
 bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);

virt/kvm/dirty_ring.c

Lines changed: 77 additions & 32 deletions
@@ -55,9 +55,6 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
 	struct kvm_memory_slot *memslot;
 	int as_id, id;
 
-	if (!mask)
-		return;
-
 	as_id = slot >> 16;
 	id = (u16)slot;
 
@@ -105,19 +102,38 @@ static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
 	return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET;
 }
 
-int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
+			 int *nr_entries_reset)
 {
+	/*
+	 * To minimize mmu_lock contention, batch resets for harvested entries
+	 * whose gfns are in the same slot, and are within N frame numbers of
+	 * each other, where N is the number of bits in an unsigned long.  For
+	 * simplicity, process the current set of entries when the next entry
+	 * can't be included in the batch.
+	 *
+	 * Track the current batch slot, the gfn offset into the slot for the
+	 * batch, and the bitmask of gfns that need to be reset (relative to
+	 * offset).  Note, the offset may be adjusted backwards, e.g. so that
+	 * a sequence of gfns X, X-1, ... X-N-1 can be batched.
+	 */
 	u32 cur_slot, next_slot;
 	u64 cur_offset, next_offset;
-	unsigned long mask;
-	int count = 0;
+	unsigned long mask = 0;
 	struct kvm_dirty_gfn *entry;
-	bool first_round = true;
 
-	/* This is only needed to make compilers happy */
-	cur_slot = cur_offset = mask = 0;
+	/*
+	 * Ensure concurrent calls to KVM_RESET_DIRTY_RINGS are serialized,
+	 * e.g. so that KVM fully resets all entries processed by a given call
+	 * before returning to userspace.  Holding slots_lock also protects
+	 * the various memslot accesses.
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	while (likely((*nr_entries_reset) < INT_MAX)) {
+		if (signal_pending(current))
+			return -EINTR;
 
-	while (true) {
 		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
 
 		if (!kvm_dirty_gfn_harvested(entry))
@@ -130,35 +146,64 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
 		kvm_dirty_gfn_set_invalid(entry);
 
 		ring->reset_index++;
-		count++;
-		/*
-		 * Try to coalesce the reset operations when the guest is
-		 * scanning pages in the same slot.
-		 */
-		if (!first_round && next_slot == cur_slot) {
-			s64 delta = next_offset - cur_offset;
-
-			if (delta >= 0 && delta < BITS_PER_LONG) {
-				mask |= 1ull << delta;
-				continue;
+		(*nr_entries_reset)++;
+
+		if (mask) {
+			/*
+			 * While the size of each ring is fixed, it's possible
+			 * for the ring to be constantly re-dirtied/harvested
+			 * while the reset is in-progress (the hard limit exists
+			 * only to guard against the count becoming negative).
+			 */
+			cond_resched();
+
+			/*
+			 * Try to coalesce the reset operations when the guest
+			 * is scanning pages in the same slot.
+			 */
+			if (next_slot == cur_slot) {
+				s64 delta = next_offset - cur_offset;
+
+				if (delta >= 0 && delta < BITS_PER_LONG) {
+					mask |= 1ull << delta;
+					continue;
+				}
+
+				/* Backwards visit, careful about overflows! */
+				if (delta > -BITS_PER_LONG && delta < 0 &&
+				    (mask << -delta >> -delta) == mask) {
+					cur_offset = next_offset;
+					mask = (mask << -delta) | 1;
+					continue;
+				}
 			}
 
-			/* Backwards visit, careful about overflows! */
-			if (delta > -BITS_PER_LONG && delta < 0 &&
-			    (mask << -delta >> -delta) == mask) {
-				cur_offset = next_offset;
-				mask = (mask << -delta) | 1;
-				continue;
-			}
+			/*
+			 * Reset the slot for all the harvested entries that
+			 * have been gathered, but not yet fully processed.
+			 */
+			kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
 		}
-		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+		/*
+		 * The current slot was reset or this is the first harvested
+		 * entry, (re)initialize the metadata.
+		 */
 		cur_slot = next_slot;
 		cur_offset = next_offset;
 		mask = 1;
-		first_round = false;
 	}
 
-	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+	/*
+	 * Perform a final reset if there are harvested entries that haven't
+	 * been processed, which is guaranteed if at least one harvested was
+	 * found.  The loop only performs a reset when the "next" entry can't
+	 * be batched with the "current" entry(s), and that reset processes the
+	 * _current_ entry(s); i.e. the last harvested entry, a.k.a. next, will
+	 * always be left pending.
	 */
+	if (mask)
+		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
 
 	/*
 	 * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared
@@ -167,7 +212,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
 
 	trace_kvm_dirty_ring_reset(ring);
 
-	return count;
+	return 0;
 }
 
 void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
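To see the batching rule in isolation, here is a minimal standalone sketch of the same mask arithmetic used in kvm_dirty_ring_reset() above. flush() stands in for kvm_reset_dirty_gfn() and every name here is illustrative; only the forward/backward delta logic mirrors the kernel code.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG	64

static void flush(uint32_t slot, uint64_t offset, unsigned long mask)
{
	printf("reset slot %u, base gfn %llu, mask %#lx\n",
	       slot, (unsigned long long)offset, mask);
}

static void coalesce(const uint32_t *slots, const uint64_t *offsets, int nr)
{
	uint32_t cur_slot = 0;
	uint64_t cur_offset = 0;
	unsigned long mask = 0;

	for (int i = 0; i < nr; i++) {
		if (mask && slots[i] == cur_slot) {
			int64_t delta = (int64_t)(offsets[i] - cur_offset);

			/* Forward visit within the 64-gfn window. */
			if (delta >= 0 && delta < BITS_PER_LONG) {
				mask |= 1ull << delta;
				continue;
			}

			/*
			 * Backwards visit: slide the window down, but only
			 * if no already-gathered bit falls off the top.
			 */
			if (delta > -BITS_PER_LONG && delta < 0 &&
			    (mask << -delta >> -delta) == mask) {
				cur_offset = offsets[i];
				mask = (mask << -delta) | 1;
				continue;
			}
		}

		if (mask)	/* can't extend the batch; process it */
			flush(cur_slot, cur_offset, mask);

		cur_slot = slots[i];
		cur_offset = offsets[i];
		mask = 1;
	}

	if (mask)	/* the loop always leaves the last batch pending */
		flush(cur_slot, cur_offset, mask);
}

Feeding coalesce() the sequence (slot 0, gfn 5), (slot 0, gfn 6), (slot 0, gfn 4) emits a single reset with base gfn 4 and mask 0x7, matching how the kernel loop folds adjacent forward and backward visits into one kvm_reset_dirty_gfn() call.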

virt/kvm/kvm_main.c

Lines changed: 6 additions & 3 deletions
@@ -4967,15 +4967,18 @@ static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
 {
 	unsigned long i;
 	struct kvm_vcpu *vcpu;
-	int cleared = 0;
+	int cleared = 0, r;
 
 	if (!kvm->dirty_ring_size)
 		return -EINVAL;
 
 	mutex_lock(&kvm->slots_lock);
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared);
+		if (r)
+			break;
+	}
 
 	mutex_unlock(&kvm->slots_lock);
 
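Note the changed contract at the ioctl boundary: the per-vCPU counts now accumulate through the nr_entries_reset out-parameter, and the loop stops at the first vCPU whose reset fails (e.g. -EINTR from the signal_pending() check above). The function's return path is outside this hunk, so whether userspace observes the partial count or the error is not shown here; a defensive caller might look like the sketch below, where vm_fd is assumed to be a VM file descriptor with dirty rings enabled.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <errno.h>

/*
 * Illustrative wrapper, not kernel or VMM code.  With bounded
 * harvesting, a reset can plausibly stop early on a pending signal;
 * retrying is safe because each vCPU's reset_index persists across
 * calls, so a new ioctl resumes where the previous one left off.
 */
static int reset_dirty_rings(int vm_fd)
{
	int r;

	do {
		r = ioctl(vm_fd, KVM_RESET_DIRTY_RINGS, 0);
	} while (r < 0 && errno == EINTR);

	return r;
}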
