@@ -55,9 +55,6 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
 	struct kvm_memory_slot *memslot;
 	int as_id, id;
 
-	if (!mask)
-		return;
-
 	as_id = slot >> 16;
 	id = (u16)slot;
 
@@ -105,19 +102,38 @@ static inline bool kvm_dirty_gfn_harvested(struct kvm_dirty_gfn *gfn)
 	return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET;
 }
 
-int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
+int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring,
+			 int *nr_entries_reset)
 {
+	/*
+	 * To minimize mmu_lock contention, batch resets for harvested entries
+	 * whose gfns are in the same slot, and are within N frame numbers of
+	 * each other, where N is the number of bits in an unsigned long.  For
+	 * simplicity, process the current set of entries when the next entry
+	 * can't be included in the batch.
+	 *
+	 * Track the current batch slot, the gfn offset into the slot for the
+	 * batch, and the bitmask of gfns that need to be reset (relative to
+	 * offset).  Note, the offset may be adjusted backwards, e.g. so that
+	 * a sequence of gfns X, X-1, ... X-N-1 can be batched.
+	 */
 	u32 cur_slot, next_slot;
 	u64 cur_offset, next_offset;
-	unsigned long mask;
-	int count = 0;
+	unsigned long mask = 0;
 	struct kvm_dirty_gfn *entry;
-	bool first_round = true;
 
-	/* This is only needed to make compilers happy */
-	cur_slot = cur_offset = mask = 0;
+	/*
+	 * Ensure concurrent calls to KVM_RESET_DIRTY_RINGS are serialized,
+	 * e.g. so that KVM fully resets all entries processed by a given call
+	 * before returning to userspace.  Holding slots_lock also protects
+	 * the various memslot accesses.
+	 */
+	lockdep_assert_held(&kvm->slots_lock);
+
+	while (likely((*nr_entries_reset) < INT_MAX)) {
+		if (signal_pending(current))
+			return -EINTR;
 
-	while (true) {
 		entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)];
 
 		if (!kvm_dirty_gfn_harvested(entry))
@@ -130,35 +146,64 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
 		kvm_dirty_gfn_set_invalid(entry);
 
 		ring->reset_index++;
-		count++;
-		/*
-		 * Try to coalesce the reset operations when the guest is
-		 * scanning pages in the same slot.
-		 */
-		if (!first_round && next_slot == cur_slot) {
-			s64 delta = next_offset - cur_offset;
-
-			if (delta >= 0 && delta < BITS_PER_LONG) {
-				mask |= 1ull << delta;
-				continue;
+		(*nr_entries_reset)++;
+
+		if (mask) {
+			/*
+			 * While the size of each ring is fixed, it's possible
+			 * for the ring to be constantly re-dirtied/harvested
+			 * while the reset is in-progress (the hard limit exists
+			 * only to guard against the count becoming negative).
+			 */
+			cond_resched();
+
+			/*
+			 * Try to coalesce the reset operations when the guest
+			 * is scanning pages in the same slot.
+			 */
+			if (next_slot == cur_slot) {
+				s64 delta = next_offset - cur_offset;
+
+				if (delta >= 0 && delta < BITS_PER_LONG) {
+					mask |= 1ull << delta;
+					continue;
+				}
+
+				/* Backwards visit, careful about overflows! */
+				if (delta > -BITS_PER_LONG && delta < 0 &&
+				    (mask << -delta >> -delta) == mask) {
+					cur_offset = next_offset;
+					mask = (mask << -delta) | 1;
+					continue;
+				}
 			}
 
-			/* Backwards visit, careful about overflows! */
-			if (delta > -BITS_PER_LONG && delta < 0 &&
-			    (mask << -delta >> -delta) == mask) {
-				cur_offset = next_offset;
-				mask = (mask << -delta) | 1;
-				continue;
-			}
+			/*
+			 * Reset the slot for all the harvested entries that
+			 * have been gathered, but not yet fully processed.
+			 */
+			kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
 		}
-		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+
+		/*
+		 * The current slot was reset or this is the first harvested
+		 * entry, (re)initialize the metadata.
+		 */
 		cur_slot = next_slot;
 		cur_offset = next_offset;
 		mask = 1;
-		first_round = false;
 	}
 
-	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
+	/*
+	 * Perform a final reset if there are harvested entries that haven't
+	 * been processed, which is guaranteed if at least one harvested entry
+	 * was found.  The loop only performs a reset when the "next" entry
+	 * can't be batched with the "current" entry(s), and that reset
+	 * processes the _current_ entry(s); i.e. the last harvested entry,
+	 * a.k.a. next, will always be left pending.
+	 */
+	if (mask)
+		kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
 
 	/*
 	 * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared
@@ -167,7 +212,7 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
 
 	trace_kvm_dirty_ring_reset(ring);
 
-	return count;
+	return 0;
 }
 
 void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
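
The trickiest part of the patched loop is the coalescing scheme: a batch is a (slot, base offset) pair plus a bitmask of gfns relative to that base, and a new gfn joins the batch only if it lands within BITS_PER_LONG of the base, either forwards (set a higher bit) or backwards (shift the whole mask up and re-base, provided no bits fall off the top). Below is a minimal standalone userspace sketch of that batching logic, assuming 64-bit longs; reset_batch() and batch_gfn() are hypothetical stand-ins for kvm_reset_dirty_gfn() and the loop body, not KVM APIs.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG 64

/* Hypothetical stand-in for kvm_reset_dirty_gfn(): just report the batch. */
static void reset_batch(uint32_t slot, uint64_t offset, unsigned long mask)
{
	printf("reset slot %u: base offset %llu, mask 0x%lx\n",
	       slot, (unsigned long long)offset, mask);
}

/*
 * Feed one harvested (slot, offset) pair into the current batch.  Returns
 * early when the new gfn can be coalesced into `mask`; otherwise flushes
 * the pending batch and starts a new one seeded with this gfn.
 */
static void batch_gfn(uint32_t *cur_slot, uint64_t *cur_offset,
		      unsigned long *mask, uint32_t next_slot,
		      uint64_t next_offset)
{
	if (*mask && next_slot == *cur_slot) {
		int64_t delta = next_offset - *cur_offset;

		/* Forward visit: set the bit `delta` above the batch base. */
		if (delta >= 0 && delta < BITS_PER_LONG) {
			*mask |= 1ull << delta;
			return;
		}

		/*
		 * Backward visit: re-basing shifts the mask left by -delta,
		 * which is legal only if the round-tripped shift preserves
		 * the mask, i.e. if no already-gathered gfn would be lost
		 * off the top of the bitmask.
		 */
		if (delta > -BITS_PER_LONG && delta < 0 &&
		    (*mask << -delta >> -delta) == *mask) {
			*cur_offset = next_offset;
			*mask = (*mask << -delta) | 1;
			return;
		}
	}

	/* Can't coalesce: flush the current batch, if any, and restart. */
	if (*mask)
		reset_batch(*cur_slot, *cur_offset, *mask);

	*cur_slot = next_slot;
	*cur_offset = next_offset;
	*mask = 1;
}

int main(void)
{
	uint32_t cur_slot = 0;
	uint64_t cur_offset = 0;
	unsigned long mask = 0;

	/* Offsets 100, 101, 99 in slot 1 coalesce; slot 2 forces a flush. */
	batch_gfn(&cur_slot, &cur_offset, &mask, 1, 100);
	batch_gfn(&cur_slot, &cur_offset, &mask, 1, 101);
	batch_gfn(&cur_slot, &cur_offset, &mask, 1, 99);   /* backward visit */
	batch_gfn(&cur_slot, &cur_offset, &mask, 2, 4096);

	/* Final flush, mirroring the `if (mask)` tail of the patched loop. */
	if (mask)
		reset_batch(cur_slot, cur_offset, mask);
	return 0;
}

Running this prints one reset for slot 1 with base offset 99 and mask 0x7 (covering gfns 99-101), then one for slot 2. The round-trip shift test `(mask << -delta >> -delta) == mask` is the overflow guard the patch's "Backwards visit" comment refers to: it rejects a re-base exactly when shifting would silently drop a pending gfn.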