@@ -70,7 +70,7 @@ pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
7070 kfree (region_info );
7171}
7272
73- static int pds_vfio_dirty_alloc_bitmaps (struct pds_vfio_dirty * dirty ,
73+ static int pds_vfio_dirty_alloc_bitmaps (struct pds_vfio_region * region ,
7474 unsigned long bytes )
7575{
7676 unsigned long * host_seq_bmp , * host_ack_bmp ;
@@ -85,20 +85,27 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
8585 return - ENOMEM ;
8686 }
8787
88- dirty -> region . host_seq = host_seq_bmp ;
89- dirty -> region . host_ack = host_ack_bmp ;
90- dirty -> region . bmp_bytes = bytes ;
88+ region -> host_seq = host_seq_bmp ;
89+ region -> host_ack = host_ack_bmp ;
90+ region -> bmp_bytes = bytes ;
9191
9292 return 0 ;
9393}
9494
9595static void pds_vfio_dirty_free_bitmaps (struct pds_vfio_dirty * dirty )
9696{
97- vfree (dirty -> region .host_seq );
98- vfree (dirty -> region .host_ack );
99- dirty -> region .host_seq = NULL ;
100- dirty -> region .host_ack = NULL ;
101- dirty -> region .bmp_bytes = 0 ;
97+ if (!dirty -> regions )
98+ return ;
99+
100+ for (int i = 0 ; i < dirty -> num_regions ; i ++ ) {
101+ struct pds_vfio_region * region = & dirty -> regions [i ];
102+
103+ vfree (region -> host_seq );
104+ vfree (region -> host_ack );
105+ region -> host_seq = NULL ;
106+ region -> host_ack = NULL ;
107+ region -> bmp_bytes = 0 ;
108+ }
102109}
103110
104111static void __pds_vfio_dirty_free_sgl (struct pds_vfio_pci_device * pds_vfio ,
@@ -119,10 +126,17 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
119126
120127static void pds_vfio_dirty_free_sgl (struct pds_vfio_pci_device * pds_vfio )
121128{
122- struct pds_vfio_region * region = & pds_vfio -> dirty . region ;
129+ struct pds_vfio_dirty * dirty = & pds_vfio -> dirty ;
123130
124- if (region -> sgl )
125- __pds_vfio_dirty_free_sgl (pds_vfio , region );
131+ if (!dirty -> regions )
132+ return ;
133+
134+ for (int i = 0 ; i < dirty -> num_regions ; i ++ ) {
135+ struct pds_vfio_region * region = & dirty -> regions [i ];
136+
137+ if (region -> sgl )
138+ __pds_vfio_dirty_free_sgl (pds_vfio , region );
139+ }
126140}
127141
128142static int pds_vfio_dirty_alloc_sgl (struct pds_vfio_pci_device * pds_vfio ,
@@ -156,22 +170,90 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
156170 return 0 ;
157171}
158172
173+ static void pds_vfio_dirty_free_regions (struct pds_vfio_dirty * dirty )
174+ {
175+ vfree (dirty -> regions );
176+ dirty -> regions = NULL ;
177+ dirty -> num_regions = 0 ;
178+ }
179+
180+ static int pds_vfio_dirty_alloc_regions (struct pds_vfio_pci_device * pds_vfio ,
181+ struct pds_lm_dirty_region_info * region_info ,
182+ u64 region_page_size , u8 num_regions )
183+ {
184+ struct pci_dev * pdev = pds_vfio -> vfio_coredev .pdev ;
185+ struct pds_vfio_dirty * dirty = & pds_vfio -> dirty ;
186+ u32 dev_bmp_offset_byte = 0 ;
187+ int err ;
188+
189+ dirty -> regions = vcalloc (num_regions , sizeof (struct pds_vfio_region ));
190+ if (!dirty -> regions )
191+ return - ENOMEM ;
192+ dirty -> num_regions = num_regions ;
193+
194+ for (int i = 0 ; i < num_regions ; i ++ ) {
195+ struct pds_lm_dirty_region_info * ri = & region_info [i ];
196+ struct pds_vfio_region * region = & dirty -> regions [i ];
197+ u64 region_size , region_start ;
198+ u32 page_count ;
199+
200+ /* page_count might be adjusted by the device */
201+ page_count = le32_to_cpu (ri -> page_count );
202+ region_start = le64_to_cpu (ri -> dma_base );
203+ region_size = page_count * region_page_size ;
204+
205+ err = pds_vfio_dirty_alloc_bitmaps (region ,
206+ page_count / BITS_PER_BYTE );
207+ if (err ) {
208+ dev_err (& pdev -> dev , "Failed to alloc dirty bitmaps: %pe\n" ,
209+ ERR_PTR (err ));
210+ goto out_free_regions ;
211+ }
212+
213+ err = pds_vfio_dirty_alloc_sgl (pds_vfio , region , page_count );
214+ if (err ) {
215+ dev_err (& pdev -> dev , "Failed to alloc dirty sg lists: %pe\n" ,
216+ ERR_PTR (err ));
217+ goto out_free_regions ;
218+ }
219+
220+ region -> size = region_size ;
221+ region -> start = region_start ;
222+ region -> page_size = region_page_size ;
223+ region -> dev_bmp_offset_start_byte = dev_bmp_offset_byte ;
224+
225+ dev_bmp_offset_byte += page_count / BITS_PER_BYTE ;
226+ if (dev_bmp_offset_byte % BITS_PER_BYTE ) {
227+ dev_err (& pdev -> dev , "Device bitmap offset is mis-aligned\n" );
228+ err = - EINVAL ;
229+ goto out_free_regions ;
230+ }
231+ }
232+
233+ return 0 ;
234+
235+ out_free_regions :
236+ pds_vfio_dirty_free_bitmaps (dirty );
237+ pds_vfio_dirty_free_sgl (pds_vfio );
238+ pds_vfio_dirty_free_regions (dirty );
239+
240+ return err ;
241+ }
242+
159243static int pds_vfio_dirty_enable (struct pds_vfio_pci_device * pds_vfio ,
160244 struct rb_root_cached * ranges , u32 nnodes ,
161245 u64 * page_size )
162246{
163247 struct pci_dev * pdev = pds_vfio -> vfio_coredev .pdev ;
164248 struct device * pdsc_dev = & pci_physfn (pdev )-> dev ;
165- struct pds_vfio_dirty * dirty = & pds_vfio -> dirty ;
166- u64 region_start , region_size , region_page_size ;
167249 struct pds_lm_dirty_region_info * region_info ;
168250 struct interval_tree_node * node = NULL ;
251+ u64 region_page_size = * page_size ;
169252 u8 max_regions = 0 , num_regions ;
170253 dma_addr_t regions_dma = 0 ;
171254 u32 num_ranges = nnodes ;
172- u32 page_count ;
173- u16 len ;
174255 int err ;
256+ u16 len ;
175257
176258 dev_dbg (& pdev -> dev , "vf%u: Start dirty page tracking\n" ,
177259 pds_vfio -> vf_id );
@@ -198,39 +280,38 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
198280 return - EOPNOTSUPP ;
199281 }
200282
201- /*
202- * Only support 1 region for now. If there are any large gaps in the
203- * VM's address regions, then this would be a waste of memory as we are
204- * generating 2 bitmaps (ack/seq) from the min address to the max
205- * address of the VM's address regions. In the future, if we support
206- * more than one region in the device/driver we can split the bitmaps
207- * on the largest address region gaps. We can do this split up to the
208- * max_regions times returned from the dirty_status command.
209- */
210- max_regions = 1 ;
211283 if (num_ranges > max_regions ) {
212284 vfio_combine_iova_ranges (ranges , nnodes , max_regions );
213285 num_ranges = max_regions ;
214286 }
215287
288+ region_info = kcalloc (num_ranges , sizeof (* region_info ), GFP_KERNEL );
289+ if (!region_info )
290+ return - ENOMEM ;
291+ len = num_ranges * sizeof (* region_info );
292+
216293 node = interval_tree_iter_first (ranges , 0 , ULONG_MAX );
217294 if (!node )
218295 return - EINVAL ;
296+ for (int i = 0 ; i < num_ranges ; i ++ ) {
297+ struct pds_lm_dirty_region_info * ri = & region_info [i ];
298+ u64 region_size = node -> last - node -> start + 1 ;
299+ u64 region_start = node -> start ;
300+ u32 page_count ;
219301
220- region_size = node -> last - node -> start + 1 ;
221- region_start = node -> start ;
222- region_page_size = * page_size ;
302+ page_count = DIV_ROUND_UP (region_size , region_page_size );
223303
224- len = sizeof (* region_info );
225- region_info = kzalloc (len , GFP_KERNEL );
226- if (!region_info )
227- return - ENOMEM ;
304+ ri -> dma_base = cpu_to_le64 (region_start );
305+ ri -> page_count = cpu_to_le32 (page_count );
306+ ri -> page_size_log2 = ilog2 (region_page_size );
228307
229- page_count = DIV_ROUND_UP (region_size , region_page_size );
308+ dev_dbg (& pdev -> dev ,
309+ "region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n" ,
310+ i , region_start , node -> last , region_size , page_count ,
311+ region_page_size );
230312
231- region_info -> dma_base = cpu_to_le64 (region_start );
232- region_info -> page_count = cpu_to_le32 (page_count );
233- region_info -> page_size_log2 = ilog2 (region_page_size );
313+ node = interval_tree_iter_next (node , 0 , ULONG_MAX );
314+ }
234315
235316 regions_dma = dma_map_single (pdsc_dev , (void * )region_info , len ,
236317 DMA_BIDIRECTIONAL );
@@ -239,39 +320,20 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
239320 goto out_free_region_info ;
240321 }
241322
242- err = pds_vfio_dirty_enable_cmd (pds_vfio , regions_dma , max_regions );
323+ err = pds_vfio_dirty_enable_cmd (pds_vfio , regions_dma , num_ranges );
243324 dma_unmap_single (pdsc_dev , regions_dma , len , DMA_BIDIRECTIONAL );
244325 if (err )
245326 goto out_free_region_info ;
246327
247- /*
248- * page_count might be adjusted by the device,
249- * update it before freeing region_info DMA
250- */
251- page_count = le32_to_cpu (region_info -> page_count );
252-
253- dev_dbg (& pdev -> dev ,
254- "region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n" ,
255- regions_dma , region_start , page_count ,
256- (u8 )ilog2 (region_page_size ));
257-
258- err = pds_vfio_dirty_alloc_bitmaps (dirty , page_count / BITS_PER_BYTE );
259- if (err ) {
260- dev_err (& pdev -> dev , "Failed to alloc dirty bitmaps: %pe\n" ,
261- ERR_PTR (err ));
262- goto out_free_region_info ;
263- }
264-
265- err = pds_vfio_dirty_alloc_sgl (pds_vfio , & dirty -> region , page_count );
328+ err = pds_vfio_dirty_alloc_regions (pds_vfio , region_info ,
329+ region_page_size , num_ranges );
266330 if (err ) {
267- dev_err (& pdev -> dev , "Failed to alloc dirty sg lists: %pe\n" ,
268- ERR_PTR (err ));
269- goto out_free_bitmaps ;
331+ dev_err (& pdev -> dev ,
332+ "Failed to allocate %d regions for tracking dirty regions: %pe\n" ,
333+ num_regions , ERR_PTR (err ));
334+ goto out_dirty_disable ;
270335 }
271336
272- dirty -> region .start = region_start ;
273- dirty -> region .size = region_size ;
274- dirty -> region .page_size = region_page_size ;
275337 pds_vfio_dirty_set_enabled (pds_vfio );
276338
277339 pds_vfio_print_guest_region_info (pds_vfio , max_regions );
@@ -280,8 +342,8 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
280342
281343 return 0 ;
282344
283- out_free_bitmaps :
284- pds_vfio_dirty_free_bitmaps ( dirty );
345+ out_dirty_disable :
346+ pds_vfio_dirty_disable_cmd ( pds_vfio );
285347out_free_region_info :
286348 kfree (region_info );
287349 return err ;
@@ -295,6 +357,7 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
295357 pds_vfio_dirty_disable_cmd (pds_vfio );
296358 pds_vfio_dirty_free_sgl (pds_vfio );
297359 pds_vfio_dirty_free_bitmaps (& pds_vfio -> dirty );
360+ pds_vfio_dirty_free_regions (& pds_vfio -> dirty );
298361 }
299362
300363 if (send_cmd )
@@ -365,6 +428,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
365428
366429 num_sge = sg_table .nents ;
367430 size = num_sge * sizeof (struct pds_lm_sg_elem );
431+ offset += region -> dev_bmp_offset_start_byte ;
368432 dma_sync_single_for_device (pdsc_dev , region -> sgl_addr , size , dma_dir );
369433 err = pds_vfio_dirty_seq_ack_cmd (pds_vfio , region -> sgl_addr , num_sge ,
370434 offset , bmp_bytes , read_seq );
@@ -437,13 +501,28 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
437501 return 0 ;
438502}
439503
504+ static struct pds_vfio_region *
505+ pds_vfio_get_region (struct pds_vfio_pci_device * pds_vfio , unsigned long iova )
506+ {
507+ struct pds_vfio_dirty * dirty = & pds_vfio -> dirty ;
508+
509+ for (int i = 0 ; i < dirty -> num_regions ; i ++ ) {
510+ struct pds_vfio_region * region = & dirty -> regions [i ];
511+
512+ if (iova >= region -> start &&
513+ iova < (region -> start + region -> size ))
514+ return region ;
515+ }
516+
517+ return NULL ;
518+ }
519+
440520static int pds_vfio_dirty_sync (struct pds_vfio_pci_device * pds_vfio ,
441521 struct iova_bitmap * dirty_bitmap ,
442522 unsigned long iova , unsigned long length )
443523{
444524 struct device * dev = & pds_vfio -> vfio_coredev .pdev -> dev ;
445- struct pds_vfio_dirty * dirty = & pds_vfio -> dirty ;
446- struct pds_vfio_region * region = & dirty -> region ;
525+ struct pds_vfio_region * region ;
447526 u64 bmp_offset , bmp_bytes ;
448527 u64 bitmap_size , pages ;
449528 int err ;
@@ -456,6 +535,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
456535 return - EINVAL ;
457536 }
458537
538+ region = pds_vfio_get_region (pds_vfio , iova );
539+ if (!region ) {
540+ dev_err (dev , "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n" ,
541+ pds_vfio -> vf_id , iova , length );
542+ return - EINVAL ;
543+ }
544+
459545 pages = DIV_ROUND_UP (length , region -> page_size );
460546 bitmap_size =
461547 round_up (pages , sizeof (u64 ) * BITS_PER_BYTE ) / BITS_PER_BYTE ;
0 commit comments