Skip to content

Commit 2e7c6fe

Browse files
brettcreeley authored and Alex Williamson committed
vfio/pds: Add multi-region support
Only supporting a single region/range is limiting, wasteful, and in some cases broken (i.e. when there are large gaps in the iova memory ranges). Fix this by adding support for multiple regions based on what the device tells the driver it can support.

Signed-off-by: Brett Creeley <brett.creeley@amd.com>
Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Link: https://lore.kernel.org/r/20231117001207.2793-7-brett.creeley@amd.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 0c320f2 commit 2e7c6fe

2 files changed

Lines changed: 156 additions & 68 deletions

File tree

drivers/vfio/pci/pds/dirty.c

Lines changed: 153 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ pds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
7070
kfree(region_info);
7171
}
7272

73-
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
73+
static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_region *region,
7474
unsigned long bytes)
7575
{
7676
unsigned long *host_seq_bmp, *host_ack_bmp;
@@ -85,20 +85,27 @@ static int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
8585
return -ENOMEM;
8686
}
8787

88-
dirty->region.host_seq = host_seq_bmp;
89-
dirty->region.host_ack = host_ack_bmp;
90-
dirty->region.bmp_bytes = bytes;
88+
region->host_seq = host_seq_bmp;
89+
region->host_ack = host_ack_bmp;
90+
region->bmp_bytes = bytes;
9191

9292
return 0;
9393
}
9494

9595
static void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
9696
{
97-
vfree(dirty->region.host_seq);
98-
vfree(dirty->region.host_ack);
99-
dirty->region.host_seq = NULL;
100-
dirty->region.host_ack = NULL;
101-
dirty->region.bmp_bytes = 0;
97+
if (!dirty->regions)
98+
return;
99+
100+
for (int i = 0; i < dirty->num_regions; i++) {
101+
struct pds_vfio_region *region = &dirty->regions[i];
102+
103+
vfree(region->host_seq);
104+
vfree(region->host_ack);
105+
region->host_seq = NULL;
106+
region->host_ack = NULL;
107+
region->bmp_bytes = 0;
108+
}
102109
}
103110

104111
static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
@@ -119,10 +126,17 @@ static void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
119126

120127
static void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
121128
{
122-
struct pds_vfio_region *region = &pds_vfio->dirty.region;
129+
struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
123130

124-
if (region->sgl)
125-
__pds_vfio_dirty_free_sgl(pds_vfio, region);
131+
if (!dirty->regions)
132+
return;
133+
134+
for (int i = 0; i < dirty->num_regions; i++) {
135+
struct pds_vfio_region *region = &dirty->regions[i];
136+
137+
if (region->sgl)
138+
__pds_vfio_dirty_free_sgl(pds_vfio, region);
139+
}
126140
}
127141

128142
static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
@@ -156,22 +170,90 @@ static int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
156170
return 0;
157171
}
158172

173+
static void pds_vfio_dirty_free_regions(struct pds_vfio_dirty *dirty)
174+
{
175+
vfree(dirty->regions);
176+
dirty->regions = NULL;
177+
dirty->num_regions = 0;
178+
}
179+
180+
static int pds_vfio_dirty_alloc_regions(struct pds_vfio_pci_device *pds_vfio,
181+
struct pds_lm_dirty_region_info *region_info,
182+
u64 region_page_size, u8 num_regions)
183+
{
184+
struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
185+
struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
186+
u32 dev_bmp_offset_byte = 0;
187+
int err;
188+
189+
dirty->regions = vcalloc(num_regions, sizeof(struct pds_vfio_region));
190+
if (!dirty->regions)
191+
return -ENOMEM;
192+
dirty->num_regions = num_regions;
193+
194+
for (int i = 0; i < num_regions; i++) {
195+
struct pds_lm_dirty_region_info *ri = &region_info[i];
196+
struct pds_vfio_region *region = &dirty->regions[i];
197+
u64 region_size, region_start;
198+
u32 page_count;
199+
200+
/* page_count might be adjusted by the device */
201+
page_count = le32_to_cpu(ri->page_count);
202+
region_start = le64_to_cpu(ri->dma_base);
203+
region_size = page_count * region_page_size;
204+
205+
err = pds_vfio_dirty_alloc_bitmaps(region,
206+
page_count / BITS_PER_BYTE);
207+
if (err) {
208+
dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
209+
ERR_PTR(err));
210+
goto out_free_regions;
211+
}
212+
213+
err = pds_vfio_dirty_alloc_sgl(pds_vfio, region, page_count);
214+
if (err) {
215+
dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
216+
ERR_PTR(err));
217+
goto out_free_regions;
218+
}
219+
220+
region->size = region_size;
221+
region->start = region_start;
222+
region->page_size = region_page_size;
223+
region->dev_bmp_offset_start_byte = dev_bmp_offset_byte;
224+
225+
dev_bmp_offset_byte += page_count / BITS_PER_BYTE;
226+
if (dev_bmp_offset_byte % BITS_PER_BYTE) {
227+
dev_err(&pdev->dev, "Device bitmap offset is mis-aligned\n");
228+
err = -EINVAL;
229+
goto out_free_regions;
230+
}
231+
}
232+
233+
return 0;
234+
235+
out_free_regions:
236+
pds_vfio_dirty_free_bitmaps(dirty);
237+
pds_vfio_dirty_free_sgl(pds_vfio);
238+
pds_vfio_dirty_free_regions(dirty);
239+
240+
return err;
241+
}
242+
159243
static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
160244
struct rb_root_cached *ranges, u32 nnodes,
161245
u64 *page_size)
162246
{
163247
struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
164248
struct device *pdsc_dev = &pci_physfn(pdev)->dev;
165-
struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
166-
u64 region_start, region_size, region_page_size;
167249
struct pds_lm_dirty_region_info *region_info;
168250
struct interval_tree_node *node = NULL;
251+
u64 region_page_size = *page_size;
169252
u8 max_regions = 0, num_regions;
170253
dma_addr_t regions_dma = 0;
171254
u32 num_ranges = nnodes;
172-
u32 page_count;
173-
u16 len;
174255
int err;
256+
u16 len;
175257

176258
dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
177259
pds_vfio->vf_id);
@@ -198,39 +280,38 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
198280
return -EOPNOTSUPP;
199281
}
200282

201-
/*
202-
* Only support 1 region for now. If there are any large gaps in the
203-
* VM's address regions, then this would be a waste of memory as we are
204-
* generating 2 bitmaps (ack/seq) from the min address to the max
205-
* address of the VM's address regions. In the future, if we support
206-
* more than one region in the device/driver we can split the bitmaps
207-
* on the largest address region gaps. We can do this split up to the
208-
* max_regions times returned from the dirty_status command.
209-
*/
210-
max_regions = 1;
211283
if (num_ranges > max_regions) {
212284
vfio_combine_iova_ranges(ranges, nnodes, max_regions);
213285
num_ranges = max_regions;
214286
}
215287

288+
region_info = kcalloc(num_ranges, sizeof(*region_info), GFP_KERNEL);
289+
if (!region_info)
290+
return -ENOMEM;
291+
len = num_ranges * sizeof(*region_info);
292+
216293
node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
217294
if (!node)
218295
return -EINVAL;
296+
for (int i = 0; i < num_ranges; i++) {
297+
struct pds_lm_dirty_region_info *ri = &region_info[i];
298+
u64 region_size = node->last - node->start + 1;
299+
u64 region_start = node->start;
300+
u32 page_count;
219301

220-
region_size = node->last - node->start + 1;
221-
region_start = node->start;
222-
region_page_size = *page_size;
302+
page_count = DIV_ROUND_UP(region_size, region_page_size);
223303

224-
len = sizeof(*region_info);
225-
region_info = kzalloc(len, GFP_KERNEL);
226-
if (!region_info)
227-
return -ENOMEM;
304+
ri->dma_base = cpu_to_le64(region_start);
305+
ri->page_count = cpu_to_le32(page_count);
306+
ri->page_size_log2 = ilog2(region_page_size);
228307

229-
page_count = DIV_ROUND_UP(region_size, region_page_size);
308+
dev_dbg(&pdev->dev,
309+
"region_info[%d]: region_start 0x%llx region_end 0x%lx region_size 0x%llx page_count %u page_size %llu\n",
310+
i, region_start, node->last, region_size, page_count,
311+
region_page_size);
230312

231-
region_info->dma_base = cpu_to_le64(region_start);
232-
region_info->page_count = cpu_to_le32(page_count);
233-
region_info->page_size_log2 = ilog2(region_page_size);
313+
node = interval_tree_iter_next(node, 0, ULONG_MAX);
314+
}
234315

235316
regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
236317
DMA_BIDIRECTIONAL);
@@ -239,39 +320,20 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
239320
goto out_free_region_info;
240321
}
241322

242-
err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions);
323+
err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, num_ranges);
243324
dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
244325
if (err)
245326
goto out_free_region_info;
246327

247-
/*
248-
* page_count might be adjusted by the device,
249-
* update it before freeing region_info DMA
250-
*/
251-
page_count = le32_to_cpu(region_info->page_count);
252-
253-
dev_dbg(&pdev->dev,
254-
"region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n",
255-
regions_dma, region_start, page_count,
256-
(u8)ilog2(region_page_size));
257-
258-
err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE);
259-
if (err) {
260-
dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
261-
ERR_PTR(err));
262-
goto out_free_region_info;
263-
}
264-
265-
err = pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->region, page_count);
328+
err = pds_vfio_dirty_alloc_regions(pds_vfio, region_info,
329+
region_page_size, num_ranges);
266330
if (err) {
267-
dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
268-
ERR_PTR(err));
269-
goto out_free_bitmaps;
331+
dev_err(&pdev->dev,
332+
"Failed to allocate %d regions for tracking dirty regions: %pe\n",
333+
num_regions, ERR_PTR(err));
334+
goto out_dirty_disable;
270335
}
271336

272-
dirty->region.start = region_start;
273-
dirty->region.size = region_size;
274-
dirty->region.page_size = region_page_size;
275337
pds_vfio_dirty_set_enabled(pds_vfio);
276338

277339
pds_vfio_print_guest_region_info(pds_vfio, max_regions);
@@ -280,8 +342,8 @@ static int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
280342

281343
return 0;
282344

283-
out_free_bitmaps:
284-
pds_vfio_dirty_free_bitmaps(dirty);
345+
out_dirty_disable:
346+
pds_vfio_dirty_disable_cmd(pds_vfio);
285347
out_free_region_info:
286348
kfree(region_info);
287349
return err;
@@ -295,6 +357,7 @@ void pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
295357
pds_vfio_dirty_disable_cmd(pds_vfio);
296358
pds_vfio_dirty_free_sgl(pds_vfio);
297359
pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
360+
pds_vfio_dirty_free_regions(&pds_vfio->dirty);
298361
}
299362

300363
if (send_cmd)
@@ -365,6 +428,7 @@ static int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
365428

366429
num_sge = sg_table.nents;
367430
size = num_sge * sizeof(struct pds_lm_sg_elem);
431+
offset += region->dev_bmp_offset_start_byte;
368432
dma_sync_single_for_device(pdsc_dev, region->sgl_addr, size, dma_dir);
369433
err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, region->sgl_addr, num_sge,
370434
offset, bmp_bytes, read_seq);
@@ -437,13 +501,28 @@ static int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
437501
return 0;
438502
}
439503

504+
static struct pds_vfio_region *
505+
pds_vfio_get_region(struct pds_vfio_pci_device *pds_vfio, unsigned long iova)
506+
{
507+
struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
508+
509+
for (int i = 0; i < dirty->num_regions; i++) {
510+
struct pds_vfio_region *region = &dirty->regions[i];
511+
512+
if (iova >= region->start &&
513+
iova < (region->start + region->size))
514+
return region;
515+
}
516+
517+
return NULL;
518+
}
519+
440520
static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
441521
struct iova_bitmap *dirty_bitmap,
442522
unsigned long iova, unsigned long length)
443523
{
444524
struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
445-
struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
446-
struct pds_vfio_region *region = &dirty->region;
525+
struct pds_vfio_region *region;
447526
u64 bmp_offset, bmp_bytes;
448527
u64 bitmap_size, pages;
449528
int err;
@@ -456,6 +535,13 @@ static int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
456535
return -EINVAL;
457536
}
458537

538+
region = pds_vfio_get_region(pds_vfio, iova);
539+
if (!region) {
540+
dev_err(dev, "vf%u: Failed to find region that contains iova 0x%lx length 0x%lx\n",
541+
pds_vfio->vf_id, iova, length);
542+
return -EINVAL;
543+
}
544+
459545
pages = DIV_ROUND_UP(length, region->page_size);
460546
bitmap_size =
461547
round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;

drivers/vfio/pci/pds/dirty.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@ struct pds_vfio_region {
1313
u64 page_size;
1414
struct pds_lm_sg_elem *sgl;
1515
dma_addr_t sgl_addr;
16+
u32 dev_bmp_offset_start_byte;
1617
u16 num_sge;
1718
};
1819

1920
struct pds_vfio_dirty {
20-
struct pds_vfio_region region;
21+
struct pds_vfio_region *regions;
22+
u8 num_regions;
2123
bool is_enabled;
2224
};
2325

0 commit comments

Comments (0)