Skip to content

Commit f6c84a5

Browse files
Li Zhe authored and Alex Williamson committed
vfio/type1: optimize vfio_pin_pages_remote()
When vfio_pin_pages_remote() is called with a range of addresses that
includes large folios, the function currently performs individual
statistics counting operations for each page. This can lead to
significant performance overheads, especially when dealing with large
ranges of pages. Batch processing of statistics counting operations can
effectively enhance performance.

In addition, the pages obtained through longterm GUP are neither invalid
nor reserved. Therefore, we can reduce the overhead associated with some
calls to function is_invalid_reserved_pfn().

The performance test results for completing the 16G VFIO IOMMU DMA
mapping are as follows.

Base (v6.16):
------- AVERAGE (MADV_HUGEPAGE) --------
VFIO MAP DMA in 0.049 s (328.5 GB/s)
------- AVERAGE (MAP_POPULATE) --------
VFIO MAP DMA in 0.268 s (59.6 GB/s)
------- AVERAGE (HUGETLBFS) --------
VFIO MAP DMA in 0.051 s (310.9 GB/s)

With this patch:
------- AVERAGE (MADV_HUGEPAGE) --------
VFIO MAP DMA in 0.025 s (629.8 GB/s)
------- AVERAGE (MAP_POPULATE) --------
VFIO MAP DMA in 0.253 s (63.1 GB/s)
------- AVERAGE (HUGETLBFS) --------
VFIO MAP DMA in 0.030 s (530.5 GB/s)

For large folios, we achieve an over 40% performance improvement. For
small folios, the performance test results indicate a slight improvement.

Signed-off-by: Li Zhe <lizhe.67@bytedance.com>
Co-developed-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Tested-by: Eric Farman <farman@linux.ibm.com>
Link: https://lore.kernel.org/r/20250814064714.56485-3-lizhe.67@bytedance.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent 929bf01 commit f6c84a5

1 file changed

Lines changed: 72 additions & 12 deletions

File tree

drivers/vfio/vfio_iommu_type1.c

Lines changed: 72 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include <linux/vfio.h>
3838
#include <linux/workqueue.h>
3939
#include <linux/notifier.h>
40+
#include <linux/mm_inline.h>
4041
#include "vfio.h"
4142

4243
#define DRIVER_VERSION "0.2"
@@ -318,24 +319,35 @@ static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu)
318319
/*
319320
* Helper Functions for host iova-pfn list
320321
*/
321-
static struct vfio_pfn *vfio_find_vpfn(struct vfio_dma *dma, dma_addr_t iova)
322+
323+
/*
324+
* Find the highest vfio_pfn that overlapping the range
325+
* [iova_start, iova_end) in rb tree.
326+
*/
327+
static struct vfio_pfn *vfio_find_vpfn_range(struct vfio_dma *dma,
328+
dma_addr_t iova_start, dma_addr_t iova_end)
322329
{
323330
struct vfio_pfn *vpfn;
324331
struct rb_node *node = dma->pfn_list.rb_node;
325332

326333
while (node) {
327334
vpfn = rb_entry(node, struct vfio_pfn, node);
328335

329-
if (iova < vpfn->iova)
336+
if (iova_end <= vpfn->iova)
330337
node = node->rb_left;
331-
else if (iova > vpfn->iova)
338+
else if (iova_start > vpfn->iova)
332339
node = node->rb_right;
333340
else
334341
return vpfn;
335342
}
336343
return NULL;
337344
}
338345

346+
static inline struct vfio_pfn *vfio_find_vpfn(struct vfio_dma *dma, dma_addr_t iova)
347+
{
348+
return vfio_find_vpfn_range(dma, iova, iova + 1);
349+
}
350+
339351
static void vfio_link_pfn(struct vfio_dma *dma,
340352
struct vfio_pfn *new)
341353
{
@@ -614,6 +626,39 @@ static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
614626
return ret;
615627
}
616628

629+
630+
static long vpfn_pages(struct vfio_dma *dma,
631+
dma_addr_t iova_start, long nr_pages)
632+
{
633+
dma_addr_t iova_end = iova_start + (nr_pages << PAGE_SHIFT);
634+
struct vfio_pfn *top = vfio_find_vpfn_range(dma, iova_start, iova_end);
635+
long ret = 1;
636+
struct vfio_pfn *vpfn;
637+
struct rb_node *prev;
638+
struct rb_node *next;
639+
640+
if (likely(!top))
641+
return 0;
642+
643+
prev = next = &top->node;
644+
645+
while ((prev = rb_prev(prev))) {
646+
vpfn = rb_entry(prev, struct vfio_pfn, node);
647+
if (vpfn->iova < iova_start)
648+
break;
649+
ret++;
650+
}
651+
652+
while ((next = rb_next(next))) {
653+
vpfn = rb_entry(next, struct vfio_pfn, node);
654+
if (vpfn->iova >= iova_end)
655+
break;
656+
ret++;
657+
}
658+
659+
return ret;
660+
}
661+
617662
/*
618663
* Attempt to pin pages. We really don't want to track all the pfns and
619664
* the iommu can only map chunks of consecutive pfns anyway, so get the
@@ -687,32 +732,47 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
687732
* and rsvd here, and therefore continues to use the batch.
688733
*/
689734
while (true) {
735+
long nr_pages, acct_pages = 0;
736+
690737
if (pfn != *pfn_base + pinned ||
691738
rsvd != is_invalid_reserved_pfn(pfn))
692739
goto out;
693740

741+
/*
742+
* Using GUP with the FOLL_LONGTERM in
743+
* vaddr_get_pfns() will not return invalid
744+
* or reserved pages.
745+
*/
746+
nr_pages = num_pages_contiguous(
747+
&batch->pages[batch->offset],
748+
batch->size);
749+
if (!rsvd) {
750+
acct_pages = nr_pages;
751+
acct_pages -= vpfn_pages(dma, iova, nr_pages);
752+
}
753+
694754
/*
695755
* Reserved pages aren't counted against the user,
696756
* externally pinned pages are already counted against
697757
* the user.
698758
*/
699-
if (!rsvd && !vfio_find_vpfn(dma, iova)) {
759+
if (acct_pages) {
700760
if (!dma->lock_cap &&
701-
mm->locked_vm + lock_acct + 1 > limit) {
761+
mm->locked_vm + lock_acct + acct_pages > limit) {
702762
pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
703763
__func__, limit << PAGE_SHIFT);
704764
ret = -ENOMEM;
705765
goto unpin_out;
706766
}
707-
lock_acct++;
767+
lock_acct += acct_pages;
708768
}
709769

710-
pinned++;
711-
npage--;
712-
vaddr += PAGE_SIZE;
713-
iova += PAGE_SIZE;
714-
batch->offset++;
715-
batch->size--;
770+
pinned += nr_pages;
771+
npage -= nr_pages;
772+
vaddr += PAGE_SIZE * nr_pages;
773+
iova += PAGE_SIZE * nr_pages;
774+
batch->offset += nr_pages;
775+
batch->size -= nr_pages;
716776

717777
if (!batch->size)
718778
break;

0 commit comments

Comments
 (0)