Skip to content

Commit 4ea3594

Browse files
Dev Jain authored and akpm00 committed
khugepaged: optimize __collapse_huge_page_copy_succeeded() by PTE batching
Use PTE batching to batch process PTEs mapping the same large folio. An improvement is expected due to batching refcount-mapcount manipulation on the folios, and for arm64 which supports contig mappings, the number of TLB flushes is also reduced. Link: https://lkml.kernel.org/r/20250724052301.23844-3-dev.jain@arm.com Signed-off-by: Dev Jain <dev.jain@arm.com> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Barry Song <baohua@kernel.org> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Mariano Pache <npache@redhat.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 3dfde97 commit 4ea3594

1 file changed

Lines changed: 18 additions & 7 deletions

File tree

mm/khugepaged.c

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -700,12 +700,15 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
700700
spinlock_t *ptl,
701701
struct list_head *compound_pagelist)
702702
{
703+
unsigned long end = address + HPAGE_PMD_SIZE;
703704
struct folio *src, *tmp;
704-
pte_t *_pte;
705705
pte_t pteval;
706+
pte_t *_pte;
707+
unsigned int nr_ptes;
706708

707-
for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
708-
_pte++, address += PAGE_SIZE) {
709+
for (_pte = pte; _pte < pte + HPAGE_PMD_NR; _pte += nr_ptes,
710+
address += nr_ptes * PAGE_SIZE) {
711+
nr_ptes = 1;
709712
pteval = ptep_get(_pte);
710713
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
711714
add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
@@ -722,18 +725,26 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
722725
struct page *src_page = pte_page(pteval);
723726

724727
src = page_folio(src_page);
725-
if (!folio_test_large(src))
728+
729+
if (folio_test_large(src)) {
730+
unsigned int max_nr_ptes = (end - address) >> PAGE_SHIFT;
731+
732+
nr_ptes = folio_pte_batch(src, _pte, pteval, max_nr_ptes);
733+
} else {
726734
release_pte_folio(src);
735+
}
736+
727737
/*
728738
* ptl mostly unnecessary, but preempt has to
729739
* be disabled to update the per-cpu stats
730740
* inside folio_remove_rmap_pte().
731741
*/
732742
spin_lock(ptl);
733-
ptep_clear(vma->vm_mm, address, _pte);
734-
folio_remove_rmap_pte(src, src_page, vma);
743+
clear_ptes(vma->vm_mm, address, _pte, nr_ptes);
744+
folio_remove_rmap_ptes(src, src_page, nr_ptes, vma);
735745
spin_unlock(ptl);
736-
free_folio_and_swap_cache(src);
746+
free_swap_cache(src);
747+
folio_put_refs(src, nr_ptes);
737748
}
738749
}
739750

0 commit comments

Comments (0)