Skip to content

Commit 21b85b0

Browse files
mjkravetz
authored and akpm00 committed
madvise: use zap_page_range_single for madvise dontneed
This series addresses the issue first reported in [1], and fully described in patch 2. Patches 1 and 2 address the user visible issue and are tagged for stable backports. While exploring solutions to this issue, related problems with mmu notification calls were discovered. This is addressed in the patch "hugetlb: remove duplicate mmu notifications". Since there are no user visible effects, this third patch is not tagged for stable backports. Previous discussions suggested further cleanup by removing the routine zap_page_range. This is possible because zap_page_range_single is now exported, and all callers of zap_page_range pass ranges entirely within a single vma. This work will be done in a later patch so as not to distract from this bug fix. [1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6JudD0g@mail.gmail.com/ This patch (of 2): Expose the routine zap_page_range_single to zap a range within a single vma. The madvise routine madvise_dontneed_single_vma can use this routine as it explicitly operates on a single vma. Also, update the mmu notification range in zap_page_range_single to take hugetlb pmd sharing into account. This is required as MADV_DONTNEED supports hugetlb vmas. Link: https://lkml.kernel.org/r/20221114235507.294320-1-mike.kravetz@oracle.com Link: https://lkml.kernel.org/r/20221114235507.294320-2-mike.kravetz@oracle.com Fixes: 90e7e7f ("mm: enable MADV_DONTNEED for hugetlb mappings") Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com> Reported-by: Wei Chen <harperchen1110@gmail.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mina Almasry <almasrymina@google.com> Cc: Nadav Amit <nadav.amit@gmail.com> Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev> Cc: Peter Xu <peterx@redhat.com> Cc: Rik van Riel <riel@surriel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent dec1d35 commit 21b85b0

3 files changed

Lines changed: 33 additions & 23 deletions

File tree

include/linux/mm.h

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,6 +1852,23 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem
18521852
__show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
18531853
}
18541854

1855+
/*
1856+
* Parameter block passed down to zap_pte_range in exceptional cases.
1857+
*/
1858+
struct zap_details {
1859+
struct folio *single_folio; /* Locked folio to be unmapped */
1860+
bool even_cows; /* Zap COWed private pages too? */
1861+
zap_flags_t zap_flags; /* Extra flags for zapping */
1862+
};
1863+
1864+
/*
1865+
* Whether to drop the pte markers, for example, the uffd-wp information for
1866+
* file-backed memory. This should only be specified when we will completely
1867+
* drop the page in the mm, either by truncation or unmapping of the vma. By
1868+
* default, the flag is not set.
1869+
*/
1870+
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
1871+
18551872
#ifdef CONFIG_MMU
18561873
extern bool can_do_mlock(void);
18571874
#else
@@ -1869,6 +1886,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
18691886
unsigned long size);
18701887
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
18711888
unsigned long size);
1889+
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
1890+
unsigned long size, struct zap_details *details);
18721891
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
18731892
struct vm_area_struct *start_vma, unsigned long start,
18741893
unsigned long end);
@@ -3467,12 +3486,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
34673486
}
34683487
#endif
34693488

3470-
/*
3471-
* Whether to drop the pte markers, for example, the uffd-wp information for
3472-
* file-backed memory. This should only be specified when we will completely
3473-
* drop the page in the mm, either by truncation or unmapping of the vma. By
3474-
* default, the flag is not set.
3475-
*/
3476-
#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0))
3477-
34783489
#endif /* _LINUX_MM_H */

mm/madvise.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
772772
* Application no longer needs these pages. If the pages are dirty,
773773
* it's OK to just throw them away. The app will be more careful about
774774
* data it wants to keep. Be sure to free swap resources too. The
775-
* zap_page_range call sets things up for shrink_active_list to actually free
776-
* these pages later if no one else has touched them in the meantime,
775+
* zap_page_range_single call sets things up for shrink_active_list to actually
776+
* free these pages later if no one else has touched them in the meantime,
777777
* although we could add these pages to a global reuse list for
778778
* shrink_active_list to pick up before reclaiming other pages.
779779
*
@@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
790790
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
791791
unsigned long start, unsigned long end)
792792
{
793-
zap_page_range(vma, start, end - start);
793+
zap_page_range_single(vma, start, end - start, NULL);
794794
return 0;
795795
}
796796

mm/memory.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
13411341
return ret;
13421342
}
13431343

1344-
/*
1345-
* Parameter block passed down to zap_pte_range in exceptional cases.
1346-
*/
1347-
struct zap_details {
1348-
struct folio *single_folio; /* Locked folio to be unmapped */
1349-
bool even_cows; /* Zap COWed private pages too? */
1350-
zap_flags_t zap_flags; /* Extra flags for zapping */
1351-
};
1352-
13531344
/* Whether we should zap all COWed (private) pages too */
13541345
static inline bool should_zap_cows(struct zap_details *details)
13551346
{
@@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
17741765
*
17751766
* The range must fit into one VMA.
17761767
*/
1777-
static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
1768+
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
17781769
unsigned long size, struct zap_details *details)
17791770
{
1771+
const unsigned long end = address + size;
17801772
struct mmu_notifier_range range;
17811773
struct mmu_gather tlb;
17821774

17831775
lru_add_drain();
17841776
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
1785-
address, address + size);
1777+
address, end);
1778+
if (is_vm_hugetlb_page(vma))
1779+
adjust_range_if_pmd_sharing_possible(vma, &range.start,
1780+
&range.end);
17861781
tlb_gather_mmu(&tlb, vma->vm_mm);
17871782
update_hiwater_rss(vma->vm_mm);
17881783
mmu_notifier_invalidate_range_start(&range);
1789-
unmap_single_vma(&tlb, vma, address, range.end, details);
1784+
/*
1785+
* unmap 'address-end' not 'range.start-range.end' as range
1786+
* could have been expanded for hugetlb pmd sharing.
1787+
*/
1788+
unmap_single_vma(&tlb, vma, address, end, details);
17901789
mmu_notifier_invalidate_range_end(&range);
17911790
tlb_finish_mmu(&tlb);
17921791
}

0 commit comments

Comments
 (0)