Commit 408579c

howlett authored and torvalds committed
mm: Update do_vmi_align_munmap() return semantics
Since do_vmi_align_munmap() will always honor the downgrade request on
success, the callers no longer have to deal with confusing return codes.
Since all callers that request downgrade actually want the lock to be
dropped, change the downgrade to an unlock request.

Note that the lock still needs to be held in read mode during the page
table clean up to avoid races with a map request.

Update do_vmi_align_munmap() to return 0 for success. Clean up the
callers and comments to always expect the unlock to be honored on the
success path. The error path will always leave the lock untouched.

As part of the cleanup, the wrapper function do_vmi_munmap() and callers
to the wrapper are also updated.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/linux-mm/20230629191414.1215929-1-willy@infradead.org/
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e4bd84c commit 408579c

3 files changed

Lines changed: 57 additions & 69 deletions
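To make the new contract concrete, here is a hedged caller sketch (a hypothetical helper, not taken from the patch): the caller takes the write lock, requests the unlock, and releases the lock itself only on error, since a successful call has already dropped it.

/* Hypothetical caller of the new semantics; mirrors __vm_munmap() below. */
static int example_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
	VMA_ITERATOR(vmi, mm, start);
	LIST_HEAD(uf);
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	/* unlock == true: on success the mmap_lock is dropped for us */
	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, true);
	if (ret)
		mmap_write_unlock(mm);	/* the error path leaves the lock held */

	userfaultfd_unmap_complete(mm, &uf);
	return ret;
}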


include/linux/mm.h

Lines changed: 2 additions & 2 deletions
@@ -3177,15 +3177,15 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
 			unsigned long pgoff, unsigned long *populate, struct list_head *uf);
 extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 			 unsigned long start, size_t len, struct list_head *uf,
-			 bool downgrade);
+			 bool unlock);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
 
 #ifdef CONFIG_MMU
 extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 			 unsigned long start, unsigned long end,
-			 struct list_head *uf, bool downgrade);
+			 struct list_head *uf, bool unlock);
 extern int __mm_populate(unsigned long addr, unsigned long len,
 			 int ignore_errors);
 static inline void mm_populate(unsigned long addr, unsigned long len)

mm/mmap.c

Lines changed: 43 additions & 51 deletions
@@ -193,8 +193,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *brkvma, *next = NULL;
 	unsigned long min_brk;
-	bool populate;
-	bool downgraded = false;
+	bool populate = false;
 	LIST_HEAD(uf);
 	struct vma_iterator vmi;
 
@@ -236,33 +235,23 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto success;
 	}
 
-	/*
-	 * Always allow shrinking brk.
-	 * do_vma_munmap() may downgrade mmap_lock to read.
-	 */
+	/* Always allow shrinking brk. */
 	if (brk <= mm->brk) {
-		int ret;
-
 		/* Search one past newbrk */
 		vma_iter_init(&vmi, mm, newbrk);
 		brkvma = vma_find(&vmi, oldbrk);
 		if (!brkvma || brkvma->vm_start >= oldbrk)
 			goto out; /* mapping intersects with an existing non-brk vma. */
 		/*
 		 * mm->brk must be protected by write mmap_lock.
-		 * do_vma_munmap() may downgrade the lock, so update it
+		 * do_vma_munmap() will drop the lock on success, so update it
 		 * before calling do_vma_munmap().
 		 */
 		mm->brk = brk;
-		ret = do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true);
-		if (ret == 1) {
-			downgraded = true;
-			goto success;
-		} else if (!ret)
-			goto success;
-
-		mm->brk = origbrk;
-		goto out;
+		if (do_vma_munmap(&vmi, brkvma, newbrk, oldbrk, &uf, true))
+			goto out;
+
+		goto success_unlocked;
 	}
 
 	if (check_brk_limits(oldbrk, newbrk - oldbrk))
@@ -283,19 +272,19 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	mm->brk = brk;
+	if (mm->def_flags & VM_LOCKED)
+		populate = true;
 
 success:
-	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
-	if (downgraded)
-		mmap_read_unlock(mm);
-	else
-		mmap_write_unlock(mm);
+	mmap_write_unlock(mm);
+success_unlocked:
 	userfaultfd_unmap_complete(mm, &uf);
 	if (populate)
 		mm_populate(oldbrk, newbrk - oldbrk);
 	return brk;
 
 out:
+	mm->brk = origbrk;
 	mmap_write_unlock(mm);
 	return origbrk;
 }
@@ -2428,14 +2417,16 @@ int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
  * @start: The aligned start address to munmap.
  * @end: The aligned end address to munmap.
  * @uf: The userfaultfd list_head
- * @downgrade: Set to true to attempt a write downgrade of the mmap_lock
+ * @unlock: Set to true to drop the mmap_lock.  Unlocking only happens on
+ * success.
  *
- * If @downgrade is true, check return code for potential release of the lock.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
  */
 static int
 do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		    struct mm_struct *mm, unsigned long start,
-		    unsigned long end, struct list_head *uf, bool downgrade)
+		    unsigned long end, struct list_head *uf, bool unlock)
 {
 	struct vm_area_struct *prev, *next = NULL;
 	struct maple_tree mt_detach;
@@ -2551,22 +2542,24 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	/* Point of no return */
 	mm->locked_vm -= locked_vm;
 	mm->map_count -= count;
-	if (downgrade)
+	if (unlock)
 		mmap_write_downgrade(mm);
 
 	/*
 	 * We can free page tables without write-locking mmap_lock because VMAs
 	 * were isolated before we downgraded mmap_lock.
 	 */
-	unmap_region(mm, &mt_detach, vma, prev, next, start, end, !downgrade);
+	unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
 	/* Statistics and freeing VMAs */
 	mas_set(&mas_detach, start);
 	remove_mt(mm, &mas_detach);
 	__mt_destroy(&mt_detach);
+	if (unlock)
+		mmap_read_unlock(mm);
 
 
 	validate_mm(mm);
-	return downgrade ? 1 : 0;
+	return 0;
 
 clear_tree_failed:
 userfaultfd_error:
@@ -2589,18 +2582,18 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
  * @start: The start address to munmap
  * @len: The length of the range to munmap
  * @uf: The userfaultfd list_head
- * @downgrade: set to true if the user wants to attempt to write_downgrade the
- * mmap_lock
+ * @unlock: set to true if the user wants to drop the mmap_lock on success
  *
 * This function takes a @mas that is either pointing to the previous VMA or set
 * to MA_START and sets it up to remove the mapping(s).  The @len will be
 * aligned and any arch_unmap work will be performed.
 *
- * Returns: -EINVAL on failure, 1 on success and unlock, 0 otherwise.
+ * Return: 0 on success and drops the lock if so directed, error and leaves the
+ * lock held otherwise.
 */
 int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 		  unsigned long start, size_t len, struct list_head *uf,
-		  bool downgrade)
+		  bool unlock)
 {
 	unsigned long end;
 	struct vm_area_struct *vma;
@@ -2617,17 +2610,22 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
 
 	/* Find the first overlapping VMA */
 	vma = vma_find(vmi, end);
-	if (!vma)
+	if (!vma) {
+		if (unlock)
+			mmap_write_unlock(mm);
 		return 0;
+	}
 
-	return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
+	return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
 }
 
 /* do_munmap() - Wrapper function for non-maple tree aware do_munmap() calls.
  * @mm: The mm_struct
  * @start: The start address to munmap
  * @len: The length to be munmapped.
  * @uf: The userfaultfd list_head
+ *
+ * Return: 0 on success, error otherwise.
  */
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	      struct list_head *uf)
@@ -2888,7 +2886,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		return error;
 }
 
-static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
+static int __vm_munmap(unsigned long start, size_t len, bool unlock)
 {
 	int ret;
 	struct mm_struct *mm = current->mm;
@@ -2898,16 +2896,8 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, downgrade);
-	/*
-	 * Returning 1 indicates mmap_lock is downgraded.
-	 * But 1 is not legal return value of vm_munmap() and munmap(), reset
-	 * it to 0 before return.
-	 */
-	if (ret == 1) {
-		mmap_read_unlock(mm);
-		ret = 0;
-	} else
+	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock);
+	if (ret || !unlock)
 		mmap_write_unlock(mm);
 
 	userfaultfd_unmap_complete(mm, &uf);
@@ -3017,21 +3007,23 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 * @start: the start of the address to unmap
 * @end: The end of the address to unmap
 * @uf: The userfaultfd list_head
- * @downgrade: Attempt to downgrade or not
+ * @unlock: Drop the lock on success
 *
- * Returns: 0 on success and not downgraded, 1 on success and downgraded.
 * unmaps a VMA mapping when the vma iterator is already in position.
 * Does not handle alignment.
+ *
+ * Return: 0 on success and drops the lock if so directed, error on failure and
+ * will still hold the lock.
 */
 int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		  unsigned long start, unsigned long end,
-		  struct list_head *uf, bool downgrade)
+		  unsigned long start, unsigned long end, struct list_head *uf,
+		  bool unlock)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	int ret;
 
 	arch_unmap(mm, start, end);
-	ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, downgrade);
+	ret = do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
 	validate_mm(mm);
 	return ret;
 }
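The success_unlocked/out split used in brk() above generalizes to any caller that wants the lock dropped only on success. Below is a minimal sketch of that shape using a hypothetical wrapper (invented for illustration, not part of the patch):

/* Hypothetical two-label exit shape; not part of the patch. */
static int shrink_and_complete(struct mm_struct *mm, unsigned long start,
			       size_t len)
{
	VMA_ITERATOR(vmi, mm, start);
	LIST_HEAD(uf);
	int ret;

	mmap_write_lock(mm);
	ret = do_vmi_munmap(&vmi, mm, start, len, &uf, true);
	if (ret)
		goto out;		/* error: write lock still held */
	goto out_unlocked;		/* success: lock already dropped */
out:
	mmap_write_unlock(mm);
out_unlocked:
	userfaultfd_unmap_complete(mm, &uf);	/* runs without the lock */
	return ret;
}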

mm/mremap.c

Lines changed: 12 additions & 16 deletions
@@ -715,7 +715,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	}
 
 	vma_iter_init(&vmi, mm, old_addr);
-	if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false) < 0) {
+	if (do_vmi_munmap(&vmi, mm, old_addr, old_len, uf_unmap, false)) {
 		/* OOM: unable to split vma, just get accounts right */
 		if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP))
 			vm_acct_memory(old_len >> PAGE_SHIFT);
@@ -913,7 +913,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	struct vm_area_struct *vma;
 	unsigned long ret = -EINVAL;
 	bool locked = false;
-	bool downgraded = false;
 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
 	LIST_HEAD(uf_unmap_early);
 	LIST_HEAD(uf_unmap);
@@ -999,24 +998,23 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	 * Always allow a shrinking remap: that just unmaps
 	 * the unnecessary pages..
 	 * do_vmi_munmap does all the needed commit accounting, and
-	 * downgrades mmap_lock to read if so directed.
+	 * unlocks the mmap_lock if so directed.
 	 */
 	if (old_len >= new_len) {
-		int retval;
 		VMA_ITERATOR(vmi, mm, addr + new_len);
 
-		retval = do_vmi_munmap(&vmi, mm, addr + new_len,
-				       old_len - new_len, &uf_unmap, true);
-		/* Returning 1 indicates mmap_lock is downgraded to read. */
-		if (retval == 1) {
-			downgraded = true;
-		} else if (retval < 0 && old_len != new_len) {
-			ret = retval;
+		if (old_len == new_len) {
+			ret = addr;
 			goto out;
 		}
 
+		ret = do_vmi_munmap(&vmi, mm, addr + new_len, old_len - new_len,
+				    &uf_unmap, true);
+		if (ret)
+			goto out;
+
 		ret = addr;
-		goto out;
+		goto out_unlocked;
 	}
 
 	/*
@@ -1101,12 +1099,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 out:
 	if (offset_in_page(ret))
 		locked = false;
-	if (downgraded)
-		mmap_read_unlock(current->mm);
-	else
-		mmap_write_unlock(current->mm);
+	mmap_write_unlock(current->mm);
 	if (locked && new_len > old_len)
 		mm_populate(new_addr + old_len, new_len - old_len);
+out_unlocked:
 	userfaultfd_unmap_complete(mm, &uf_unmap_early);
 	mremap_userfaultfd_complete(&uf, addr, ret, old_len);
 	userfaultfd_unmap_complete(mm, &uf_unmap);
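Conversely, callers that pass unlock == false, such as move_vma() above, keep the write lock whether the call succeeds or fails. A small hypothetical assertion wrapper (illustration only, not in the patch) makes that invariant explicit:

/* Hypothetical: with unlock == false the write lock survives the call. */
static int munmap_keep_lock(struct mm_struct *mm, unsigned long start,
			    size_t len, struct list_head *uf)
{
	VMA_ITERATOR(vmi, mm, start);
	int ret;

	mmap_assert_write_locked(mm);	/* caller already holds the write lock */
	ret = do_vmi_munmap(&vmi, mm, start, len, uf, false);
	mmap_assert_write_locked(mm);	/* still held, on success and on error */
	return ret;
}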
