Skip to content

Commit 39d0bd8

Browse files
howlett
authored and akpm00 committed
maple_tree: be more cautious about dead nodes
Patch series "Fix VMA tree modification under mmap read lock". Syzbot reported a BUG_ON in mm/mmap.c which was found to be caused by an inconsistency between threads walking the VMA maple tree. The inconsistency is caused by the page fault handler modifying the maple tree while holding the mmap_lock for read. This only happens for stack VMAs. We had thought this was safe as it only modifies a single pivot in the tree. Unfortunately, syzbot constructed a test case where the stack had no guard page and grew the stack to abut the next VMA. This causes us to delete the NULL entry between the two VMAs and rewrite the node. We considered several options for fixing this, including dropping the mmap_lock, then reacquiring it for write; and relaxing the definition of the tree to permit a zero-length NULL entry in the node. We decided the best option was to backport some of the RCU patches from -next, which solve the problem by allocating a new node and RCU-freeing the old node. Since the problem exists in 6.1, we preferred a solution which is similar to the one we intended to merge next merge window. These patches have been in -next since next-20230301, and have received intensive testing in Android as part of the RCU page fault patchset. They were also sent as part of the "Per-VMA locks" v4 patch series. Patches 1 to 7 are bug fixes for RCU mode of the tree and patch 8 enables RCU mode for the tree. Performance v6.3-rc3 vs patched v6.3-rc3: Running these changes through mmtests showed there was a 15-20% performance decrease in will-it-scale/brk1-processes. This tests creating and inserting a single VMA repeatedly through the brk interface and isn't representative of any real world applications. This patch (of 8): ma_pivots() and ma_data_end() may be called with a dead node. Ensure that the node isn't dead before using the returned values. This is necessary for RCU mode of the maple tree. 
Link: https://lkml.kernel.org/r/20230327185532.2354250-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230227173632.3292573-1-surenb@google.com Link: https://lkml.kernel.org/r/20230227173632.3292573-2-surenb@google.com Fixes: 54a611b ("Maple Tree: add new data structure") Signed-off-by: Liam Howlett <Liam.Howlett@oracle.com> Signed-off-by: Suren Baghdasaryan <surenb@google.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Arjun Roy <arjunroy@google.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Chris Li <chriscli@google.com> Cc: David Hildenbrand <david@redhat.com> Cc: David Howells <dhowells@redhat.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: David Rientjes <rientjes@google.com> Cc: Eric Dumazet <edumazet@google.com> Cc: freak07 <michalechner92@googlemail.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jann Horn <jannh@google.com> Cc: Joel Fernandes <joelaf@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kent Overstreet <kent.overstreet@linux.dev> Cc: Laurent Dufour <ldufour@linux.ibm.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Michal Hocko <mhocko@suse.com> Cc: Mike Rapoport <rppt@kernel.org> Cc: Minchan Kim <minchan@google.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Peter Oskolkov <posk@google.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Punit Agrawal <punit.agrawal@bytedance.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Shakeel Butt <shakeelb@google.com> Cc: Soheil Hassas Yeganeh <soheil@google.com> Cc: Song Liu <songliubraving@fb.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent bdd034d commit 39d0bd8

1 file changed

Lines changed: 43 additions & 9 deletions

File tree

lib/maple_tree.c

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,7 @@ static inline bool ma_dead_node(const struct maple_node *node)
544544

545545
return (parent == node);
546546
}
547+
547548
/*
548549
* mte_dead_node() - check if the @enode is dead.
549550
* @enode: The encoded maple node
@@ -625,6 +626,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas)
625626
* @node - the maple node
626627
* @type - the node type
627628
*
629+
* In the event of a dead node, this array may be %NULL
630+
*
628631
* Return: A pointer to the maple node pivots
629632
*/
630633
static inline unsigned long *ma_pivots(struct maple_node *node,
@@ -1096,8 +1099,11 @@ static int mas_ascend(struct ma_state *mas)
10961099
a_type = mas_parent_enum(mas, p_enode);
10971100
a_node = mte_parent(p_enode);
10981101
a_slot = mte_parent_slot(p_enode);
1099-
pivots = ma_pivots(a_node, a_type);
11001102
a_enode = mt_mk_node(a_node, a_type);
1103+
pivots = ma_pivots(a_node, a_type);
1104+
1105+
if (unlikely(ma_dead_node(a_node)))
1106+
return 1;
11011107

11021108
if (!set_min && a_slot) {
11031109
set_min = true;
@@ -1401,6 +1407,9 @@ static inline unsigned char ma_data_end(struct maple_node *node,
14011407
{
14021408
unsigned char offset;
14031409

1410+
if (!pivots)
1411+
return 0;
1412+
14041413
if (type == maple_arange_64)
14051414
return ma_meta_end(node, type);
14061415

@@ -1436,6 +1445,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
14361445
return ma_meta_end(node, type);
14371446

14381447
pivots = ma_pivots(node, type);
1448+
if (unlikely(ma_dead_node(node)))
1449+
return 0;
1450+
14391451
offset = mt_pivots[type] - 1;
14401452
if (likely(!pivots[offset]))
14411453
return ma_meta_end(node, type);
@@ -4505,6 +4517,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
45054517
node = mas_mn(mas);
45064518
slots = ma_slots(node, mt);
45074519
pivots = ma_pivots(node, mt);
4520+
if (unlikely(ma_dead_node(node)))
4521+
return 1;
4522+
45084523
mas->max = pivots[offset];
45094524
if (offset)
45104525
mas->min = pivots[offset - 1] + 1;
@@ -4526,6 +4541,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
45264541
slots = ma_slots(node, mt);
45274542
pivots = ma_pivots(node, mt);
45284543
offset = ma_data_end(node, mt, pivots, mas->max);
4544+
if (unlikely(ma_dead_node(node)))
4545+
return 1;
4546+
45294547
if (offset)
45304548
mas->min = pivots[offset - 1] + 1;
45314549

@@ -4574,6 +4592,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
45744592
struct maple_enode *enode;
45754593
int level = 0;
45764594
unsigned char offset;
4595+
unsigned char node_end;
45774596
enum maple_type mt;
45784597
void __rcu **slots;
45794598

@@ -4597,7 +4616,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
45974616
node = mas_mn(mas);
45984617
mt = mte_node_type(mas->node);
45994618
pivots = ma_pivots(node, mt);
4600-
} while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
4619+
node_end = ma_data_end(node, mt, pivots, mas->max);
4620+
if (unlikely(ma_dead_node(node)))
4621+
return 1;
4622+
4623+
} while (unlikely(offset == node_end));
46014624

46024625
slots = ma_slots(node, mt);
46034626
pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
@@ -4613,6 +4636,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
46134636
mt = mte_node_type(mas->node);
46144637
slots = ma_slots(node, mt);
46154638
pivots = ma_pivots(node, mt);
4639+
if (unlikely(ma_dead_node(node)))
4640+
return 1;
4641+
46164642
offset = 0;
46174643
pivot = pivots[0];
46184644
}
@@ -4659,11 +4685,14 @@ static inline void *mas_next_nentry(struct ma_state *mas,
46594685
return NULL;
46604686
}
46614687

4662-
pivots = ma_pivots(node, type);
46634688
slots = ma_slots(node, type);
4664-
mas->index = mas_safe_min(mas, pivots, mas->offset);
4689+
pivots = ma_pivots(node, type);
46654690
count = ma_data_end(node, type, pivots, mas->max);
4666-
if (ma_dead_node(node))
4691+
if (unlikely(ma_dead_node(node)))
4692+
return NULL;
4693+
4694+
mas->index = mas_safe_min(mas, pivots, mas->offset);
4695+
if (unlikely(ma_dead_node(node)))
46674696
return NULL;
46684697

46694698
if (mas->index > max)
@@ -4817,6 +4846,11 @@ static inline void *mas_prev_nentry(struct ma_state *mas, unsigned long limit,
48174846

48184847
slots = ma_slots(mn, mt);
48194848
pivots = ma_pivots(mn, mt);
4849+
if (unlikely(ma_dead_node(mn))) {
4850+
mas_rewalk(mas, index);
4851+
goto retry;
4852+
}
4853+
48204854
if (offset == mt_pivots[mt])
48214855
pivot = mas->max;
48224856
else
@@ -6617,11 +6651,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
66176651
while (likely(!ma_is_leaf(mt))) {
66186652
MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
66196653
slots = ma_slots(mn, mt);
6620-
pivots = ma_pivots(mn, mt);
6621-
max = pivots[0];
66226654
entry = mas_slot(mas, slots, 0);
6655+
pivots = ma_pivots(mn, mt);
66236656
if (unlikely(ma_dead_node(mn)))
66246657
return NULL;
6658+
max = pivots[0];
66256659
mas->node = entry;
66266660
mn = mas_mn(mas);
66276661
mt = mte_node_type(mas->node);
@@ -6641,13 +6675,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
66416675
if (likely(entry))
66426676
return entry;
66436677

6644-
pivots = ma_pivots(mn, mt);
6645-
mas->index = pivots[0] + 1;
66466678
mas->offset = 1;
66476679
entry = mas_slot(mas, slots, 1);
6680+
pivots = ma_pivots(mn, mt);
66486681
if (unlikely(ma_dead_node(mn)))
66496682
return NULL;
66506683

6684+
mas->index = pivots[0] + 1;
66516685
if (mas->index > limit)
66526686
goto none;
66536687

0 commit comments

Comments
 (0)