Skip to content

Commit f5c1210

Browse files
osalvadorvilardaga authored and akpm00 committed
mm,page_owner: fix refcount imbalance
Current code does not contemplate scenarios where an allocation and a free operation on the same pages do not handle the same number of pages at once. To give an example, alloc_pages_exact(), where we will allocate a page of enough order to satisfy the size request, but we will free the remainder right away. In the above example, we will increment the stack_record refcount only once, but we will decrease it the same number of times as the number of unused pages we have to free. This will lead to a warning because of refcount imbalance. Fix this by recording the number of base pages in the refcount field. Link: https://lkml.kernel.org/r/20240404070702.2744-3-osalvador@suse.de Reported-by: syzbot+41bbfdb8d41003d12c0f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/00000000000090e8ff0613eda0e5@google.com Fixes: 217b211 ("mm,page_owner: implement the tracking of the stacks count") Signed-off-by: Oscar Salvador <osalvador@suse.de> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com> Cc: Alexander Potapenko <glider@google.com> Cc: Andrey Konovalov <andreyknvl@gmail.com> Cc: Marco Elver <elver@google.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Palmer Dabbelt <palmer@dabbelt.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent ea4b5b3 commit f5c1210

2 files changed

Lines changed: 58 additions & 49 deletions

File tree

Documentation/mm/page_owner.rst

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
2424
each page. It is already implemented and activated if page owner is
2525
enabled. Other usages are more than welcome.
2626

27-
It can also be used to show all the stacks and their outstanding
28-
allocations, which gives us a quick overview of where the memory is going
29-
without the need to screen through all the pages and match the allocation
30-
and free operation.
27+
It can also be used to show all the stacks and their current number of
28+
allocated base pages, which gives us a quick overview of where the memory
29+
is going without the need to screen through all the pages and match the
30+
allocation and free operation.
3131

3232
page owner is disabled by default. So, if you'd like to use it, you need
3333
to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
7575

7676
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
7777
cat stacks.txt
78-
prep_new_page+0xa9/0x120
79-
get_page_from_freelist+0x7e6/0x2140
80-
__alloc_pages+0x18a/0x370
81-
new_slab+0xc8/0x580
82-
___slab_alloc+0x1f2/0xaf0
83-
__slab_alloc.isra.86+0x22/0x40
84-
kmem_cache_alloc+0x31b/0x350
85-
__khugepaged_enter+0x39/0x100
86-
dup_mmap+0x1c7/0x5ce
87-
copy_process+0x1afe/0x1c90
88-
kernel_clone+0x9a/0x3c0
89-
__do_sys_clone+0x66/0x90
90-
do_syscall_64+0x7f/0x160
91-
entry_SYSCALL_64_after_hwframe+0x6c/0x74
92-
stack_count: 234
78+
post_alloc_hook+0x177/0x1a0
79+
get_page_from_freelist+0xd01/0xd80
80+
__alloc_pages+0x39e/0x7e0
81+
allocate_slab+0xbc/0x3f0
82+
___slab_alloc+0x528/0x8a0
83+
kmem_cache_alloc+0x224/0x3b0
84+
sk_prot_alloc+0x58/0x1a0
85+
sk_alloc+0x32/0x4f0
86+
inet_create+0x427/0xb50
87+
__sock_create+0x2e4/0x650
88+
inet_ctl_sock_create+0x30/0x180
89+
igmp_net_init+0xc1/0x130
90+
ops_init+0x167/0x410
91+
setup_net+0x304/0xa60
92+
copy_net_ns+0x29b/0x4a0
93+
create_new_namespaces+0x4a1/0x820
94+
nr_base_pages: 16
9395
...
9496
...
9597
echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
9698
cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks_7000.txt
9799
cat stacks_7000.txt
98-
prep_new_page+0xa9/0x120
99-
get_page_from_freelist+0x7e6/0x2140
100-
__alloc_pages+0x18a/0x370
101-
alloc_pages_mpol+0xdf/0x1e0
102-
folio_alloc+0x14/0x50
103-
filemap_alloc_folio+0xb0/0x100
104-
page_cache_ra_unbounded+0x97/0x180
105-
filemap_fault+0x4b4/0x1200
106-
__do_fault+0x2d/0x110
107-
do_pte_missing+0x4b0/0xa30
108-
__handle_mm_fault+0x7fa/0xb70
109-
handle_mm_fault+0x125/0x300
110-
do_user_addr_fault+0x3c9/0x840
111-
exc_page_fault+0x68/0x150
112-
asm_exc_page_fault+0x22/0x30
113-
stack_count: 8248
100+
post_alloc_hook+0x177/0x1a0
101+
get_page_from_freelist+0xd01/0xd80
102+
__alloc_pages+0x39e/0x7e0
103+
alloc_pages_mpol+0x22e/0x490
104+
folio_alloc+0xd5/0x110
105+
filemap_alloc_folio+0x78/0x230
106+
page_cache_ra_order+0x287/0x6f0
107+
filemap_get_pages+0x517/0x1160
108+
filemap_read+0x304/0x9f0
109+
xfs_file_buffered_read+0xe6/0x1d0 [xfs]
110+
xfs_file_read_iter+0x1f0/0x380 [xfs]
111+
__kernel_read+0x3b9/0x730
112+
kernel_read_file+0x309/0x4d0
113+
__do_sys_finit_module+0x381/0x730
114+
do_syscall_64+0x8d/0x150
115+
entry_SYSCALL_64_after_hwframe+0x62/0x6a
116+
nr_base_pages: 20824
114117
...
115118

116119
cat /sys/kernel/debug/page_owner > page_owner_full.txt

mm/page_owner.c

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
196196
spin_unlock_irqrestore(&stack_list_lock, flags);
197197
}
198198

199-
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
199+
static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
200+
int nr_base_pages)
200201
{
201202
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
202203

@@ -217,15 +218,20 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
217218
/* Add the new stack_record to our list */
218219
add_stack_record_to_list(stack_record, gfp_mask);
219220
}
220-
refcount_inc(&stack_record->count);
221+
refcount_add(nr_base_pages, &stack_record->count);
221222
}
222223

223-
static void dec_stack_record_count(depot_stack_handle_t handle)
224+
static void dec_stack_record_count(depot_stack_handle_t handle,
225+
int nr_base_pages)
224226
{
225227
struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
226228

227-
if (stack_record)
228-
refcount_dec(&stack_record->count);
229+
if (!stack_record)
230+
return;
231+
232+
if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
233+
pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
234+
handle);
229235
}
230236

231237
static inline void __update_page_owner_handle(struct page_ext *page_ext,
@@ -306,7 +312,7 @@ void __reset_page_owner(struct page *page, unsigned short order)
306312
* the machinery is not ready yet, we cannot decrement
307313
* their refcount either.
308314
*/
309-
dec_stack_record_count(alloc_handle);
315+
dec_stack_record_count(alloc_handle, 1 << order);
310316
}
311317

312318
noinline void __set_page_owner(struct page *page, unsigned short order,
@@ -325,7 +331,7 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
325331
current->pid, current->tgid, ts_nsec,
326332
current->comm);
327333
page_ext_put(page_ext);
328-
inc_stack_record_count(handle, gfp_mask);
334+
inc_stack_record_count(handle, gfp_mask, 1 << order);
329335
}
330336

331337
void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -872,11 +878,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
872878
return stack;
873879
}
874880

875-
static unsigned long page_owner_stack_threshold;
881+
static unsigned long page_owner_pages_threshold;
876882

877883
static int stack_print(struct seq_file *m, void *v)
878884
{
879-
int i, stack_count;
885+
int i, nr_base_pages;
880886
struct stack *stack = v;
881887
unsigned long *entries;
882888
unsigned long nr_entries;
@@ -887,14 +893,14 @@ static int stack_print(struct seq_file *m, void *v)
887893

888894
nr_entries = stack_record->size;
889895
entries = stack_record->entries;
890-
stack_count = refcount_read(&stack_record->count) - 1;
896+
nr_base_pages = refcount_read(&stack_record->count) - 1;
891897

892-
if (stack_count < 1 || stack_count < page_owner_stack_threshold)
898+
if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
893899
return 0;
894900

895901
for (i = 0; i < nr_entries; i++)
896902
seq_printf(m, " %pS\n", (void *)entries[i]);
897-
seq_printf(m, "stack_count: %d\n\n", stack_count);
903+
seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
898904

899905
return 0;
900906
}
@@ -924,13 +930,13 @@ static const struct file_operations page_owner_stack_operations = {
924930

925931
static int page_owner_threshold_get(void *data, u64 *val)
926932
{
927-
*val = READ_ONCE(page_owner_stack_threshold);
933+
*val = READ_ONCE(page_owner_pages_threshold);
928934
return 0;
929935
}
930936

931937
static int page_owner_threshold_set(void *data, u64 val)
932938
{
933-
WRITE_ONCE(page_owner_stack_threshold, val);
939+
WRITE_ONCE(page_owner_pages_threshold, val);
934940
return 0;
935941
}
936942

0 commit comments

Comments
 (0)