Skip to content

Commit 20d5e57

Browse files
committed
Merge branch 'for-5.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu
Pull percpu fix from Dennis Zhou:
 "This is just a single change to fix percpu depopulation. The code relied on depopulation code written specifically for the free path and relied on vmalloc to do the tlb flush lazily. As we're modifying the backing pages during the lifetime of a chunk, we need to also flush the tlb accordingly.

  Guenter Roeck reported this issue in [1] on mips. I believe we just happen to be lucky given the much larger chunk sizes on x86 and consequently less churning of this memory"

Link: https://lore.kernel.org/lkml/20210702191140.GA3166599@roeck-us.net/ [1]

* 'for-5.14-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: flush tlb in pcpu_reclaim_populated()
2 parents 50be941 + 93274f1 commit 20d5e57

3 files changed

Lines changed: 35 additions & 8 deletions

File tree

mm/percpu-km.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@
3232

3333
#include <linux/log2.h>
3434

35+
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
36+
int page_start, int page_end)
37+
{
38+
/*
 * Intentionally a no-op: this mm/percpu-km.c variant of the hook that
 * mm/percpu.c forward-declares does no work, and @chunk, @page_start
 * and @page_end are all ignored.
 * NOTE(review): presumably no flush is needed because this backend does
 * not unmap chunk pages through the vmalloc path - confirm.
 */
39+
}
40+
3541
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
3642
int page_start, int page_end, gfp_t gfp)
3743
{

mm/percpu-vm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,9 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
303303
* For each cpu, depopulate and unmap pages [@page_start,@page_end)
304304
* from @chunk.
305305
*
306+
* Caller is required to call pcpu_post_unmap_tlb_flush() if not returning the
307+
* region back to vmalloc() which will lazily flush the tlb.
308+
*
306309
* CONTEXT:
307310
* pcpu_alloc_mutex.
308311
*/
@@ -324,8 +327,6 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
324327

325328
pcpu_unmap_pages(chunk, pages, page_start, page_end);
326329

327-
/* no need to flush tlb, vmalloc will handle it lazily */
328-
329330
pcpu_free_pages(chunk, pages, page_start, page_end);
330331
}
331332

mm/percpu.c

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
15721572
*
15731573
* pcpu_populate_chunk - populate the specified range of a chunk
15741574
* pcpu_depopulate_chunk - depopulate the specified range of a chunk
1575+
* pcpu_post_unmap_tlb_flush - flush tlb for the specified range of a chunk
15751576
* pcpu_create_chunk - create a new chunk
15761577
* pcpu_destroy_chunk - destroy a chunk, always preceded by full depop
15771578
* pcpu_addr_to_page - translate address to physical address
@@ -1581,6 +1582,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
15811582
int page_start, int page_end, gfp_t gfp);
15821583
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
15831584
int page_start, int page_end);
1585+
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
1586+
int page_start, int page_end);
15841587
static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
15851588
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
15861589
static struct page *pcpu_addr_to_page(void *addr);
@@ -2137,11 +2140,12 @@ static void pcpu_reclaim_populated(void)
21372140
{
21382141
struct pcpu_chunk *chunk;
21392142
struct pcpu_block_md *block;
2143+
int freed_page_start, freed_page_end;
21402144
int i, end;
2145+
bool reintegrate;
21412146

21422147
lockdep_assert_held(&pcpu_lock);
21432148

2144-
restart:
21452149
/*
21462150
* Once a chunk is isolated to the to_depopulate list, the chunk is no
21472151
* longer discoverable to allocations whom may populate pages. The only
@@ -2157,15 +2161,18 @@ static void pcpu_reclaim_populated(void)
21572161
* Scan chunk's pages in the reverse order to keep populated
21582162
* pages close to the beginning of the chunk.
21592163
*/
2164+
freed_page_start = chunk->nr_pages;
2165+
freed_page_end = 0;
2166+
reintegrate = false;
21602167
for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) {
21612168
/* no more work to do */
21622169
if (chunk->nr_empty_pop_pages == 0)
21632170
break;
21642171

21652172
/* reintegrate chunk to prevent atomic alloc failures */
21662173
if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_HIGH) {
2167-
pcpu_reintegrate_chunk(chunk);
2168-
goto restart;
2174+
reintegrate = true;
2175+
goto end_chunk;
21692176
}
21702177

21712178
/*
@@ -2194,16 +2201,29 @@ static void pcpu_reclaim_populated(void)
21942201
spin_lock_irq(&pcpu_lock);
21952202

21962203
pcpu_chunk_depopulated(chunk, i + 1, end + 1);
2204+
freed_page_start = min(freed_page_start, i + 1);
2205+
freed_page_end = max(freed_page_end, end + 1);
21972206

21982207
/* reset the range and continue */
21992208
end = -1;
22002209
}
22012210

2202-
if (chunk->free_bytes == pcpu_unit_size)
2211+
end_chunk:
2212+
/* batch tlb flush per chunk to amortize cost */
2213+
if (freed_page_start < freed_page_end) {
2214+
spin_unlock_irq(&pcpu_lock);
2215+
pcpu_post_unmap_tlb_flush(chunk,
2216+
freed_page_start,
2217+
freed_page_end);
2218+
cond_resched();
2219+
spin_lock_irq(&pcpu_lock);
2220+
}
2221+
2222+
if (reintegrate || chunk->free_bytes == pcpu_unit_size)
22032223
pcpu_reintegrate_chunk(chunk);
22042224
else
2205-
list_move(&chunk->list,
2206-
&pcpu_chunk_lists[pcpu_sidelined_slot]);
2225+
list_move_tail(&chunk->list,
2226+
&pcpu_chunk_lists[pcpu_sidelined_slot]);
22072227
}
22082228
}
22092229

0 commit comments

Comments
 (0)