Skip to content

Commit ce2b3a5

Browse files
ryanhrob authored and willdeacon committed
arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context
It has been reported that split_kernel_leaf_mapping() is trying to sleep in non-sleepable context. It does this when acquiring the pgtable_split_lock mutex, when either CONFIG_DEBUG_PAGEALLOC or CONFIG_KFENCE are enabled, which change linear map permissions within softirq context during memory allocation and/or freeing. All other paths into this function are called from sleepable context and so are safe. But it turns out that the memory for which these 2 features may attempt to modify the permissions is always mapped by pte, so there is no need to attempt to split the mapping. So let's exit early in these cases and avoid attempting to take the mutex. There is one wrinkle to this approach; late-initialized kfence allocates its pool from the buddy which may be block mapped. So we must hook that allocation and convert it to pte-mappings up front. Previously this was done as a side-effect of kfence protecting all the individual pages in its pool at init-time, but this no longer works due to the added early exit path in split_kernel_leaf_mapping(). So instead, do this via the existing arch_kfence_init_pool() arch hook, and reuse the existing linear_map_split_to_ptes() infrastructure. Closes: https://lore.kernel.org/all/f24b9032-0ec9-47b1-8b95-c0eeac7a31c5@roeck-us.net/ Fixes: a166563 ("arm64: mm: support large block mapping when rodata=full") Reported-by: Guenter Roeck <linux@roeck-us.net> Tested-by: Guenter Roeck <groeck@google.com> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org> Reviewed-by: Yang Shi <yang@os.amperecomputing.com> Signed-off-by: Will Deacon <will@kernel.org>
1 parent 0ec364c commit ce2b3a5

2 files changed

Lines changed: 67 additions & 28 deletions

File tree

arch/arm64/include/asm/kfence.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010

1111
#include <asm/set_memory.h>
1212

13-
static inline bool arch_kfence_init_pool(void) { return true; }
14-
1513
static inline bool kfence_protect_page(unsigned long addr, bool protect)
1614
{
1715
set_memory_valid(addr, 1, !protect);
@@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void)
2523
{
2624
return !kfence_early_init;
2725
}
26+
bool arch_kfence_init_pool(void);
2827
#else /* CONFIG_KFENCE */
2928
static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
3029
#endif /* CONFIG_KFENCE */

arch/arm64/mm/mmu.c

Lines changed: 66 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,16 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
708708
return ret;
709709
}
710710

711+
static inline bool force_pte_mapping(void)
712+
{
713+
bool bbml2 = system_capabilities_finalized() ?
714+
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
715+
716+
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
717+
is_realm_world())) ||
718+
debug_pagealloc_enabled();
719+
}
720+
711721
static DEFINE_MUTEX(pgtable_split_lock);
712722

713723
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -723,6 +733,16 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
723733
if (!system_supports_bbml2_noabort())
724734
return 0;
725735

736+
/*
737+
* If the region is within a pte-mapped area, there is no need to try to
738+
* split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
739+
* change permissions from atomic context so for those cases (which are
740+
* always pte-mapped), we must not go any further because taking the
741+
* mutex below may sleep.
742+
*/
743+
if (force_pte_mapping() || is_kfence_address((void *)start))
744+
return 0;
745+
726746
/*
727747
* Ensure start and end are at least page-aligned since this is the
728748
* finest granularity we can split to.
@@ -758,30 +778,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
758778
return ret;
759779
}
760780

761-
static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
762-
unsigned long next,
763-
struct mm_walk *walk)
781+
static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
782+
unsigned long next, struct mm_walk *walk)
764783
{
784+
gfp_t gfp = *(gfp_t *)walk->private;
765785
pud_t pud = pudp_get(pudp);
766786
int ret = 0;
767787

768788
if (pud_leaf(pud))
769-
ret = split_pud(pudp, pud, GFP_ATOMIC, false);
789+
ret = split_pud(pudp, pud, gfp, false);
770790

771791
return ret;
772792
}
773793

774-
static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
775-
unsigned long next,
776-
struct mm_walk *walk)
794+
static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
795+
unsigned long next, struct mm_walk *walk)
777796
{
797+
gfp_t gfp = *(gfp_t *)walk->private;
778798
pmd_t pmd = pmdp_get(pmdp);
779799
int ret = 0;
780800

781801
if (pmd_leaf(pmd)) {
782802
if (pmd_cont(pmd))
783803
split_contpmd(pmdp);
784-
ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
804+
ret = split_pmd(pmdp, pmd, gfp, false);
785805

786806
/*
787807
* We have split the pmd directly to ptes so there is no need to
@@ -793,9 +813,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
793813
return ret;
794814
}
795815

796-
static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
797-
unsigned long next,
798-
struct mm_walk *walk)
816+
static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
817+
unsigned long next, struct mm_walk *walk)
799818
{
800819
pte_t pte = __ptep_get(ptep);
801820

@@ -805,12 +824,18 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
805824
return 0;
806825
}
807826

808-
static const struct mm_walk_ops split_to_ptes_ops __initconst = {
827+
static const struct mm_walk_ops split_to_ptes_ops = {
809828
.pud_entry = split_to_ptes_pud_entry,
810829
.pmd_entry = split_to_ptes_pmd_entry,
811830
.pte_entry = split_to_ptes_pte_entry,
812831
};
813832

833+
static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp)
834+
{
835+
return walk_kernel_page_table_range_lockless(start, end,
836+
&split_to_ptes_ops, NULL, &gfp);
837+
}
838+
814839
static bool linear_map_requires_bbml2 __initdata;
815840

816841
u32 idmap_kpti_bbml2_flag;
@@ -847,11 +872,9 @@ static int __init linear_map_split_to_ptes(void *__unused)
847872
* PTE. The kernel alias remains static throughout runtime so
848873
* can continue to be safely mapped with large mappings.
849874
*/
850-
ret = walk_kernel_page_table_range_lockless(lstart, kstart,
851-
&split_to_ptes_ops, NULL, NULL);
875+
ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC);
852876
if (!ret)
853-
ret = walk_kernel_page_table_range_lockless(kend, lend,
854-
&split_to_ptes_ops, NULL, NULL);
877+
ret = range_split_to_ptes(kend, lend, GFP_ATOMIC);
855878
if (ret)
856879
panic("Failed to split linear map\n");
857880
flush_tlb_kernel_range(lstart, lend);
@@ -1002,23 +1025,40 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
10021025
memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
10031026
__kfence_pool = phys_to_virt(kfence_pool);
10041027
}
1028+
1029+
bool arch_kfence_init_pool(void)
1030+
{
1031+
unsigned long start = (unsigned long)__kfence_pool;
1032+
unsigned long end = start + KFENCE_POOL_SIZE;
1033+
int ret;
1034+
1035+
/* Exit early if we know the linear map is already pte-mapped. */
1036+
if (!system_supports_bbml2_noabort() || force_pte_mapping())
1037+
return true;
1038+
1039+
/* Kfence pool is already pte-mapped for the early init case. */
1040+
if (kfence_early_init)
1041+
return true;
1042+
1043+
mutex_lock(&pgtable_split_lock);
1044+
ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL);
1045+
mutex_unlock(&pgtable_split_lock);
1046+
1047+
/*
1048+
* Since the system supports bbml2_noabort, tlb invalidation is not
1049+
* required here; the pgtable mappings have been split to pte but larger
1050+
* entries may safely linger in the TLB.
1051+
*/
1052+
1053+
return !ret;
1054+
}
10051055
#else /* CONFIG_KFENCE */
10061056

10071057
static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
10081058
static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { }
10091059

10101060
#endif /* CONFIG_KFENCE */
10111061

1012-
static inline bool force_pte_mapping(void)
1013-
{
1014-
bool bbml2 = system_capabilities_finalized() ?
1015-
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
1016-
1017-
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
1018-
is_realm_world())) ||
1019-
debug_pagealloc_enabled();
1020-
}
1021-
10221062
static void __init map_mem(pgd_t *pgdp)
10231063
{
10241064
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);

0 commit comments

Comments
 (0)