
Commit 8f5a3eb

Ricardo Koller authored and Oliver Upton committed
KVM: arm64: Add kvm_pgtable_stage2_split()
Add a new stage2 function, kvm_pgtable_stage2_split(), for splitting a
range of huge pages. This will be used for eager-splitting huge pages
into PAGE_SIZE pages. The goal is to avoid having to split huge pages
on write-protection faults, and instead use this function to do it
ahead of time for large ranges (e.g., all guest memory in 1G chunks at
a time).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20230426172330.1439644-7-ricarkol@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
1 parent 2f440b7 commit 8f5a3eb

2 files changed: 122 additions & 0 deletions
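As a reading aid, here is a minimal sketch of the call pattern the commit message describes: eagerly splitting guest memory in 1G chunks, refilling the split cache before each chunk. It is illustrative only; eager_split_range() is a hypothetical helper and the chunking loop is an assumption based on the message, while split_page_cache and the PTRS_PER_PTE + 1 cost come from the patch itself.

/*
 * Hypothetical caller sketch (not part of this commit): split all
 * stage-2 huge pages in [addr, end) in 1G chunks. A fully populated
 * tree for one 1G (level-1) block needs PTRS_PER_PTE + 1 page-table
 * pages, so the cache is topped up to that capacity per chunk.
 */
static int eager_split_range(struct kvm_s2_mmu *mmu, u64 addr, u64 end)
{
	struct kvm_mmu_memory_cache *mc = &mmu->split_page_cache;
	int capacity = PTRS_PER_PTE + 1;
	u64 next;
	int ret;

	for (; addr < end; addr = next) {
		/* Refill so the walker has zeroed tables to hand out. */
		ret = __kvm_mmu_topup_memory_cache(mc, capacity, capacity);
		if (ret)
			return ret;

		next = min(end, ALIGN(addr + 1, SZ_1G));
		ret = kvm_pgtable_stage2_split(mmu->pgt, addr, next - addr, mc);
		if (ret)
			return ret;
	}

	return 0;
}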


arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 19 additions & 0 deletions
@@ -671,6 +671,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
  */
 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 
+/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ *				to PAGE_SIZE guest pages.
+ * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:	Intermediate physical address from which to split.
+ * @size:	Size of the range.
+ * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
+ *		page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by the page capacity of @mc.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+			     struct kvm_mmu_memory_cache *mc);
+
 /**
  * kvm_pgtable_walk() - Walk a page-table.
  * @pgt:	Page-table structure initialised by kvm_pgtable_*_init().
arch/arm64/kvm/hyp/pgtable.c

Lines changed: 103 additions & 0 deletions
@@ -1276,6 +1276,109 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
 	return pgtable;
 }
 
+/*
+ * Get the number of page-tables needed to replace a block with a
+ * fully populated tree up to the PTE entries. Note that @level is
+ * interpreted as in "level @level entry".
+ */
+static int stage2_block_get_nr_page_tables(u32 level)
+{
+	switch (level) {
+	case 1:
+		return PTRS_PER_PTE + 1;
+	case 2:
+		return 1;
+	case 3:
+		return 0;
+	default:
+		WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
+			     level >= KVM_PGTABLE_MAX_LEVELS);
+		return -EINVAL;
+	};
+}
+
+static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			       enum kvm_pgtable_walk_flags visit)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+	struct kvm_mmu_memory_cache *mc = ctx->arg;
+	struct kvm_s2_mmu *mmu;
+	kvm_pte_t pte = ctx->old, new, *childp;
+	enum kvm_pgtable_prot prot;
+	u32 level = ctx->level;
+	bool force_pte;
+	int nr_pages;
+	u64 phys;
+
+	/* No huge-pages exist at the last level */
+	if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+		return 0;
+
+	/* We only split valid block mappings */
+	if (!kvm_pte_valid(pte))
+		return 0;
+
+	nr_pages = stage2_block_get_nr_page_tables(level);
+	if (nr_pages < 0)
+		return nr_pages;
+
+	if (mc->nobjs >= nr_pages) {
+		/* Build a tree mapped down to the PTE granularity. */
+		force_pte = true;
+	} else {
+		/*
+		 * Don't force PTEs, so create_unlinked() below does
+		 * not populate the tree up to the PTE level. The
+		 * consequence is that the call will require a single
+		 * page of level 2 entries at level 1, or a single
+		 * page of PTEs at level 2. If we are at level 1, the
+		 * PTEs will be created recursively.
+		 */
+		force_pte = false;
+		nr_pages = 1;
+	}
+
+	if (mc->nobjs < nr_pages)
+		return -ENOMEM;
+
+	mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
+	phys = kvm_pte_to_phys(pte);
+	prot = kvm_pgtable_stage2_pte_prot(pte);
+
+	childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
+						    level, prot, mc, force_pte);
+	if (IS_ERR(childp))
+		return PTR_ERR(childp);
+
+	if (!stage2_try_break_pte(ctx, mmu)) {
+		kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
+		mm_ops->put_page(childp);
+		return -EAGAIN;
+	}
+
+	/*
+	 * Note, the contents of the page table are guaranteed to be made
+	 * visible before the new PTE is assigned because stage2_make_pte()
+	 * writes the PTE using smp_store_release().
+	 */
+	new = kvm_init_table_pte(childp, mm_ops);
+	stage2_make_pte(ctx, new);
+	dsb(ishst);
+	return 0;
+}
+
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+			     struct kvm_mmu_memory_cache *mc)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= stage2_split_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg	= mc,
+	};
+
+	return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 			      struct kvm_pgtable_mm_ops *mm_ops,
 			      enum kvm_pgtable_stage2_flags flags,