Skip to content

Commit 94fd9b1

Browse files
author
Claudio Imbrenda
committed
KVM: s390: KVM page table management functions: lifecycle management
Add page table management functions to be used for KVM guest (gmap) page tables. This patch adds functions to handle memslot creation and destruction, additional per-pagetable data stored in the PGSTEs, mapping physical addresses into the gmap, and marking address ranges as prefix. Acked-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
1 parent 8e03e83 commit 94fd9b1

2 files changed

Lines changed: 348 additions & 0 deletions

File tree

arch/s390/kvm/dat.c

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,38 @@ void dat_free_level(struct crst_table *table, bool owns_ptes)
102102
dat_free_crst(table);
103103
}
104104

105+
int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype)
106+
{
107+
struct crst_table *table;
108+
union crste crste;
109+
110+
while (asce->dt > newtype) {
111+
table = dereference_asce(*asce);
112+
crste = table->crstes[0];
113+
if (crste.h.fc)
114+
return 0;
115+
if (!crste.h.i) {
116+
asce->rsto = crste.h.fc0.to;
117+
dat_free_crst(table);
118+
} else {
119+
crste.h.tt--;
120+
crst_table_init((void *)table, crste.val);
121+
}
122+
asce->dt--;
123+
}
124+
while (asce->dt < newtype) {
125+
crste = _crste_fc0(asce->rsto, asce->dt + 1);
126+
table = dat_alloc_crst_noinit(mc);
127+
if (!table)
128+
return -ENOMEM;
129+
crst_table_init((void *)table, _CRSTE_HOLE(crste.h.tt).val);
130+
table->crstes[0] = crste;
131+
asce->rsto = __pa(table) >> PAGE_SHIFT;
132+
asce->dt++;
133+
}
134+
return 0;
135+
}
136+
105137
/**
106138
* dat_crstep_xchg() - Exchange a gmap CRSTE with another.
107139
* @crstep: Pointer to the CRST entry
@@ -825,3 +857,260 @@ long dat_reset_skeys(union asce asce, gfn_t start)
825857

826858
return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, NULL);
827859
}
860+
861+
/* Private state threaded through the dat_set_slot() walk. */
struct slot_priv {
	unsigned long token;		/* Token CRSTE value to install. */
	struct kvm_s390_mmu_cache *mc;	/* Cache for splitting entries. */
};
865+
866+
/* Walk callback: replace one PTE with the slot token, if not already set. */
static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct slot_priv *priv = walk->priv;
	union crste tok = { .val = priv->token };
	union pte want, cur = READ_ONCE(*ptep);

	want = _PTE_TOK(tok.tok.type, tok.tok.par);

	/* Nothing to do if the entry already carries the token. */
	if (cur.val == want.val)
		return 0;

	dat_ptep_xchg(ptep, want, gfn, walk->asce, false);
	return 0;
}
881+
882+
/*
 * Walk callback: replace one CRST entry with the slot token, splitting the
 * entry when the walk range only partially covers it.
 */
static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste want, cur = READ_ONCE(*crstep);
	struct slot_priv *priv = walk->priv;

	want.val = priv->token;
	want.h.tt = cur.h.tt;

	/* Table entry already in the desired state. */
	if (cur.val == want.val)
		return 0;

	/* The walk range fully covers this entry: update it directly. */
	if (walk->start <= gfn && walk->end >= next) {
		dat_crstep_xchg_atomic(crstep, cur, want, gfn, walk->asce);
		/* A lower-level table was present; it needs to be freed. */
		if (!cur.h.fc && !cur.h.i) {
			if (is_pmd(cur))
				dat_free_pt(dereference_pmd(cur.pmd));
			else
				dat_free_level(dereference_crste(cur), true);
		}
		return 0;
	}

	/* A lower-level table is present; things will be handled there. */
	if (!cur.h.fc && !cur.h.i)
		return 0;
	/* Split (install a lower-level table), and handle things there. */
	return dat_split_crste(priv->mc, crstep, gfn, walk->asce, false);
}
913+
914+
static const struct dat_walk_ops dat_slot_ops = {
915+
.pte_entry = _dat_slot_pte,
916+
.crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, },
917+
};
918+
919+
/*
 * dat_set_slot() - Fill a gfn range with token entries of the given type.
 * @mc:    MMU cache used when entries must be split.
 * @asce:  The ASCE whose address range is updated.
 * @start: First guest frame of the range.
 * @end:   Guest frame after the last one in the range.
 * @type:  Token type to install.
 * @param: Token parameter to install.
 *
 * Return: 0 on success, or a negative error code from the walk.
 */
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
		 u16 type, u16 param)
{
	struct slot_priv priv = {
		.token = _CRSTE_TOK(0, type, param).val,
		.mc = mc,
	};

	return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops,
				   DAT_WALK_IGN_HOLES | DAT_WALK_ANY, &priv);
}
930+
931+
static void pgste_set_unlock_multiple(union pte *first, int n, union pgste *pgstes)
932+
{
933+
int i;
934+
935+
for (i = 0; i < n; i++) {
936+
if (!pgstes[i].pcl)
937+
break;
938+
pgste_set_unlock(first + i, pgstes[i]);
939+
}
940+
}
941+
942+
static bool pgste_get_trylock_multiple(union pte *first, int n, union pgste *pgstes)
943+
{
944+
int i;
945+
946+
for (i = 0; i < n; i++) {
947+
if (!pgste_get_trylock(first + i, pgstes + i))
948+
break;
949+
}
950+
if (i == n)
951+
return true;
952+
pgste_set_unlock_multiple(first, n, pgstes);
953+
return false;
954+
}
955+
956+
unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param)
957+
{
958+
union pgste pgstes[4] = {};
959+
unsigned long res = 0;
960+
int i, n;
961+
962+
n = param.len + 1;
963+
964+
while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
965+
cpu_relax();
966+
967+
for (i = 0; i < n; i++)
968+
res = res << 16 | pgstes[i].val16;
969+
970+
pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
971+
return res;
972+
}
973+
974+
void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val)
975+
{
976+
union pgste pgstes[4] = {};
977+
int i, n;
978+
979+
n = param.len + 1;
980+
981+
while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
982+
cpu_relax();
983+
984+
for (i = param.len; i >= 0; i--) {
985+
pgstes[i].val16 = val;
986+
val = val >> 16;
987+
}
988+
989+
pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
990+
}
991+
992+
/* Walk callback: report whether this PTE is marked young. */
static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk)
{
	return ptep->s.y;
}
996+
997+
/* Walk callback: report whether a large-frame CRSTE is marked young. */
static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end,
				  struct dat_walk *walk)
{
	return crstep->h.fc && crstep->s.fc1.y;
}
1002+
1003+
static const struct dat_walk_ops test_age_ops = {
1004+
.pte_entry = _dat_test_young_pte,
1005+
.pmd_entry = _dat_test_young_crste,
1006+
.pud_entry = _dat_test_young_crste,
1007+
};
1008+
1009+
/**
1010+
* dat_test_age_gfn() - Test young.
1011+
* @asce: The ASCE whose address range is to be tested.
1012+
* @start: The first guest frame of the range to check.
1013+
* @end: The guest frame after the last in the range.
1014+
*
1015+
* Context: called by KVM common code with the kvm mmu write lock held.
1016+
*
1017+
* Return: %true if any page in the given range is young, otherwise %false.
1018+
*/
1019+
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
1020+
{
1021+
return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
1022+
}
1023+
1024+
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
1025+
bool uses_skeys, struct guest_fault *f)
1026+
{
1027+
union crste oldval, newval;
1028+
union pte newpte, oldpte;
1029+
union pgste pgste;
1030+
int rc = 0;
1031+
1032+
rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
1033+
if (rc == -EINVAL || rc == -ENOMEM)
1034+
return rc;
1035+
if (rc)
1036+
return -EAGAIN;
1037+
1038+
if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
1039+
return -EINVAL;
1040+
1041+
if (f->ptep) {
1042+
pgste = pgste_get_lock(f->ptep);
1043+
oldpte = *f->ptep;
1044+
newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
1045+
newpte.s.sd = oldpte.s.sd;
1046+
oldpte.s.sd = 0;
1047+
if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
1048+
pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
1049+
if (f->callback)
1050+
f->callback(f);
1051+
} else {
1052+
rc = -EAGAIN;
1053+
}
1054+
pgste_set_unlock(f->ptep, pgste);
1055+
} else {
1056+
oldval = READ_ONCE(*f->crstep);
1057+
newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
1058+
f->write_attempt | oldval.s.fc1.d);
1059+
newval.s.fc1.sd = oldval.s.fc1.sd;
1060+
if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
1061+
crste_origin_large(oldval) != crste_origin_large(newval))
1062+
return -EAGAIN;
1063+
if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
1064+
return -EAGAIN;
1065+
if (f->callback)
1066+
f->callback(f);
1067+
}
1068+
1069+
return rc;
1070+
}
1071+
1072+
/*
 * Walk callback: set the prefix-notification bit on a mapped, writable
 * large-frame CRSTE.  Counts the entry as covering both prefix pages by
 * setting *n to 2.
 */
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste cur = READ_ONCE(*crstep);
	int *count = walk->priv;

	/* Skip entries that are not large, invalid, or write-protected. */
	if (!cur.h.fc || cur.h.i || cur.h.p)
		return 0;

	*count = 2;
	if (cur.s.fc1.prefix_notif)
		return 0;
	cur.s.fc1.prefix_notif = 1;
	dat_crstep_xchg(crstep, cur, gfn, walk->asce);
	return 0;
}
1087+
1088+
/*
 * Walk callback: set the prefix-notification bit in the PGSTE of a mapped,
 * writable PTE, counting each page marked.
 */
static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	int *count = walk->priv;
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	if (!ptep->h.i && !ptep->h.p) {
		pgste.prefix_notif = 1;
		*count += 1;
	}
	pgste_set_unlock(ptep, pgste);
	return 0;
}
1101+
1102+
/*
 * dat_set_prefix_notif_bit() - Mark the two prefix pages for notification.
 * @asce: The ASCE of the guest address space.
 * @gfn:  The first guest frame of the prefix area.
 *
 * Both prefix pages (gfn and gfn + 1) must be mapped and writable for the
 * operation to succeed.
 *
 * Return: 0 on success, -EAGAIN if fewer than both pages could be marked.
 */
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
{
	static const struct dat_walk_ops ops = {
		.pte_entry = dat_set_pn_pte,
		.pmd_entry = dat_set_pn_crste,
		.pud_entry = dat_set_pn_crste,
	};

	int marked = 0;

	_dat_walk_gfn_range(gfn, gfn + 2, asce, &ops, DAT_WALK_IGN_HOLES, &marked);
	if (marked != 2)
		return -EAGAIN;
	return 0;
}

arch/s390/kvm/dat.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,11 @@ struct dat_walk {
361361
void *priv;
362362
};
363363

364+
/*
 * Location of a value stored across PGSTEs of a page table; see
 * dat_get_ptval()/dat_set_ptval().
 */
struct ptval_param {
	unsigned char offset : 6;	/* First PGSTE index. */
	unsigned char len : 2;		/* Number of PGSTEs used, minus one. */
};
368+
364369
/**
365370
* _pte() - Useful constructor for union pte
366371
* @pfn: the pfn this pte should point to.
@@ -459,6 +464,32 @@ struct kvm_s390_mmu_cache {
459464
short int n_rmaps;
460465
};
461466

467+
struct guest_fault {
468+
gfn_t gfn; /* Guest frame */
469+
kvm_pfn_t pfn; /* Host PFN */
470+
struct page *page; /* Host page */
471+
union pte *ptep; /* Used to resolve the fault, or NULL */
472+
union crste *crstep; /* Used to resolve the fault, or NULL */
473+
bool writable; /* Mapping is writable */
474+
bool write_attempt; /* Write access attempted */
475+
bool attempt_pfault; /* Attempt a pfault first */
476+
bool valid; /* This entry contains valid data */
477+
void (*callback)(struct guest_fault *f);
478+
void *priv;
479+
};
480+
481+
/*
 * Layout of the values stored in the PGSTEs of a page table (byte offsets):
 *
 *      0       1       2       3       4       5       6       7
 *  +-------+-------+-------+-------+-------+-------+-------+-------+
 * 0 |                               |            PGT_ADDR           |
 * 8 |          VMADDR       |                                      |
 * 16|                                                              |
 * 24|                                                              |
 *
 * MKPTVAL(o, l) builds a ptval_param for a value of @l bytes stored
 * starting at PGSTE index @o (each PGSTE holds 16 bits of payload).
 */
#define MKPTVAL(o, l) ((struct ptval_param) { .offset = (o), .len = ((l) + 1) / 2 - 1})
#define PTVAL_PGT_ADDR MKPTVAL(4, 8)
#define PTVAL_VMADDR MKPTVAL(8, 6)
492+
462493
union pgste __must_check __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new,
463494
gfn_t gfn, union asce asce, bool uses_skeys);
464495
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
@@ -472,6 +503,7 @@ int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, in
472503
int walk_level, union crste **last, union pte **ptepp);
473504
void dat_free_level(struct crst_table *table, bool owns_ptes);
474505
struct crst_table *dat_alloc_crst_sleepable(unsigned long init);
506+
int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype);
475507
int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey);
476508
int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
477509
union skey skey, bool nq);
@@ -480,6 +512,16 @@ int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gf
480512
int dat_reset_reference_bit(union asce asce, gfn_t gfn);
481513
long dat_reset_skeys(union asce asce, gfn_t start);
482514

515+
unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param);
516+
void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val);
517+
518+
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
519+
u16 type, u16 param);
520+
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn);
521+
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
522+
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
523+
bool uses_skeys, struct guest_fault *f);
524+
483525
int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
484526

485527
#define GFP_KVM_S390_MMU_CACHE (GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)
@@ -880,4 +922,21 @@ static inline int get_level(union crste *crstep, union pte *ptep)
880922
return ptep ? TABLE_TYPE_PAGE_TABLE : crstep->h.tt;
881923
}
882924

925+
/*
 * Remove a memslot mapping: fill the range with addressing-exception
 * tokens so guest accesses fault with PGM_ADDRESSING.
 */
static inline int dat_delete_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
				  unsigned long npages)
{
	return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_PIC, PGM_ADDRESSING);
}
930+
931+
/* Create a memslot mapping: fill the range with empty (no-token) entries. */
static inline int dat_create_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start,
				  unsigned long npages)
{
	return dat_set_slot(mc, asce, start, start + npages, _DAT_TOKEN_NONE, 0);
}
936+
937+
/*
 * Test whether a CRSTE is a UCAS marker: an invalid PMD-level entry with
 * fc clear and table length 1.  NOTE(review): semantics inferred from the
 * bit pattern — confirm against the UCAS handling code.
 */
static inline bool crste_is_ucas(union crste crste)
{
	return is_pmd(crste) && crste.h.i && crste.h.fc0.tl == 1 && crste.h.fc == 0;
}
941+
883942
#endif /* __KVM_S390_DAT_H */

0 commit comments

Comments
 (0)