Skip to content

Commit 7b36847

Browse files
author
Claudio Imbrenda
committed
KVM: s390: KVM page table management functions: CMMA
Add page table management functions to be used for KVM guest (gmap) page tables. This patch adds functions to handle CMMA and the ESSA instruction. Acked-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
1 parent 94fd9b1 commit 7b36847

2 files changed

Lines changed: 302 additions & 0 deletions

File tree

arch/s390/kvm/dat.c

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1114,3 +1114,278 @@ int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
11141114
return -EAGAIN;
11151115
return 0;
11161116
}
1117+
1118+
/**
 * dat_perform_essa() - Perform ESSA actions on the PGSTE.
 * @asce: The asce to operate on.
 * @gfn: The guest page frame to operate on.
 * @orc: The specific action to perform, see the ESSA_SET_* macros.
 * @state: The storage attributes to be returned to the guest.
 * @dirty: Returns whether the function dirtied a previously clean entry.
 *
 * Context: Called with kvm->mmu_lock held.
 *
 * Return:
 * * %1 if the page state has been altered and the page is to be added to the CBRL
 * * %0 if the page state has been altered, but the page is not to be added to the CBRL
 * * %-1 if the page state has not been altered and the page is not to be added to the CBRL
 */
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty)
{
	union crste *crstep;
	union pgste pgste;
	union pte *ptep;
	int res = 0;

	/* Walk down to the PTE without allocating (mc == NULL); report an
	 * exception to the guest if the page table does not exist. */
	if (dat_entry_walk(NULL, gfn, asce, 0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep)) {
		*state = (union essa_state) { .exception = 1 };
		return -1;
	}

	pgste = pgste_get_lock(ptep);

	/*
	 * Snapshot the current state before modifying anything:
	 * content encodes the invalid bit and (for invalid entries) whether
	 * the page is logically zero; nodat and usage come from the PGSTE.
	 */
	*state = (union essa_state) {
		.content = (ptep->h.i << 1) + (ptep->h.i && pgste.zero),
		.nodat = pgste.nodat,
		.usage = pgste.usage,
	};

	switch (orc) {
	case ESSA_GET_STATE:
		/* Query only: nothing to change, nothing for the CBRL. */
		res = -1;
		break;
	case ESSA_SET_STABLE:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		pgste.nodat = 0;
		break;
	case ESSA_SET_UNUSED:
		pgste.usage = PGSTE_GPS_USAGE_UNUSED;
		/* An invalid (not backed) page can be discarded right away. */
		if (ptep->h.i)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
		if (ptep->h.i)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		if (!ptep->h.i) {
			pgste.usage = PGSTE_GPS_USAGE_POT_VOLATILE;
		} else if (pgste.zero) {
			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
		} else if (!pgste.gc) {
			/* Not guest-changed: may be discarded, add to CBRL. */
			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!ptep->h.i)
			pgste.usage = PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_STABLE_NODAT:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		pgste.nodat = 1;
		break;
	default:
		/* The caller is expected to have validated the ORC. */
		WARN_ONCE(1, "Invalid ORC!");
		res = -1;
		break;
	}
	/* If we are discarding a page, set it to logical zero. */
	pgste.zero = res == 1;
	/*
	 * NOTE(review): assumes ESSA_GET_STATE == 0, so only state-changing
	 * ORCs mark the entry CMMA-dirty — TODO confirm the macro values.
	 */
	if (orc > 0) {
		*dirty = !pgste.cmma_d;
		pgste.cmma_d = 1;
	}

	pgste_set_unlock(ptep, pgste);

	return res;
}
1213+
1214+
/* Clear the CMMA state (usage, nodat, dirty) of a single PTE's PGSTE. */
static long dat_reset_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste new;

	new = pgste_get_lock(ptep);
	new.usage = 0;
	new.nodat = 0;
	new.cmma_d = 0;
	pgste_set_unlock(ptep, new);

	/* Returning @next lets the walk be interrupted and resumed there. */
	return need_resched() ? next : 0;
}
1227+
1228+
/* Reset the CMMA state of every guest page from @start to the end of @asce. */
long dat_reset_cmma(union asce asce, gfn_t start)
{
	static const struct dat_walk_ops ops = {
		.pte_entry = dat_reset_cmma_pte,
	};

	return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops,
				   DAT_WALK_IGN_HOLES, NULL);
}
1237+
1238+
/* Walk state shared by dat_peek_cmma()/dat_get_cmma() and their callbacks. */
struct dat_get_cmma_state {
	gfn_t start;		/* first recorded gfn; -1 while none found (get path) */
	gfn_t end;		/* one past the last gfn whose value was recorded */
	unsigned int count;	/* capacity of @values, in entries */
	u8 *values;		/* output buffer for the per-page CMMA attributes */
	atomic64_t *remaining;	/* global counter of still-dirty entries (get path) */
};
1245+
1246+
/* Record the CMMA attributes (usage + nodat) of one PTE without altering it. */
static long __dat_peek_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *st = walk->priv;
	union pgste pgste;
	u8 attrs;

	pgste = pgste_get_lock(ptep);
	attrs = pgste.usage | (pgste.nodat << 6);
	pgste_set_unlock(ptep, pgste);

	st->values[gfn - walk->start] = attrs;
	st->end = next;

	return 0;
}
1258+
1259+
/* Track how far the peek got when an invalid region/segment entry is hit. */
static long __dat_peek_cmma_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *st = walk->priv;

	if (!crstep->h.i)
		return 0;
	st->end = min(walk->end, next);
	return 0;
}
1267+
1268+
/*
 * Read the CMMA attributes of up to *@count pages starting at @start into
 * @values, without modifying any state. On return, *@count holds how many
 * values were actually saved.
 */
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
{
	const struct dat_walk_ops ops = {
		.pte_entry = __dat_peek_cmma_pte,
		.pmd_entry = __dat_peek_cmma_crste,
		.pud_entry = __dat_peek_cmma_crste,
		.p4d_entry = __dat_peek_cmma_crste,
		.pgd_entry = __dat_peek_cmma_crste,
	};
	struct dat_get_cmma_state st = { .values = values, };
	int rc;

	rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &st);
	*count = st.end - start;

	/* A fault is tolerated as long as at least one value was saved. */
	if (rc == -EFAULT && *count > 0)
		return 0;
	return rc;
}
1285+
1286+
/*
 * Collect and clear the CMMA-dirty state of one PTE, recording its
 * attributes into the output buffer of a dat_get_cmma() walk.
 */
static long __dat_get_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *state = walk->priv;
	union pgste pgste;

	/*
	 * Once the first dirty entry has been found (start != -1), stop the
	 * walk (return 1) when either the gap since the last recorded entry
	 * exceeds KVM_S390_MAX_BIT_DISTANCE (cheaper to start a new block,
	 * see the comment at the macro definition), or the output buffer
	 * would overflow.
	 */
	if (state->start != -1) {
		if ((gfn - state->end) > KVM_S390_MAX_BIT_DISTANCE)
			return 1;
		if (gfn - state->start >= state->count)
			return 1;
	}

	/* Lockless pre-check: skip clean entries without taking the lock. */
	if (!READ_ONCE(*pgste_of(ptep)).cmma_d)
		return 0;

	pgste = pgste_get_lock(ptep);
	/* Re-check under the lock; the bit may have been cleared meanwhile. */
	if (pgste.cmma_d) {
		if (state->start == -1)
			state->start = gfn;
		pgste.cmma_d = 0;
		atomic64_dec(state->remaining);
		/* Low bits carry the usage state, bit 6 the nodat attribute. */
		state->values[gfn - state->start] = pgste.usage | pgste.nodat << 6;
		state->end = next;
	}
	pgste_set_unlock(ptep, pgste);
	return 0;
}
1313+
1314+
/*
 * Harvest up to *@count CMMA-dirty attributes starting at *@start, clearing
 * the dirty bits as they are collected. On return, *@start and *@count
 * describe the contiguous range actually stored in @values; *@count is zero
 * if no dirty entry was found.
 */
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem)
{
	const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte, };
	struct dat_get_cmma_state st = {
		.start = -1,
		.count = *count,
		.values = values,
		.remaining = rem,
	};

	_dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &st);

	if (st.start != -1) {
		*start = st.start;
		*count = st.end - st.start;
	} else {
		*count = 0;
	}

	return 0;
}
1335+
1336+
/* Walk state for dat_set_cmma_bits(): new attributes and which bits apply. */
struct dat_set_cmma_state {
	unsigned long mask;	/* which PGSTE bits should be set */
	const u8 *bits;		/* new attribute values, one byte per page */
};
1340+
1341+
/*
 * Apply the new CMMA attributes (usage and nodat) of one page to its PGSTE.
 * Each input byte holds new values for bits 32-39 of the PGSTE, i.e. bits
 * 24-31 of the 64-bit value, hence the shift by 24.
 */
static long __dat_set_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_set_cmma_state *state = walk->priv;
	union pgste pgste, tmp;

	/*
	 * Widen before shifting: the u8 would be promoted to (signed) int,
	 * and for values >= 0x80 the shift by 24 would overflow INT_MAX,
	 * which is undefined behavior for signed left shifts.
	 */
	tmp.val = ((unsigned long)state->bits[gfn - walk->start] << 24) & state->mask;

	pgste = pgste_get_lock(ptep);
	/* Only the usage and nodat fields are currently applied. */
	pgste.usage = tmp.usage;
	pgste.nodat = tmp.nodat;
	pgste_set_unlock(ptep, pgste);

	return 0;
}
1355+
1356+
/**
1357+
* dat_set_cmma_bits() - Set CMMA bits for a range of guest pages.
1358+
* @mc: Cache used for allocations.
1359+
* @asce: The ASCE of the guest.
1360+
* @gfn: The guest frame of the fist page whose CMMA bits are to set.
1361+
* @count: How many pages need to be processed.
1362+
* @mask: Which PGSTE bits should be set.
1363+
* @bits: Points to an array with the CMMA attributes.
1364+
*
1365+
* This function sets the CMMA attributes for the given pages. If the input
1366+
* buffer has zero length, no action is taken, otherwise the attributes are
1367+
* set and the mm->context.uses_cmm flag is set.
1368+
*
1369+
* Each byte in @bits contains new values for bits 32-39 of the PGSTE.
1370+
* Currently, only the fields NT and US are applied.
1371+
*
1372+
* Return: %0 in case of success, a negative error value otherwise.
1373+
*/
1374+
int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
1375+
unsigned long count, unsigned long mask, const uint8_t *bits)
1376+
{
1377+
const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte, };
1378+
struct dat_set_cmma_state state = { .mask = mask, .bits = bits, };
1379+
union crste *crstep;
1380+
union pte *ptep;
1381+
gfn_t cur;
1382+
int rc;
1383+
1384+
for (cur = ALIGN_DOWN(gfn, _PAGE_ENTRIES); cur < gfn + count; cur += _PAGE_ENTRIES) {
1385+
rc = dat_entry_walk(mc, cur, asce, DAT_WALK_ALLOC, TABLE_TYPE_PAGE_TABLE,
1386+
&crstep, &ptep);
1387+
if (rc)
1388+
return rc;
1389+
}
1390+
return _dat_walk_gfn_range(gfn, gfn + count, asce, &ops, DAT_WALK_IGN_HOLES, &state);
1391+
}

arch/s390/kvm/dat.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@
1717
#include <asm/tlbflush.h>
1818
#include <asm/dat-bits.h>
1919

20+
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* For consistency — same limit as KVM_S390_SKEYS_MAX, as a u32. */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
28+
2029
#define _ASCE(x) ((union asce) { .val = (x), })
2130
#define NULL_ASCE _ASCE(0)
2231

@@ -433,6 +442,17 @@ static inline union crste _crste_fc1(kvm_pfn_t pfn, int tt, bool writable, bool
433442
return res;
434443
}
435444

445+
/*
 * Page state as reported to the guest by dat_perform_essa().
 * NOTE(review): field positions presumably mirror the ESSA result layout of
 * the architecture — confirm against the Principles of Operation.
 */
union essa_state {
	unsigned char val;
	struct {
		unsigned char : 2;
		unsigned char nodat : 1;	/* no-DAT attribute from the PGSTE */
		unsigned char exception : 1;	/* set when the table walk failed */
		unsigned char usage : 2;	/* usage state from the PGSTE */
		unsigned char content : 2;	/* invalid bit + logical-zero state */
	};
};
455+
436456
/**
437457
* struct vsie_rmap - reverse mapping for shadow page table entries
438458
* @next: pointer to next rmap in the list
@@ -522,6 +542,13 @@ bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end);
522542
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
523543
bool uses_skeys, struct guest_fault *f);
524544

545+
/* ESSA / CMMA handling, implemented in dat.c (see the kernel-doc there). */
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty);
long dat_reset_cmma(union asce asce, gfn_t start_gfn);
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values);
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem);
int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
		      unsigned long count, unsigned long mask, const uint8_t *bits);
551+
525552
int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc);
526553

527554
#define GFP_KVM_S390_MMU_CACHE (GFP_ATOMIC | __GFP_ACCOUNT | __GFP_NOWARN)

0 commit comments

Comments
 (0)