@@ -1114,3 +1114,278 @@ int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
11141114 return - EAGAIN ;
11151115 return 0 ;
11161116}
1117+
1118+ /**
1119+ * dat_perform_essa() - Perform ESSA actions on the PGSTE.
1120+ * @asce: The asce to operate on.
1121+ * @gfn: The guest page frame to operate on.
1122+ * @orc: The specific action to perform, see the ESSA_SET_* macros.
1123+ * @state: The storage attributes to be returned to the guest.
1124+ * @dirty: Returns whether the function dirtied a previously clean entry.
1125+ *
1126+ * Context: Called with kvm->mmu_lock held.
1127+ *
1128+ * Return:
1129+ * * %1 if the page state has been altered and the page is to be added to the CBRL
1130+ * * %0 if the page state has been altered, but the page is not to be added to the CBRL
1131+ * * %-1 if the page state has not been altered and the page is not to be added to the CBRL
1132+ */
1133+ int dat_perform_essa (union asce asce , gfn_t gfn , int orc , union essa_state * state , bool * dirty )
1134+ {
1135+ union crste * crstep ;
1136+ union pgste pgste ;
1137+ union pte * ptep ;
1138+ int res = 0 ;
1139+
1140+ if (dat_entry_walk (NULL , gfn , asce , 0 , TABLE_TYPE_PAGE_TABLE , & crstep , & ptep )) {
1141+ * state = (union essa_state ) { .exception = 1 };
1142+ return -1 ;
1143+ }
1144+
1145+ pgste = pgste_get_lock (ptep );
1146+
1147+ * state = (union essa_state ) {
1148+ .content = (ptep -> h .i << 1 ) + (ptep -> h .i && pgste .zero ),
1149+ .nodat = pgste .nodat ,
1150+ .usage = pgste .usage ,
1151+ };
1152+
1153+ switch (orc ) {
1154+ case ESSA_GET_STATE :
1155+ res = -1 ;
1156+ break ;
1157+ case ESSA_SET_STABLE :
1158+ pgste .usage = PGSTE_GPS_USAGE_STABLE ;
1159+ pgste .nodat = 0 ;
1160+ break ;
1161+ case ESSA_SET_UNUSED :
1162+ pgste .usage = PGSTE_GPS_USAGE_UNUSED ;
1163+ if (ptep -> h .i )
1164+ res = 1 ;
1165+ break ;
1166+ case ESSA_SET_VOLATILE :
1167+ pgste .usage = PGSTE_GPS_USAGE_VOLATILE ;
1168+ if (ptep -> h .i )
1169+ res = 1 ;
1170+ break ;
1171+ case ESSA_SET_POT_VOLATILE :
1172+ if (!ptep -> h .i ) {
1173+ pgste .usage = PGSTE_GPS_USAGE_POT_VOLATILE ;
1174+ } else if (pgste .zero ) {
1175+ pgste .usage = PGSTE_GPS_USAGE_VOLATILE ;
1176+ } else if (!pgste .gc ) {
1177+ pgste .usage = PGSTE_GPS_USAGE_VOLATILE ;
1178+ res = 1 ;
1179+ }
1180+ break ;
1181+ case ESSA_SET_STABLE_RESIDENT :
1182+ pgste .usage = PGSTE_GPS_USAGE_STABLE ;
1183+ /*
1184+ * Since the resident state can go away any time after this
1185+ * call, we will not make this page resident. We can revisit
1186+ * this decision if a guest will ever start using this.
1187+ */
1188+ break ;
1189+ case ESSA_SET_STABLE_IF_RESIDENT :
1190+ if (!ptep -> h .i )
1191+ pgste .usage = PGSTE_GPS_USAGE_STABLE ;
1192+ break ;
1193+ case ESSA_SET_STABLE_NODAT :
1194+ pgste .usage = PGSTE_GPS_USAGE_STABLE ;
1195+ pgste .nodat = 1 ;
1196+ break ;
1197+ default :
1198+ WARN_ONCE (1 , "Invalid ORC!" );
1199+ res = -1 ;
1200+ break ;
1201+ }
1202+ /* If we are discarding a page, set it to logical zero. */
1203+ pgste .zero = res == 1 ;
1204+ if (orc > 0 ) {
1205+ * dirty = !pgste .cmma_d ;
1206+ pgste .cmma_d = 1 ;
1207+ }
1208+
1209+ pgste_set_unlock (ptep , pgste );
1210+
1211+ return res ;
1212+ }
1213+
1214+ static long dat_reset_cmma_pte (union pte * ptep , gfn_t gfn , gfn_t next , struct dat_walk * walk )
1215+ {
1216+ union pgste pgste ;
1217+
1218+ pgste = pgste_get_lock (ptep );
1219+ pgste .usage = 0 ;
1220+ pgste .nodat = 0 ;
1221+ pgste .cmma_d = 0 ;
1222+ pgste_set_unlock (ptep , pgste );
1223+ if (need_resched ())
1224+ return next ;
1225+ return 0 ;
1226+ }
1227+
1228+ long dat_reset_cmma (union asce asce , gfn_t start )
1229+ {
1230+ const struct dat_walk_ops dat_reset_cmma_ops = {
1231+ .pte_entry = dat_reset_cmma_pte ,
1232+ };
1233+
1234+ return _dat_walk_gfn_range (start , asce_end (asce ), asce , & dat_reset_cmma_ops ,
1235+ DAT_WALK_IGN_HOLES , NULL );
1236+ }
1237+
/* Shared walker state for reading CMMA attributes out of the PGSTEs. */
struct dat_get_cmma_state {
	gfn_t start;		/* first gfn with a dirty entry, or -1 if none found yet */
	gfn_t end;		/* one past the last gfn whose value was stored */
	unsigned int count;	/* capacity of @values, in entries */
	u8 *values;		/* output buffer for usage/nodat attribute bytes */
	atomic64_t *remaining;	/* global counter of dirty CMMA entries left */
};
1245+
1246+ static long __dat_peek_cmma_pte (union pte * ptep , gfn_t gfn , gfn_t next , struct dat_walk * walk )
1247+ {
1248+ struct dat_get_cmma_state * state = walk -> priv ;
1249+ union pgste pgste ;
1250+
1251+ pgste = pgste_get_lock (ptep );
1252+ state -> values [gfn - walk -> start ] = pgste .usage | (pgste .nodat << 6 );
1253+ pgste_set_unlock (ptep , pgste );
1254+ state -> end = next ;
1255+
1256+ return 0 ;
1257+ }
1258+
1259+ static long __dat_peek_cmma_crste (union crste * crstep , gfn_t gfn , gfn_t next , struct dat_walk * walk )
1260+ {
1261+ struct dat_get_cmma_state * state = walk -> priv ;
1262+
1263+ if (crstep -> h .i )
1264+ state -> end = min (walk -> end , next );
1265+ return 0 ;
1266+ }
1267+
1268+ int dat_peek_cmma (gfn_t start , union asce asce , unsigned int * count , u8 * values )
1269+ {
1270+ const struct dat_walk_ops ops = {
1271+ .pte_entry = __dat_peek_cmma_pte ,
1272+ .pmd_entry = __dat_peek_cmma_crste ,
1273+ .pud_entry = __dat_peek_cmma_crste ,
1274+ .p4d_entry = __dat_peek_cmma_crste ,
1275+ .pgd_entry = __dat_peek_cmma_crste ,
1276+ };
1277+ struct dat_get_cmma_state state = { .values = values , };
1278+ int rc ;
1279+
1280+ rc = _dat_walk_gfn_range (start , start + * count , asce , & ops , DAT_WALK_DEFAULT , & state );
1281+ * count = state .end - start ;
1282+ /* Return success if at least one value was saved, otherwise an error. */
1283+ return (rc == - EFAULT && * count > 0 ) ? 0 : rc ;
1284+ }
1285+
1286+ static long __dat_get_cmma_pte (union pte * ptep , gfn_t gfn , gfn_t next , struct dat_walk * walk )
1287+ {
1288+ struct dat_get_cmma_state * state = walk -> priv ;
1289+ union pgste pgste ;
1290+
1291+ if (state -> start != -1 ) {
1292+ if ((gfn - state -> end ) > KVM_S390_MAX_BIT_DISTANCE )
1293+ return 1 ;
1294+ if (gfn - state -> start >= state -> count )
1295+ return 1 ;
1296+ }
1297+
1298+ if (!READ_ONCE (* pgste_of (ptep )).cmma_d )
1299+ return 0 ;
1300+
1301+ pgste = pgste_get_lock (ptep );
1302+ if (pgste .cmma_d ) {
1303+ if (state -> start == -1 )
1304+ state -> start = gfn ;
1305+ pgste .cmma_d = 0 ;
1306+ atomic64_dec (state -> remaining );
1307+ state -> values [gfn - state -> start ] = pgste .usage | pgste .nodat << 6 ;
1308+ state -> end = next ;
1309+ }
1310+ pgste_set_unlock (ptep , pgste );
1311+ return 0 ;
1312+ }
1313+
1314+ int dat_get_cmma (union asce asce , gfn_t * start , unsigned int * count , u8 * values , atomic64_t * rem )
1315+ {
1316+ const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte , };
1317+ struct dat_get_cmma_state state = {
1318+ .remaining = rem ,
1319+ .values = values ,
1320+ .count = * count ,
1321+ .start = -1 ,
1322+ };
1323+
1324+ _dat_walk_gfn_range (* start , asce_end (asce ), asce , & ops , DAT_WALK_IGN_HOLES , & state );
1325+
1326+ if (state .start == -1 ) {
1327+ * count = 0 ;
1328+ } else {
1329+ * count = state .end - state .start ;
1330+ * start = state .start ;
1331+ }
1332+
1333+ return 0 ;
1334+ }
1335+
/* Walker state for applying guest-supplied CMMA attributes to PGSTEs. */
struct dat_set_cmma_state {
	unsigned long mask;	/* which PGSTE bits may be modified */
	const u8 *bits;		/* per-page attribute bytes, indexed by gfn offset */
};
1340+
1341+ static long __dat_set_cmma_pte (union pte * ptep , gfn_t gfn , gfn_t next , struct dat_walk * walk )
1342+ {
1343+ struct dat_set_cmma_state * state = walk -> priv ;
1344+ union pgste pgste , tmp ;
1345+
1346+ tmp .val = (state -> bits [gfn - walk -> start ] << 24 ) & state -> mask ;
1347+
1348+ pgste = pgste_get_lock (ptep );
1349+ pgste .usage = tmp .usage ;
1350+ pgste .nodat = tmp .nodat ;
1351+ pgste_set_unlock (ptep , pgste );
1352+
1353+ return 0 ;
1354+ }
1355+
1356+ /**
1357+ * dat_set_cmma_bits() - Set CMMA bits for a range of guest pages.
1358+ * @mc: Cache used for allocations.
1359+ * @asce: The ASCE of the guest.
1360+ * @gfn: The guest frame of the fist page whose CMMA bits are to set.
1361+ * @count: How many pages need to be processed.
1362+ * @mask: Which PGSTE bits should be set.
1363+ * @bits: Points to an array with the CMMA attributes.
1364+ *
1365+ * This function sets the CMMA attributes for the given pages. If the input
1366+ * buffer has zero length, no action is taken, otherwise the attributes are
1367+ * set and the mm->context.uses_cmm flag is set.
1368+ *
1369+ * Each byte in @bits contains new values for bits 32-39 of the PGSTE.
1370+ * Currently, only the fields NT and US are applied.
1371+ *
1372+ * Return: %0 in case of success, a negative error value otherwise.
1373+ */
1374+ int dat_set_cmma_bits (struct kvm_s390_mmu_cache * mc , union asce asce , gfn_t gfn ,
1375+ unsigned long count , unsigned long mask , const uint8_t * bits )
1376+ {
1377+ const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte , };
1378+ struct dat_set_cmma_state state = { .mask = mask , .bits = bits , };
1379+ union crste * crstep ;
1380+ union pte * ptep ;
1381+ gfn_t cur ;
1382+ int rc ;
1383+
1384+ for (cur = ALIGN_DOWN (gfn , _PAGE_ENTRIES ); cur < gfn + count ; cur += _PAGE_ENTRIES ) {
1385+ rc = dat_entry_walk (mc , cur , asce , DAT_WALK_ALLOC , TABLE_TYPE_PAGE_TABLE ,
1386+ & crstep , & ptep );
1387+ if (rc )
1388+ return rc ;
1389+ }
1390+ return _dat_walk_gfn_range (gfn , gfn + count , asce , & ops , DAT_WALK_IGN_HOLES , & state );
1391+ }
0 commit comments