|
1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | +#include <linux/ras.h> |
2 | 3 | #include "amd64_edac.h" |
3 | 4 | #include <asm/amd_nb.h> |
4 | 5 |
|
@@ -1051,281 +1052,6 @@ static int fixup_node_id(int node_id, struct mce *m) |
1051 | 1052 | return nid - gpu_node_map.base_node_id + 1; |
1052 | 1053 | } |
1053 | 1054 |
|
1054 | | -/* Protect the PCI config register pairs used for DF indirect access. */ |
1055 | | -static DEFINE_MUTEX(df_indirect_mutex); |
1056 | | - |
1057 | | -/* |
1058 | | - * Data Fabric Indirect Access uses FICAA/FICAD. |
1059 | | - * |
1060 | | - * Fabric Indirect Configuration Access Address (FICAA): Constructed based |
1061 | | - * on the device's Instance Id and the PCI function and register offset of |
1062 | | - * the desired register. |
1063 | | - * |
1064 | | - * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO |
1065 | | - * and FICAD HI registers but so far we only need the LO register. |
1066 | | - * |
1067 | | - * Use Instance Id 0xFF to indicate a broadcast read. |
1068 | | - */ |
1069 | | -#define DF_BROADCAST 0xFF |
1070 | | -static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) |
1071 | | -{ |
1072 | | - struct pci_dev *F4; |
1073 | | - u32 ficaa; |
1074 | | - int err = -ENODEV; |
1075 | | - |
1076 | | - if (node >= amd_nb_num()) |
1077 | | - goto out; |
1078 | | - |
1079 | | - F4 = node_to_amd_nb(node)->link; |
1080 | | - if (!F4) |
1081 | | - goto out; |
1082 | | - |
1083 | | - ficaa = (instance_id == DF_BROADCAST) ? 0 : 1; |
1084 | | - ficaa |= reg & 0x3FC; |
1085 | | - ficaa |= (func & 0x7) << 11; |
1086 | | - ficaa |= instance_id << 16; |
1087 | | - |
1088 | | - mutex_lock(&df_indirect_mutex); |
1089 | | - |
1090 | | - err = pci_write_config_dword(F4, 0x5C, ficaa); |
1091 | | - if (err) { |
1092 | | - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); |
1093 | | - goto out_unlock; |
1094 | | - } |
1095 | | - |
1096 | | - err = pci_read_config_dword(F4, 0x98, lo); |
1097 | | - if (err) |
1098 | | - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); |
1099 | | - |
1100 | | -out_unlock: |
1101 | | - mutex_unlock(&df_indirect_mutex); |
1102 | | - |
1103 | | -out: |
1104 | | - return err; |
1105 | | -} |
1106 | | - |
1107 | | -static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) |
1108 | | -{ |
1109 | | - return __df_indirect_read(node, func, reg, instance_id, lo); |
1110 | | -} |
1111 | | - |
1112 | | -static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) |
1113 | | -{ |
1114 | | - return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); |
1115 | | -} |
1116 | | - |
1117 | | -struct addr_ctx { |
1118 | | - u64 ret_addr; |
1119 | | - u32 tmp; |
1120 | | - u16 nid; |
1121 | | - u8 inst_id; |
1122 | | -}; |
1123 | | - |
1124 | | -static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) |
1125 | | -{ |
1126 | | - u64 dram_base_addr, dram_limit_addr, dram_hole_base; |
1127 | | - |
1128 | | - u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; |
1129 | | - u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; |
1130 | | - u8 intlv_addr_sel, intlv_addr_bit; |
1131 | | - u8 num_intlv_bits, hashed_bit; |
1132 | | - u8 lgcy_mmio_hole_en, base = 0; |
1133 | | - u8 cs_mask, cs_id = 0; |
1134 | | - bool hash_enabled = false; |
1135 | | - |
1136 | | - struct addr_ctx ctx; |
1137 | | - |
1138 | | - memset(&ctx, 0, sizeof(ctx)); |
1139 | | - |
1140 | | - /* Start from the normalized address */ |
1141 | | - ctx.ret_addr = norm_addr; |
1142 | | - |
1143 | | - ctx.nid = nid; |
1144 | | - ctx.inst_id = umc; |
1145 | | - |
1146 | | - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ |
1147 | | - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) |
1148 | | - goto out_err; |
1149 | | - |
1150 | | - /* Remove HiAddrOffset from normalized address, if enabled: */ |
1151 | | - if (ctx.tmp & BIT(0)) { |
1152 | | - u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; |
1153 | | - |
1154 | | - if (norm_addr >= hi_addr_offset) { |
1155 | | - ctx.ret_addr -= hi_addr_offset; |
1156 | | - base = 1; |
1157 | | - } |
1158 | | - } |
1159 | | - |
1160 | | - /* Read D18F0x110 (DramBaseAddress). */ |
1161 | | - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) |
1162 | | - goto out_err; |
1163 | | - |
1164 | | - /* Check if address range is valid. */ |
1165 | | - if (!(ctx.tmp & BIT(0))) { |
1166 | | - pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", |
1167 | | - __func__, ctx.tmp); |
1168 | | - goto out_err; |
1169 | | - } |
1170 | | - |
1171 | | - lgcy_mmio_hole_en = ctx.tmp & BIT(1); |
1172 | | - intlv_num_chan = (ctx.tmp >> 4) & 0xF; |
1173 | | - intlv_addr_sel = (ctx.tmp >> 8) & 0x7; |
1174 | | - dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; |
1175 | | - |
1176 | | - /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ |
1177 | | - if (intlv_addr_sel > 3) { |
1178 | | - pr_err("%s: Invalid interleave address select %d.\n", |
1179 | | - __func__, intlv_addr_sel); |
1180 | | - goto out_err; |
1181 | | - } |
1182 | | - |
1183 | | - /* Read D18F0x114 (DramLimitAddress). */ |
1184 | | - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) |
1185 | | - goto out_err; |
1186 | | - |
1187 | | - intlv_num_sockets = (ctx.tmp >> 8) & 0x1; |
1188 | | - intlv_num_dies = (ctx.tmp >> 10) & 0x3; |
1189 | | - dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); |
1190 | | - |
1191 | | - intlv_addr_bit = intlv_addr_sel + 8; |
1192 | | - |
1193 | | - /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ |
1194 | | - switch (intlv_num_chan) { |
1195 | | - case 0: intlv_num_chan = 0; break; |
1196 | | - case 1: intlv_num_chan = 1; break; |
1197 | | - case 3: intlv_num_chan = 2; break; |
1198 | | - case 5: intlv_num_chan = 3; break; |
1199 | | - case 7: intlv_num_chan = 4; break; |
1200 | | - |
1201 | | - case 8: intlv_num_chan = 1; |
1202 | | - hash_enabled = true; |
1203 | | - break; |
1204 | | - default: |
1205 | | - pr_err("%s: Invalid number of interleaved channels %d.\n", |
1206 | | - __func__, intlv_num_chan); |
1207 | | - goto out_err; |
1208 | | - } |
1209 | | - |
1210 | | - num_intlv_bits = intlv_num_chan; |
1211 | | - |
1212 | | - if (intlv_num_dies > 2) { |
1213 | | - pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", |
1214 | | - __func__, intlv_num_dies); |
1215 | | - goto out_err; |
1216 | | - } |
1217 | | - |
1218 | | - num_intlv_bits += intlv_num_dies; |
1219 | | - |
1220 | | - /* Add a bit if sockets are interleaved. */ |
1221 | | - num_intlv_bits += intlv_num_sockets; |
1222 | | - |
1223 | | - /* Assert num_intlv_bits <= 4 */ |
1224 | | - if (num_intlv_bits > 4) { |
1225 | | - pr_err("%s: Invalid interleave bits %d.\n", |
1226 | | - __func__, num_intlv_bits); |
1227 | | - goto out_err; |
1228 | | - } |
1229 | | - |
1230 | | - if (num_intlv_bits > 0) { |
1231 | | - u64 temp_addr_x, temp_addr_i, temp_addr_y; |
1232 | | - u8 die_id_bit, sock_id_bit, cs_fabric_id; |
1233 | | - |
1234 | | - /* |
1235 | | - * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. |
1236 | | - * This is the fabric id for this coherent slave. Use |
1237 | | - * umc/channel# as instance id of the coherent slave |
1238 | | - * for FICAA. |
1239 | | - */ |
1240 | | - if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) |
1241 | | - goto out_err; |
1242 | | - |
1243 | | - cs_fabric_id = (ctx.tmp >> 8) & 0xFF; |
1244 | | - die_id_bit = 0; |
1245 | | - |
1246 | | - /* If interleaved over more than 1 channel: */ |
1247 | | - if (intlv_num_chan) { |
1248 | | - die_id_bit = intlv_num_chan; |
1249 | | - cs_mask = (1 << die_id_bit) - 1; |
1250 | | - cs_id = cs_fabric_id & cs_mask; |
1251 | | - } |
1252 | | - |
1253 | | - sock_id_bit = die_id_bit; |
1254 | | - |
1255 | | - /* Read D18F1x208 (SystemFabricIdMask). */ |
1256 | | - if (intlv_num_dies || intlv_num_sockets) |
1257 | | - if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) |
1258 | | - goto out_err; |
1259 | | - |
1260 | | - /* If interleaved over more than 1 die. */ |
1261 | | - if (intlv_num_dies) { |
1262 | | - sock_id_bit = die_id_bit + intlv_num_dies; |
1263 | | - die_id_shift = (ctx.tmp >> 24) & 0xF; |
1264 | | - die_id_mask = (ctx.tmp >> 8) & 0xFF; |
1265 | | - |
1266 | | - cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; |
1267 | | - } |
1268 | | - |
1269 | | - /* If interleaved over more than 1 socket. */ |
1270 | | - if (intlv_num_sockets) { |
1271 | | - socket_id_shift = (ctx.tmp >> 28) & 0xF; |
1272 | | - socket_id_mask = (ctx.tmp >> 16) & 0xFF; |
1273 | | - |
1274 | | - cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; |
1275 | | - } |
1276 | | - |
1277 | | - /* |
1278 | | - * The pre-interleaved address consists of XXXXXXIIIYYYYY |
1279 | | - * where III is the ID for this CS, and XXXXXXYYYYY are the |
1280 | | - * address bits from the post-interleaved address. |
1281 | | - * "num_intlv_bits" has been calculated to tell us how many "I" |
1282 | | - * bits there are. "intlv_addr_bit" tells us how many "Y" bits |
1283 | | - * there are (where "I" starts). |
1284 | | - */ |
1285 | | - temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); |
1286 | | - temp_addr_i = (cs_id << intlv_addr_bit); |
1287 | | - temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; |
1288 | | - ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; |
1289 | | - } |
1290 | | - |
1291 | | - /* Add dram base address */ |
1292 | | - ctx.ret_addr += dram_base_addr; |
1293 | | - |
1294 | | - /* If legacy MMIO hole enabled */ |
1295 | | - if (lgcy_mmio_hole_en) { |
1296 | | - if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) |
1297 | | - goto out_err; |
1298 | | - |
1299 | | - dram_hole_base = ctx.tmp & GENMASK(31, 24); |
1300 | | - if (ctx.ret_addr >= dram_hole_base) |
1301 | | - ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); |
1302 | | - } |
1303 | | - |
1304 | | - if (hash_enabled) { |
1305 | | - /* Save some parentheses and grab the least-significant bit at the end. */ |
1306 | | - hashed_bit = (ctx.ret_addr >> 12) ^ |
1307 | | - (ctx.ret_addr >> 18) ^ |
1308 | | - (ctx.ret_addr >> 21) ^ |
1309 | | - (ctx.ret_addr >> 30) ^ |
1310 | | - cs_id; |
1311 | | - |
1312 | | - hashed_bit &= BIT(0); |
1313 | | - |
1314 | | - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) |
1315 | | - ctx.ret_addr ^= BIT(intlv_addr_bit); |
1316 | | - } |
1317 | | - |
1318 | | - /* Is the calculated system address above the DRAM limit address? */ |
1319 | | - if (ctx.ret_addr > dram_limit_addr) |
1320 | | - goto out_err; |
1321 | | - |
1322 | | - *sys_addr = ctx.ret_addr; |
1323 | | - return 0; |
1324 | | - |
1325 | | -out_err: |
1326 | | - return -EINVAL; |
1327 | | -} |
1328 | | - |
1329 | 1055 | static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); |
1330 | 1056 |
|
1331 | 1057 | /* |
@@ -3073,9 +2799,10 @@ static void decode_umc_error(int node_id, struct mce *m) |
3073 | 2799 | { |
3074 | 2800 | u8 ecc_type = (m->status >> 45) & 0x3; |
3075 | 2801 | struct mem_ctl_info *mci; |
| 2802 | + unsigned long sys_addr; |
3076 | 2803 | struct amd64_pvt *pvt; |
| 2804 | + struct atl_err a_err; |
3077 | 2805 | struct err_info err; |
3078 | | - u64 sys_addr; |
3079 | 2806 |
|
3080 | 2807 | node_id = fixup_node_id(node_id, m); |
3081 | 2808 |
|
@@ -3106,7 +2833,12 @@ static void decode_umc_error(int node_id, struct mce *m) |
3106 | 2833 |
|
3107 | 2834 | pvt->ops->get_err_info(m, &err); |
3108 | 2835 |
|
3109 | | - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { |
| 2836 | + a_err.addr = m->addr; |
| 2837 | + a_err.ipid = m->ipid; |
| 2838 | + a_err.cpu = m->extcpu; |
| 2839 | + |
| 2840 | + sys_addr = amd_convert_umc_mca_addr_to_sys_addr(&a_err); |
| 2841 | + if (IS_ERR_VALUE(sys_addr)) { |
3110 | 2842 | err.err_code = ERR_NORM_ADDR; |
3111 | 2843 | goto log_error; |
3112 | 2844 | } |
|
0 commit comments