@@ -301,85 +301,56 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
301301 }
302302}
303303
304- static __always_inline int mce_severity_amd_smca (struct mce * m , enum context err_ctx )
304+ /* See AMD PPR(s) section Machine Check Error Handling. */
305+ static noinstr int mce_severity_amd (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
305306{
306- u64 mcx_cfg ;
307+ int ret ;
307308
308309 /*
309- * We need to look at the following bits:
310- * - "succor" bit (data poisoning support), and
311- * - TCC bit (Task Context Corrupt)
312- * in MCi_STATUS to determine error severity.
310+ * Default return value: Action required, the error must be handled
311+ * immediately.
313312 */
314- if (!mce_flags .succor )
315- return MCE_PANIC_SEVERITY ;
316-
317- mcx_cfg = mce_rdmsrl (MSR_AMD64_SMCA_MCx_CONFIG (m -> bank ));
318-
319- /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
320- if ((mcx_cfg & MCI_CONFIG_MCAX ) &&
321- (m -> status & MCI_STATUS_TCC ) &&
322- (err_ctx == IN_KERNEL ))
323- return MCE_PANIC_SEVERITY ;
324-
325- /* ...otherwise invoke hwpoison handler. */
326- return MCE_AR_SEVERITY ;
327- }
328-
329- /*
330- * See AMD Error Scope Hierarchy table in a newer BKDG. For example
331- * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
332- */
333- static noinstr int mce_severity_amd (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
334- {
335- enum context ctx = error_context (m , regs );
313+ ret = MCE_AR_SEVERITY ;
336314
337315 /* Processor Context Corrupt, no need to fumble too much, die! */
338- if (m -> status & MCI_STATUS_PCC )
339- return MCE_PANIC_SEVERITY ;
340-
341- if (m -> status & MCI_STATUS_UC ) {
342-
343- if (ctx == IN_KERNEL )
344- return MCE_PANIC_SEVERITY ;
316+ if (m -> status & MCI_STATUS_PCC ) {
317+ ret = MCE_PANIC_SEVERITY ;
318+ goto out ;
319+ }
345320
346- /*
347- * On older systems where overflow_recov flag is not present, we
348- * should simply panic if an error overflow occurs. If
349- * overflow_recov flag is present and set, then software can try
350- * to at least kill process to prolong system operation.
351- */
352- if (mce_flags .overflow_recov ) {
353- if (mce_flags .smca )
354- return mce_severity_amd_smca (m , ctx );
355-
356- /* kill current process */
357- return MCE_AR_SEVERITY ;
358- } else {
359- /* at least one error was not logged */
360- if (m -> status & MCI_STATUS_OVER )
361- return MCE_PANIC_SEVERITY ;
362- }
363-
364- /*
365- * For any other case, return MCE_UC_SEVERITY so that we log the
366- * error and exit #MC handler.
367- */
368- return MCE_UC_SEVERITY ;
321+ if (m -> status & MCI_STATUS_DEFERRED ) {
322+ ret = MCE_DEFERRED_SEVERITY ;
323+ goto out ;
369324 }
370325
371326 /*
372- * deferred error: poll handler catches these and adds to mce_ring so
373- * memory-failure can take recovery actions .
327+ * If the UC bit is not set, the system either corrected or deferred
328+ * the error. No action will be required after logging the error .
374329 */
375- if (m -> status & MCI_STATUS_DEFERRED )
376- return MCE_DEFERRED_SEVERITY ;
330+ if (!(m -> status & MCI_STATUS_UC )) {
331+ ret = MCE_KEEP_SEVERITY ;
332+ goto out ;
333+ }
377334
378335 /*
379- * corrected error: poll handler catches these and passes responsibility
380- * of decoding the error to EDAC
336+ * On MCA overflow, without the MCA overflow recovery feature the
337+ * system will not be able to recover, panic.
381338 */
382- return MCE_KEEP_SEVERITY ;
339+ if ((m -> status & MCI_STATUS_OVER ) && !mce_flags .overflow_recov ) {
340+ ret = MCE_PANIC_SEVERITY ;
341+ goto out ;
342+ }
343+
344+ if (!mce_flags .succor ) {
345+ ret = MCE_PANIC_SEVERITY ;
346+ goto out ;
347+ }
348+
349+ if (error_context (m , regs ) == IN_KERNEL )
350+ ret = MCE_PANIC_SEVERITY ;
351+
352+ out :
353+ return ret ;
383354}
384355
385356static noinstr int mce_severity_intel (struct mce * m , struct pt_regs * regs , char * * msg , bool is_excp )
0 commit comments