@@ -1499,13 +1499,57 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
14991499 return 0 ;
15001500}
15011501
1502+ static int amdgpu_ras_smu_eeprom_check (struct amdgpu_ras_eeprom_control * control )
1503+ {
1504+ struct amdgpu_device * adev = to_amdgpu_device (control );
1505+ struct amdgpu_ras * ras = amdgpu_ras_get_context (adev );
1506+
1507+ if (!__is_ras_eeprom_supported (adev ))
1508+ return 0 ;
1509+
1510+ control -> ras_num_bad_pages = ras -> bad_page_num ;
1511+
1512+ if ((ras -> bad_page_cnt_threshold < control -> ras_num_bad_pages ) &&
1513+ amdgpu_bad_page_threshold != 0 ) {
1514+ dev_warn (adev -> dev ,
1515+ "RAS records:%d exceed threshold:%d\n" ,
1516+ control -> ras_num_bad_pages , ras -> bad_page_cnt_threshold );
1517+ if ((amdgpu_bad_page_threshold == -1 ) ||
1518+ (amdgpu_bad_page_threshold == -2 )) {
1519+ dev_warn (adev -> dev ,
1520+ "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n" );
1521+ } else {
1522+ ras -> is_rma = true;
1523+ dev_warn (adev -> dev ,
1524+ "User defined threshold is set, runtime service will be halt when threshold is reached\n" );
1525+ }
1526+
1527+ return 0 ;
1528+ }
1529+
1530+ dev_dbg (adev -> dev ,
1531+ "Found existing EEPROM table with %d records" ,
1532+ control -> ras_num_bad_pages );
1533+
1534+ /* Warn if we are at 90% of the threshold or above
1535+ */
1536+ if (10 * control -> ras_num_bad_pages >= 9 * ras -> bad_page_cnt_threshold )
1537+ dev_warn (adev -> dev , "RAS records:%u exceeds 90%% of threshold:%d" ,
1538+ control -> ras_num_bad_pages ,
1539+ ras -> bad_page_cnt_threshold );
1540+ return 0 ;
1541+ }
1542+
15021543int amdgpu_ras_eeprom_check (struct amdgpu_ras_eeprom_control * control )
15031544{
15041545 struct amdgpu_device * adev = to_amdgpu_device (control );
15051546 struct amdgpu_ras_eeprom_table_header * hdr = & control -> tbl_hdr ;
15061547 struct amdgpu_ras * ras = amdgpu_ras_get_context (adev );
15071548 int res = 0 ;
15081549
1550+ if (amdgpu_ras_smu_eeprom_supported (adev ))
1551+ return amdgpu_ras_smu_eeprom_check (control );
1552+
15091553 if (!__is_ras_eeprom_supported (adev ))
15101554 return 0 ;
15111555
0 commit comments