Skip to content

Commit cd5b28a

Browse files
Gangliang Xie authored and alexdeucher committed
drm/amdgpu: add check function for pmfw eeprom
add check function for pmfw eeprom

Signed-off-by: Gangliang Xie <ganglxie@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 19c815d commit cd5b28a

1 file changed

Lines changed: 44 additions & 0 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,13 +1499,57 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
14991499
return 0;
15001500
}
15011501

1502+
static int amdgpu_ras_smu_eeprom_check(struct amdgpu_ras_eeprom_control *control)
1503+
{
1504+
struct amdgpu_device *adev = to_amdgpu_device(control);
1505+
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
1506+
1507+
if (!__is_ras_eeprom_supported(adev))
1508+
return 0;
1509+
1510+
control->ras_num_bad_pages = ras->bad_page_num;
1511+
1512+
if ((ras->bad_page_cnt_threshold < control->ras_num_bad_pages) &&
1513+
amdgpu_bad_page_threshold != 0) {
1514+
dev_warn(adev->dev,
1515+
"RAS records:%d exceed threshold:%d\n",
1516+
control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
1517+
if ((amdgpu_bad_page_threshold == -1) ||
1518+
(amdgpu_bad_page_threshold == -2)) {
1519+
dev_warn(adev->dev,
1520+
"Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n");
1521+
} else {
1522+
ras->is_rma = true;
1523+
dev_warn(adev->dev,
1524+
"User defined threshold is set, runtime service will be halt when threshold is reached\n");
1525+
}
1526+
1527+
return 0;
1528+
}
1529+
1530+
dev_dbg(adev->dev,
1531+
"Found existing EEPROM table with %d records",
1532+
control->ras_num_bad_pages);
1533+
1534+
/* Warn if we are at 90% of the threshold or above
1535+
*/
1536+
if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold)
1537+
dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d",
1538+
control->ras_num_bad_pages,
1539+
ras->bad_page_cnt_threshold);
1540+
return 0;
1541+
}
1542+
15021543
int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
15031544
{
15041545
struct amdgpu_device *adev = to_amdgpu_device(control);
15051546
struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
15061547
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
15071548
int res = 0;
15081549

1550+
if (amdgpu_ras_smu_eeprom_supported(adev))
1551+
return amdgpu_ras_smu_eeprom_check(control);
1552+
15091553
if (!__is_ras_eeprom_supported(adev))
15101554
return 0;
15111555

0 commit comments

Comments
 (0)