Skip to content

Commit 7f34ddf

Browse files
Tao Zhou authored and alexdeucher committed
drm/amdgpu: add ras_eeprom_read_idx interface
PMFW manages the RAS EEPROM data by itself, so add a new interface to read EEPROM data via PMFW; a subset of the records can be read by specifying a starting index. v2: use the IPID parse interface; pa is not used, so it is set to a fixed value. v3: optimize the null pointer check for the IPID parse interface. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Yang Wang <kevinyang.wang@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent cd74132 commit 7f34ddf

2 files changed

Lines changed: 55 additions & 0 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,50 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
970970
return res;
971971
}
972972

973+
/**
 * amdgpu_ras_eeprom_read_idx -- read a range of bad page records via PMFW
 * @control: pointer to control structure
 * @record: output array; the caller must provide room for at least @num
 *          records
 * @rec_idx: index of the first record to read
 * @num: number of consecutive records to read
 *
 * For every index in [@rec_idx, @rec_idx + @num) the MCA address, the IPID
 * and the timestamp are queried through the amdgpu_ras_smu_get_*() helpers
 * and stored in @record, starting at @record[0]. The memory channel and UMC
 * instance of each record are derived from the IPID by the
 * umc.ras->mca_ipid_parse() callback.
 *
 * Return: 0 on success, and also 0 without touching @record when
 * amdgpu_ras_smu_eeprom_supported() reports false; -EOPNOTSUPP when no IPID
 * parse callback is installed; -EINVAL when @rec_idx + @num wraps around;
 * otherwise the first non-zero value returned by one of the SMU queries
 * (records before the failing index have already been written).
 */
int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
			       struct eeprom_table_record *record, u32 rec_idx,
			       const u32 num)
{
	struct amdgpu_device *adev = to_amdgpu_device(control);
	struct eeprom_table_record *rec;
	uint32_t ch_inst, umc_inst;
	uint64_t ts;
	u64 mca, ipid;
	u32 n, idx;
	int ret;

	if (!amdgpu_ras_smu_eeprom_supported(adev))
		return 0;

	if (!adev->umc.ras || !adev->umc.ras->mca_ipid_parse)
		return -EOPNOTSUPP;

	/*
	 * The exclusive end index must be representable in 32 bits. The
	 * previous code computed the sum in 32 bits and silently read
	 * nothing when it wrapped; report the bad range instead.
	 */
	if (rec_idx + num < rec_idx)
		return -EINVAL;

	/* Iterate by output offset so index and loop bound share one type. */
	for (n = 0; n < num; n++) {
		idx = rec_idx + n;
		rec = &record[n];

		ret = amdgpu_ras_smu_get_badpage_mca_addr(adev, idx, &mca);
		if (ret)
			return ret;

		ret = amdgpu_ras_smu_get_badpage_ipid(adev, idx, &ipid);
		if (ret)
			return ret;

		ret = amdgpu_ras_smu_get_timestamp(adev, idx, &ts);
		if (ret)
			return ret;

		rec->address = mca;
		/* retired_page (pa) is unused now */
		rec->retired_page = 0x1ULL;
		rec->ts = ts;
		rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
		rec->cu = 0;

		/*
		 * The parse callback stores through uint32_t pointers. Hand it
		 * real uint32_t objects instead of casting the addresses of the
		 * record fields: if those fields are narrower than 32 bits
		 * (NOTE(review): confirm against struct eeprom_table_record),
		 * a cast pointer lets the callback write past the field into
		 * neighbouring members or the next record. The temporaries are
		 * seeded with the current field values so a callback that
		 * leaves an output untouched does not change the record.
		 */
		ch_inst = rec->mem_channel;
		umc_inst = rec->mcumc_id;
		adev->umc.ras->mca_ipid_parse(adev, ipid, NULL,
					      &ch_inst, &umc_inst, NULL);
		rec->mem_channel = ch_inst;
		rec->mcumc_id = umc_inst;
	}

	return 0;
}
1016+
9731017
/**
9741018
* amdgpu_ras_eeprom_read -- read EEPROM
9751019
* @control: pointer to control structure
@@ -991,6 +1035,9 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
9911035
u8 *buf, *pp;
9921036
u32 g0, g1;
9931037

1038+
if (amdgpu_ras_smu_eeprom_supported(adev))
1039+
return amdgpu_ras_eeprom_read_idx(control, record, 0, num);
1040+
9941041
if (!__is_ras_eeprom_supported(adev))
9951042
return 0;
9961043

@@ -1162,6 +1209,10 @@ static ssize_t amdgpu_ras_debugfs_table_read(struct file *f, char __user *buf,
11621209
int res = -EFAULT;
11631210
size_t data_len;
11641211

1212+
/* pmfw manages eeprom data by itself */
1213+
if (amdgpu_ras_smu_eeprom_supported(adev))
1214+
return 0;
1215+
11651216
mutex_lock(&control->ras_tbl_mutex);
11661217

11671218
/* We want *pos - data_len > 0, which means there's

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ int amdgpu_ras_smu_get_badpage_ipid(struct amdgpu_device *adev,
186186
int amdgpu_ras_smu_erase_ras_table(struct amdgpu_device *adev,
187187
uint32_t *result);
188188

189+
int amdgpu_ras_eeprom_read_idx(struct amdgpu_ras_eeprom_control *control,
190+
struct eeprom_table_record *record, u32 rec_idx,
191+
const u32 num);
192+
189193
extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops;
190194
extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops;
191195

0 commit comments

Comments
 (0)