7272/* Bad GPU tag ‘BADG’ */
7373#define RAS_TABLE_HDR_BAD 0x42414447
7474
75+ /**
76+ * EEPROM Table structure v1
77+ * ---------------------------------
78+ * | |
79+ * | EEPROM TABLE HEADER |
80+ * | ( size 20 Bytes ) |
81+ * | |
82+ * ---------------------------------
83+ * | |
84+ * | BAD PAGE RECORD AREA |
85+ * | |
86+ * ---------------------------------
87+ */
88+
7589/* Assume 2-Mbit size EEPROM and take up the whole space. */
7690#define RAS_TBL_SIZE_BYTES (256 * 1024)
7791#define RAS_TABLE_START 0
8094#define RAS_MAX_RECORD_COUNT ((RAS_TBL_SIZE_BYTES - RAS_TABLE_HEADER_SIZE) \
8195 / RAS_TABLE_RECORD_SIZE)
8296
97+ /**
98+ * EEPROM Table structure v2.1
99+ * ---------------------------------
100+ * | |
101+ * | EEPROM TABLE HEADER |
102+ * | ( size 20 Bytes ) |
103+ * | |
104+ * ---------------------------------
105+ * | |
106+ * | EEPROM TABLE RAS INFO |
107+ * | (available info size 4 Bytes) |
108+ * | ( reserved size 252 Bytes ) |
109+ * | |
110+ * ---------------------------------
111+ * | |
112+ * | BAD PAGE RECORD AREA |
113+ * | |
114+ * ---------------------------------
115+ */
116+
83117/* EEPROM Table V2_1 */
84118#define RAS_TABLE_V2_1_INFO_SIZE 256
85119#define RAS_TABLE_V2_1_INFO_START RAS_TABLE_HEADER_SIZE
@@ -242,6 +276,69 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control)
242276 return res ;
243277}
244278
279+ static void
280+ __encode_table_ras_info_to_buf (struct amdgpu_ras_eeprom_table_ras_info * rai ,
281+ unsigned char * buf )
282+ {
283+ u32 * pp = (uint32_t * )buf ;
284+ u32 tmp ;
285+
286+ tmp = ((uint32_t )(rai -> rma_status ) & 0xFF ) |
287+ (((uint32_t )(rai -> health_percent ) << 8 ) & 0xFF00 ) |
288+ (((uint32_t )(rai -> ecc_page_threshold ) << 16 ) & 0xFFFF0000 );
289+ pp [0 ] = cpu_to_le32 (tmp );
290+ }
291+
292+ static void
293+ __decode_table_ras_info_from_buf (struct amdgpu_ras_eeprom_table_ras_info * rai ,
294+ unsigned char * buf )
295+ {
296+ u32 * pp = (uint32_t * )buf ;
297+ u32 tmp ;
298+
299+ tmp = le32_to_cpu (pp [0 ]);
300+ rai -> rma_status = tmp & 0xFF ;
301+ rai -> health_percent = (tmp >> 8 ) & 0xFF ;
302+ rai -> ecc_page_threshold = (tmp >> 16 ) & 0xFFFF ;
303+ }
304+
305+ static int __write_table_ras_info (struct amdgpu_ras_eeprom_control * control )
306+ {
307+ struct amdgpu_device * adev = to_amdgpu_device (control );
308+ u8 * buf ;
309+ int res ;
310+
311+ buf = kzalloc (RAS_TABLE_V2_1_INFO_SIZE , GFP_KERNEL );
312+ if (!buf ) {
313+ DRM_ERROR ("Failed to alloc buf to write table ras info\n" );
314+ return - ENOMEM ;
315+ }
316+
317+ __encode_table_ras_info_to_buf (& control -> tbl_rai , buf );
318+
319+ /* i2c may be unstable in gpu reset */
320+ down_read (& adev -> reset_domain -> sem );
321+ res = amdgpu_eeprom_write (adev -> pm .ras_eeprom_i2c_bus ,
322+ control -> i2c_address +
323+ control -> ras_info_offset ,
324+ buf , RAS_TABLE_V2_1_INFO_SIZE );
325+ up_read (& adev -> reset_domain -> sem );
326+
327+ if (res < 0 ) {
328+ DRM_ERROR ("Failed to write EEPROM table ras info:%d" , res );
329+ } else if (res < RAS_TABLE_V2_1_INFO_SIZE ) {
330+ DRM_ERROR ("Short write:%d out of %d\n" ,
331+ res , RAS_TABLE_V2_1_INFO_SIZE );
332+ res = - EIO ;
333+ } else {
334+ res = 0 ;
335+ }
336+
337+ kfree (buf );
338+
339+ return res ;
340+ }
341+
245342static u8 __calc_hdr_byte_sum (const struct amdgpu_ras_eeprom_control * control )
246343{
247344 int ii ;
@@ -301,14 +398,27 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
301398 mutex_lock (& control -> ras_tbl_mutex );
302399
303400 hdr -> header = RAS_TABLE_HDR_VAL ;
304- hdr -> version = RAS_TABLE_VER_V1 ;
305- hdr -> first_rec_offset = RAS_RECORD_START ;
306- hdr -> tbl_size = RAS_TABLE_HEADER_SIZE ;
401+ if (adev -> umc .ras &&
402+ adev -> umc .ras -> set_eeprom_table_version )
403+ adev -> umc .ras -> set_eeprom_table_version (hdr );
404+ else
405+ hdr -> version = RAS_TABLE_VER_V1 ;
406+
407+ if (hdr -> version == RAS_TABLE_VER_V2_1 ) {
408+ hdr -> first_rec_offset = RAS_RECORD_START_V2_1 ;
409+ hdr -> tbl_size = RAS_TABLE_HEADER_SIZE +
410+ RAS_TABLE_V2_1_INFO_SIZE ;
411+ } else {
412+ hdr -> first_rec_offset = RAS_RECORD_START ;
413+ hdr -> tbl_size = RAS_TABLE_HEADER_SIZE ;
414+ }
307415
308416 csum = __calc_hdr_byte_sum (control );
309417 csum = - csum ;
310418 hdr -> checksum = csum ;
311419 res = __write_table_header (control );
420+ if (!res && hdr -> version > RAS_TABLE_VER_V1 )
421+ res = __write_table_ras_info (control );
312422
313423 control -> ras_num_recs = 0 ;
314424 control -> ras_fri = 0 ;
@@ -587,9 +697,13 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
587697 control -> tbl_hdr .header = RAS_TABLE_HDR_BAD ;
588698 }
589699
590- control -> tbl_hdr .version = RAS_TABLE_VER_V1 ;
591- control -> tbl_hdr .first_rec_offset = RAS_INDEX_TO_OFFSET (control , control -> ras_fri );
592- control -> tbl_hdr .tbl_size = RAS_TABLE_HEADER_SIZE + control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
700+ if (control -> tbl_hdr .version == RAS_TABLE_VER_V2_1 )
701+ control -> tbl_hdr .tbl_size = RAS_TABLE_HEADER_SIZE +
702+ RAS_TABLE_V2_1_INFO_SIZE +
703+ control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
704+ else
705+ control -> tbl_hdr .tbl_size = RAS_TABLE_HEADER_SIZE +
706+ control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
593707 control -> tbl_hdr .checksum = 0 ;
594708
595709 buf_size = control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
@@ -629,6 +743,8 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
629743 csum = - csum ;
630744 control -> tbl_hdr .checksum = csum ;
631745 res = __write_table_header (control );
746+ if (!res && control -> tbl_hdr .version > RAS_TABLE_VER_V1 )
747+ res = __write_table_ras_info (control );
632748Out :
633749 kfree (buf );
634750 return res ;
@@ -819,9 +935,12 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control,
819935 return res ;
820936}
821937
822- uint32_t amdgpu_ras_eeprom_max_record_count (void )
938+ uint32_t amdgpu_ras_eeprom_max_record_count (struct amdgpu_ras_eeprom_control * control )
823939{
824- return RAS_MAX_RECORD_COUNT ;
940+ if (control -> tbl_hdr .version == RAS_TABLE_VER_V2_1 )
941+ return RAS_MAX_RECORD_COUNT_V2_1 ;
942+ else
943+ return RAS_MAX_RECORD_COUNT ;
825944}
826945
827946static ssize_t
@@ -1063,8 +1182,14 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
10631182 int buf_size , res ;
10641183 u8 csum , * buf , * pp ;
10651184
1066- buf_size = RAS_TABLE_HEADER_SIZE +
1067- control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
1185+ if (control -> tbl_hdr .version == RAS_TABLE_VER_V2_1 )
1186+ buf_size = RAS_TABLE_HEADER_SIZE +
1187+ RAS_TABLE_V2_1_INFO_SIZE +
1188+ control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
1189+ else
1190+ buf_size = RAS_TABLE_HEADER_SIZE +
1191+ control -> ras_num_recs * RAS_TABLE_RECORD_SIZE ;
1192+
10681193 buf = kzalloc (buf_size , GFP_KERNEL );
10691194 if (!buf ) {
10701195 DRM_ERROR ("Out of memory checking RAS table checksum.\n" );
@@ -1092,6 +1217,39 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
10921217 return res < 0 ? res : csum ;
10931218}
10941219
1220+ static int __read_table_ras_info (struct amdgpu_ras_eeprom_control * control )
1221+ {
1222+ struct amdgpu_ras_eeprom_table_ras_info * rai = & control -> tbl_rai ;
1223+ struct amdgpu_device * adev = to_amdgpu_device (control );
1224+ unsigned char * buf ;
1225+ int res ;
1226+
1227+ buf = kzalloc (RAS_TABLE_V2_1_INFO_SIZE , GFP_KERNEL );
1228+ if (!buf ) {
1229+ DRM_ERROR ("Failed to alloc buf to read EEPROM table ras info\n" );
1230+ return - ENOMEM ;
1231+ }
1232+
1233+ /**
1234+ * EEPROM table V2_1 supports ras info,
1235+ * read EEPROM table ras info
1236+ */
1237+ res = amdgpu_eeprom_read (adev -> pm .ras_eeprom_i2c_bus ,
1238+ control -> i2c_address + control -> ras_info_offset ,
1239+ buf , RAS_TABLE_V2_1_INFO_SIZE );
1240+ if (res < RAS_TABLE_V2_1_INFO_SIZE ) {
1241+ DRM_ERROR ("Failed to read EEPROM table ras info, res:%d" , res );
1242+ res = res >= 0 ? - EIO : res ;
1243+ goto Out ;
1244+ }
1245+
1246+ __decode_table_ras_info_from_buf (rai , buf );
1247+
1248+ Out :
1249+ kfree (buf );
1250+ return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res ;
1251+ }
1252+
10951253int amdgpu_ras_eeprom_init (struct amdgpu_ras_eeprom_control * control ,
10961254 bool * exceed_err_limit )
10971255{
@@ -1114,8 +1272,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
11141272 return - EINVAL ;
11151273
11161274 control -> ras_header_offset = RAS_HDR_START ;
1117- control -> ras_record_offset = RAS_RECORD_START ;
1118- control -> ras_max_record_count = RAS_MAX_RECORD_COUNT ;
1275+ control -> ras_info_offset = RAS_TABLE_V2_1_INFO_START ;
11191276 mutex_init (& control -> ras_tbl_mutex );
11201277
11211278 /* Read the table header from EEPROM address */
@@ -1129,12 +1286,27 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
11291286
11301287 __decode_table_header_from_buf (hdr , buf );
11311288
1132- control -> ras_num_recs = RAS_NUM_RECS (hdr );
1289+ if (hdr -> version == RAS_TABLE_VER_V2_1 ) {
1290+ control -> ras_num_recs = RAS_NUM_RECS_V2_1 (hdr );
1291+ control -> ras_record_offset = RAS_RECORD_START_V2_1 ;
1292+ control -> ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1 ;
1293+ } else {
1294+ control -> ras_num_recs = RAS_NUM_RECS (hdr );
1295+ control -> ras_record_offset = RAS_RECORD_START ;
1296+ control -> ras_max_record_count = RAS_MAX_RECORD_COUNT ;
1297+ }
11331298 control -> ras_fri = RAS_OFFSET_TO_INDEX (control , hdr -> first_rec_offset );
11341299
11351300 if (hdr -> header == RAS_TABLE_HDR_VAL ) {
11361301 DRM_DEBUG_DRIVER ("Found existing EEPROM table with %d records" ,
11371302 control -> ras_num_recs );
1303+
1304+ if (hdr -> version == RAS_TABLE_VER_V2_1 ) {
1305+ res = __read_table_ras_info (control );
1306+ if (res )
1307+ return res ;
1308+ }
1309+
11381310 res = __verify_ras_table_checksum (control );
11391311 if (res )
11401312 DRM_ERROR ("RAS table incorrect checksum or error:%d\n" ,
@@ -1148,6 +1320,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
11481320 ras -> bad_page_cnt_threshold );
11491321 } else if (hdr -> header == RAS_TABLE_HDR_BAD &&
11501322 amdgpu_bad_page_threshold != 0 ) {
1323+ if (hdr -> version == RAS_TABLE_VER_V2_1 ) {
1324+ res = __read_table_ras_info (control );
1325+ if (res )
1326+ return res ;
1327+ }
1328+
11511329 res = __verify_ras_table_checksum (control );
11521330 if (res )
11531331 DRM_ERROR ("RAS Table incorrect checksum or error:%d\n" ,
0 commit comments