@@ -103,6 +103,16 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = {
103103 smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
104104};
105105
106+ static const int xgmi3x16_pcs_err_status_reg_v6_4 [] = {
107+ smnPCS_XGMI3X16_PCS_ERROR_STATUS ,
108+ smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000
109+ };
110+
111+ static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4 [] = {
112+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK ,
113+ smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000
114+ };
115+
106116static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields [] = {
107117 {"XGMI PCS DataLossErr" ,
108118 SOC15_REG_FIELD (XGMI0_PCS_GOPX16_PCS_ERROR_STATUS , DataLossErr )},
@@ -952,6 +962,16 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
952962 default :
953963 break ;
954964 }
965+
966+ switch (amdgpu_ip_version (adev , XGMI_HWIP , 0 )) {
967+ case IP_VERSION (6 , 4 , 0 ):
968+ for (i = 0 ; i < ARRAY_SIZE (xgmi3x16_pcs_err_status_reg_v6_4 ); i ++ )
969+ pcs_clear_status (adev ,
970+ xgmi3x16_pcs_err_status_reg_v6_4 [i ]);
971+ break ;
972+ default :
973+ break ;
974+ }
955975}
956976
957977static int amdgpu_xgmi_query_pcs_error_status (struct amdgpu_device * adev ,
@@ -969,7 +989,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
969989
970990 if (is_xgmi_pcs ) {
971991 if (amdgpu_ip_version (adev , XGMI_HWIP , 0 ) ==
972- IP_VERSION (6 , 1 , 0 )) {
992+ IP_VERSION (6 , 1 , 0 ) ||
993+ amdgpu_ip_version (adev , XGMI_HWIP , 0 ) ==
994+ IP_VERSION (6 , 4 , 0 )) {
973995 pcs_ras_fields = & xgmi3x16_pcs_ras_fields [0 ];
974996 field_array_size = ARRAY_SIZE (xgmi3x16_pcs_ras_fields );
975997 } else {
@@ -1007,7 +1029,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
10071029 void * ras_error_status )
10081030{
10091031 struct ras_err_data * err_data = (struct ras_err_data * )ras_error_status ;
1010- int i ;
1032+ int i , supported = 1 ;
10111033 uint32_t data , mask_data = 0 ;
10121034 uint32_t ue_cnt = 0 , ce_cnt = 0 ;
10131035
@@ -1071,7 +1093,25 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
10711093 }
10721094 break ;
10731095 default :
1074- dev_warn (adev -> dev , "XGMI RAS error query not supported" );
1096+ supported = 0 ;
1097+ break ;
1098+ }
1099+
1100+ switch (amdgpu_ip_version (adev , XGMI_HWIP , 0 )) {
1101+ case IP_VERSION (6 , 4 , 0 ):
1102+ /* check xgmi3x16 pcs error */
1103+ for (i = 0 ; i < ARRAY_SIZE (xgmi3x16_pcs_err_status_reg_v6_4 ); i ++ ) {
1104+ data = RREG32_PCIE (xgmi3x16_pcs_err_status_reg_v6_4 [i ]);
1105+ mask_data =
1106+ RREG32_PCIE (xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4 [i ]);
1107+ if (data )
1108+ amdgpu_xgmi_query_pcs_error_status (adev , data ,
1109+ mask_data , & ue_cnt , & ce_cnt , true, true);
1110+ }
1111+ break ;
1112+ default :
1113+ if (!supported )
1114+ dev_warn (adev -> dev , "XGMI RAS error query not supported" );
10751115 break ;
10761116 }
10771117
0 commit comments