@@ -1165,13 +1165,53 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
11651165 }
11661166}
11671167
1168- /* query/inject/cure begin */
1169- int amdgpu_ras_query_error_status (struct amdgpu_device * adev ,
1170- struct ras_query_if * info )
1168+ static int amdgpu_ras_query_error_status_helper (struct amdgpu_device * adev ,
1169+ struct ras_query_if * info ,
1170+ struct ras_err_data * err_data ,
1171+ unsigned int error_query_mode )
11711172{
1173+ enum amdgpu_ras_block blk = info ? info -> head .block : AMDGPU_RAS_BLOCK_COUNT ;
11721174 struct amdgpu_ras_block_object * block_obj = NULL ;
1175+
1176+ if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY )
1177+ return - EINVAL ;
1178+
1179+ if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY ) {
1180+ if (info -> head .block == AMDGPU_RAS_BLOCK__UMC ) {
1181+ amdgpu_ras_get_ecc_info (adev , err_data );
1182+ } else {
1183+ block_obj = amdgpu_ras_get_ras_block (adev , info -> head .block , 0 );
1184+ if (!block_obj || !block_obj -> hw_ops ) {
1185+ dev_dbg_once (adev -> dev , "%s doesn't config RAS function\n" ,
1186+ get_ras_block_str (& info -> head ));
1187+ return - EINVAL ;
1188+ }
1189+
1190+ if (block_obj -> hw_ops -> query_ras_error_count )
1191+ block_obj -> hw_ops -> query_ras_error_count (adev , & err_data );
1192+
1193+ if ((info -> head .block == AMDGPU_RAS_BLOCK__SDMA ) ||
1194+ (info -> head .block == AMDGPU_RAS_BLOCK__GFX ) ||
1195+ (info -> head .block == AMDGPU_RAS_BLOCK__MMHUB )) {
1196+ if (block_obj -> hw_ops -> query_ras_error_status )
1197+ block_obj -> hw_ops -> query_ras_error_status (adev );
1198+ }
1199+ }
1200+ } else {
1201+ /* FIXME: add code to check return value later */
1202+ amdgpu_mca_smu_log_ras_error (adev , blk , AMDGPU_MCA_ERROR_TYPE_UE , err_data );
1203+ amdgpu_mca_smu_log_ras_error (adev , blk , AMDGPU_MCA_ERROR_TYPE_CE , err_data );
1204+ }
1205+
1206+ return 0 ;
1207+ }
1208+
1209+ /* query/inject/cure begin */
1210+ int amdgpu_ras_query_error_status (struct amdgpu_device * adev , struct ras_query_if * info )
1211+ {
11731212 struct ras_manager * obj = amdgpu_ras_find_obj (adev , & info -> head );
11741213 struct ras_err_data err_data ;
1214+ unsigned int error_query_mode ;
11751215 int ret ;
11761216
11771217 if (!obj )
@@ -1181,27 +1221,14 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
11811221 if (ret )
11821222 return ret ;
11831223
1184- if (info -> head .block == AMDGPU_RAS_BLOCK__UMC ) {
1185- amdgpu_ras_get_ecc_info (adev , & err_data );
1186- } else {
1187- block_obj = amdgpu_ras_get_ras_block (adev , info -> head .block , 0 );
1188- if (!block_obj || !block_obj -> hw_ops ) {
1189- dev_dbg_once (adev -> dev , "%s doesn't config RAS function\n" ,
1190- get_ras_block_str (& info -> head ));
1191- ret = - EINVAL ;
1192- goto out_fini_err_data ;
1193- }
1194-
1195- if (block_obj -> hw_ops -> query_ras_error_count )
1196- block_obj -> hw_ops -> query_ras_error_count (adev , & err_data );
1224+ if (!amdgpu_ras_get_error_query_mode (adev , & error_query_mode ))
1225+ return - EINVAL ;
11971226
1198- if ((info -> head .block == AMDGPU_RAS_BLOCK__SDMA ) ||
1199- (info -> head .block == AMDGPU_RAS_BLOCK__GFX ) ||
1200- (info -> head .block == AMDGPU_RAS_BLOCK__MMHUB )) {
1201- if (block_obj -> hw_ops -> query_ras_error_status )
1202- block_obj -> hw_ops -> query_ras_error_status (adev );
1203- }
1204- }
1227+ ret = amdgpu_ras_query_error_status_helper (adev , info ,
1228+ & err_data ,
1229+ error_query_mode );
1230+ if (ret )
1231+ goto out_fini_err_data ;
12051232
12061233 amdgpu_rasmgr_error_data_statistic_update (obj , & err_data );
12071234
@@ -3397,6 +3424,26 @@ bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev)
33973424 return true;
33983425}
33993426
3427+ bool amdgpu_ras_get_error_query_mode (struct amdgpu_device * adev ,
3428+ unsigned int * error_query_mode )
3429+ {
3430+ struct amdgpu_ras * con = amdgpu_ras_get_context (adev );
3431+ const struct amdgpu_mca_smu_funcs * mca_funcs = adev -> mca .mca_funcs ;
3432+
3433+ if (!con ) {
3434+ * error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY ;
3435+ return false;
3436+ }
3437+
3438+ if (mca_funcs && mca_funcs -> mca_set_debug_mode )
3439+ * error_query_mode =
3440+ (con -> is_mca_debug_mode ) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY ;
3441+ else
3442+ * error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY ;
3443+
3444+ return true;
3445+ }
3446+
34003447/* Register each ip ras block into amdgpu ras */
34013448int amdgpu_ras_register_ras_block (struct amdgpu_device * adev ,
34023449 struct amdgpu_ras_block_object * ras_block_obj )
0 commit comments