File tree Expand file tree Collapse file tree
drivers/gpu/drm/amd/amdgpu Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -2065,6 +2065,14 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
20652065 ras -> gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET ;
20662066 reset_context .method = AMD_RESET_METHOD_MODE2 ;
20672067 }
2068+
2069+ /* When a fatal error occurs in poison mode, a mode1 reset is used to
2070+ * recover the GPU.
2071+ */
2072+ if (ras -> gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET ) {
2073+ ras -> gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET ;
2074+ set_bit (AMDGPU_NEED_FULL_RESET , & reset_context .flags );
2075+ }
20682076 }
20692077
20702078 amdgpu_device_gpu_recover (ras -> adev , NULL , & reset_context );
@@ -2955,9 +2963,12 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
29552963 return ;
29562964
29572965 if (atomic_cmpxchg (& amdgpu_ras_in_intr , 0 , 1 ) == 0 ) {
2966+ struct amdgpu_ras * ras = amdgpu_ras_get_context (adev );
2967+
29582968 dev_info (adev -> dev , "uncorrectable hardware error"
29592969 "(ERREVENT_ATHUB_INTERRUPT) detected!\n" );
29602970
2971+ ras -> gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET ;
29612972 amdgpu_ras_reset_gpu (adev );
29622973 }
29632974}
Original file line number Diff line number Diff line change @@ -340,6 +340,7 @@ enum amdgpu_ras_ret {
340340#define AMDGPU_RAS_ERR_ADDRESS_VALID (1 << 2)
341341
342342#define AMDGPU_RAS_GPU_RESET_MODE2_RESET (0x1 << 0)
343+ #define AMDGPU_RAS_GPU_RESET_MODE1_RESET (0x1 << 1)
343344
344345struct amdgpu_ras_err_status_reg_entry {
345346 uint32_t hwip ;
You can’t perform that action at this time.
0 commit comments