Skip to content

Commit 8f74c70

Browse files
ChristianKoenigAMD authored and alexdeucher committed
drm/amdgpu: block CE CS if not explicitly allowed by module option
The Constant Engine found on gfx6-gfx10 HW has been a notorious source of problems. RADV never used it in the first place, radeonsi only used it for a few releases around 2017 for gfx6-gfx9 before dropping support for it as well. While investigating another problem I just recently found that submitting to the CE seems to be completely broken on gfx9 for quite a while. Since nobody complained about that problem, it most likely means that nobody is using any of the affected radeonsi versions on current Linux kernels any more. So to potentially phase out the support for the CE and eliminate another source of problems, block submitting CE IBs unless it is enabled again using a debug flag. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Timur Kristóf <timur.kristof@gmail.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 5d55ed1 commit 8f74c70

3 files changed

Lines changed: 14 additions & 1 deletion

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,7 @@ struct amdgpu_device {
12901290
bool debug_disable_gpu_ring_reset;
12911291
bool debug_vm_userptr;
12921292
bool debug_disable_ce_logs;
1293+
bool debug_enable_ce_cs;
12931294

12941295
/* Protection for the following isolation structure */
12951296
struct mutex enforce_isolation_mutex;

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
364364
if (p->uf_bo && ring->funcs->no_user_fence)
365365
return -EINVAL;
366366

367+
if (!p->adev->debug_enable_ce_cs &&
368+
chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
369+
dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
370+
return -EINVAL;
371+
}
372+
367373
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
368374
chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
369375
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
144144
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
145145
AMDGPU_DEBUG_SMU_POOL = BIT(7),
146146
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
147-
AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
147+
AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
148+
AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
148149
};
149150

150151
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
22892290
pr_info("debug: disable kernel logs of correctable errors\n");
22902291
adev->debug_disable_ce_logs = true;
22912292
}
2293+
2294+
if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
2295+
pr_info("debug: allowing command submission to CE engine\n");
2296+
adev->debug_enable_ce_cs = true;
2297+
}
22922298
}
22932299

22942300
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)

0 commit comments

Comments
 (0)