Skip to content

Commit 38b2096

Browse files
committed
drm/amdgpu: move scheduler wqueue handling into callbacks
Move the stopping and starting of the scheduler workqueue (wqueue) into the ring reset callbacks. On some IPs, we have to reset an engine that may have multiple queues. Moving the wqueue handling into the backend lets each IP stop and restart its queues as needed, based on the type of reset available.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 43ca5eb commit 38b2096

19 files changed

Lines changed: 55 additions & 21 deletions

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -135,17 +135,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
135135
} else if (amdgpu_gpu_recovery && ring->funcs->reset) {
136136
dev_err(adev->dev, "Starting %s ring reset\n",
137137
s_job->sched->name);
138-
139-
/*
140-
* Stop the scheduler to prevent anybody else from touching the
141-
* ring buffer.
142-
*/
143-
drm_sched_wqueue_stop(&ring->sched);
144-
145138
r = amdgpu_ring_reset(ring, job->vmid, NULL);
146139
if (!r) {
147140
atomic_inc(&ring->adev->gpu_reset_counter);
148-
drm_sched_wqueue_start(&ring->sched);
149141
dev_err(adev->dev, "Ring %s reset succeeded\n",
150142
ring->sched.name);
151143
drm_dev_wedged_event(adev_to_drm(adev),

drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -554,22 +554,16 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
554554
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
555555
struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
556556
struct amdgpu_ring *page_ring = &sdma_instance->page;
557-
bool gfx_sched_stopped = false, page_sched_stopped = false;
558557

559558
mutex_lock(&sdma_instance->engine_reset_mutex);
560559
/* Stop the scheduler's work queue for the GFX and page rings if they are running.
561560
* This ensures that no new tasks are submitted to the queues while
562561
* the reset is in progress.
563562
*/
564-
if (!amdgpu_ring_sched_ready(gfx_ring)) {
565-
drm_sched_wqueue_stop(&gfx_ring->sched);
566-
gfx_sched_stopped = true;
567-
}
563+
drm_sched_wqueue_stop(&gfx_ring->sched);
568564

569-
if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) {
565+
if (adev->sdma.has_page_queue)
570566
drm_sched_wqueue_stop(&page_ring->sched);
571-
page_sched_stopped = true;
572-
}
573567

574568
if (sdma_instance->funcs->stop_kernel_queue) {
575569
sdma_instance->funcs->stop_kernel_queue(gfx_ring);
@@ -596,12 +590,9 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
596590
* to be submitted to the queues after the reset is complete.
597591
*/
598592
if (!ret) {
599-
if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
600-
drm_sched_wqueue_start(&gfx_ring->sched);
601-
}
602-
if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) {
593+
drm_sched_wqueue_start(&gfx_ring->sched);
594+
if (adev->sdma.has_page_queue)
603595
drm_sched_wqueue_start(&page_ring->sched);
604-
}
605596
}
606597
mutex_unlock(&sdma_instance->engine_reset_mutex);
607598

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9540,6 +9540,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
95409540
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
95419541
return -EINVAL;
95429542

9543+
drm_sched_wqueue_stop(&ring->sched);
9544+
95439545
spin_lock_irqsave(&kiq->ring_lock, flags);
95449546

95459547
if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) {
@@ -9581,6 +9583,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring,
95819583
if (r)
95829584
return r;
95839585
amdgpu_fence_driver_force_completion(ring);
9586+
drm_sched_wqueue_start(&ring->sched);
95849587
return 0;
95859588
}
95869589

@@ -9600,6 +9603,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
96009603
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
96019604
return -EINVAL;
96029605

9606+
drm_sched_wqueue_stop(&ring->sched);
9607+
96039608
spin_lock_irqsave(&kiq->ring_lock, flags);
96049609

96059610
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -9658,6 +9663,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
96589663
if (r)
96599664
return r;
96609665
amdgpu_fence_driver_force_completion(ring);
9666+
drm_sched_wqueue_start(&ring->sched);
96619667
return 0;
96629668
}
96639669

drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6821,6 +6821,8 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
68216821
if (amdgpu_sriov_vf(adev))
68226822
return -EINVAL;
68236823

6824+
drm_sched_wqueue_stop(&ring->sched);
6825+
68246826
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
68256827
if (r) {
68266828

@@ -6846,6 +6848,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring,
68466848
if (r)
68476849
return r;
68486850
amdgpu_fence_driver_force_completion(ring);
6851+
drm_sched_wqueue_start(&ring->sched);
68496852
return 0;
68506853
}
68516854

@@ -6989,6 +6992,8 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
69896992
if (amdgpu_sriov_vf(adev))
69906993
return -EINVAL;
69916994

6995+
drm_sched_wqueue_stop(&ring->sched);
6996+
69926997
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
69936998
if (r) {
69946999
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
@@ -7012,6 +7017,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring,
70127017
if (r)
70137018
return r;
70147019
amdgpu_fence_driver_force_completion(ring);
7020+
drm_sched_wqueue_start(&ring->sched);
70157021
return 0;
70167022
}
70177023

drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5317,6 +5317,8 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
53175317
if (amdgpu_sriov_vf(adev))
53185318
return -EINVAL;
53195319

5320+
drm_sched_wqueue_stop(&ring->sched);
5321+
53205322
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false);
53215323
if (r) {
53225324
dev_warn(adev->dev, "reset via MES failed and try pipe reset %d\n", r);
@@ -5341,6 +5343,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring,
53415343
if (r)
53425344
return r;
53435345
amdgpu_fence_driver_force_completion(ring);
5346+
drm_sched_wqueue_start(&ring->sched);
53445347
return 0;
53455348
}
53465349

@@ -5437,6 +5440,8 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
54375440
if (amdgpu_sriov_vf(adev))
54385441
return -EINVAL;
54395442

5443+
drm_sched_wqueue_stop(&ring->sched);
5444+
54405445
r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true);
54415446
if (r) {
54425447
dev_warn(adev->dev, "fail(%d) to reset kcq and try pipe reset\n", r);
@@ -5460,6 +5465,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring,
54605465
if (r)
54615466
return r;
54625467
amdgpu_fence_driver_force_completion(ring);
5468+
drm_sched_wqueue_start(&ring->sched);
54635469
return 0;
54645470
}
54655471

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7187,6 +7187,8 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
71877187
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
71887188
return -EINVAL;
71897189

7190+
drm_sched_wqueue_stop(&ring->sched);
7191+
71907192
spin_lock_irqsave(&kiq->ring_lock, flags);
71917193

71927194
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -7247,6 +7249,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
72477249
if (r)
72487250
return r;
72497251
amdgpu_fence_driver_force_completion(ring);
7252+
drm_sched_wqueue_start(&ring->sched);
72507253
return 0;
72517254
}
72527255

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3567,6 +3567,8 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
35673567
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
35683568
return -EINVAL;
35693569

3570+
drm_sched_wqueue_stop(&ring->sched);
3571+
35703572
spin_lock_irqsave(&kiq->ring_lock, flags);
35713573

35723574
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
@@ -3625,6 +3627,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring,
36253627
if (r)
36263628
return r;
36273629
amdgpu_fence_driver_force_completion(ring);
3630+
drm_sched_wqueue_start(&ring->sched);
36283631
return 0;
36293632
}
36303633

drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,12 +770,14 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring,
770770
{
771771
int r;
772772

773+
drm_sched_wqueue_stop(&ring->sched);
773774
jpeg_v2_0_stop(ring->adev);
774775
jpeg_v2_0_start(ring->adev);
775776
r = amdgpu_ring_test_helper(ring);
776777
if (r)
777778
return r;
778779
amdgpu_fence_driver_force_completion(ring);
780+
drm_sched_wqueue_start(&ring->sched);
779781
return 0;
780782
}
781783

drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,12 +649,14 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring,
649649
{
650650
int r;
651651

652+
drm_sched_wqueue_stop(&ring->sched);
652653
jpeg_v2_5_stop_inst(ring->adev, ring->me);
653654
jpeg_v2_5_start_inst(ring->adev, ring->me);
654655
r = amdgpu_ring_test_helper(ring);
655656
if (r)
656657
return r;
657658
amdgpu_fence_driver_force_completion(ring);
659+
drm_sched_wqueue_start(&ring->sched);
658660
return 0;
659661
}
660662

drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,12 +561,14 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring,
561561
{
562562
int r;
563563

564+
drm_sched_wqueue_stop(&ring->sched);
564565
jpeg_v3_0_stop(ring->adev);
565566
jpeg_v3_0_start(ring->adev);
566567
r = amdgpu_ring_test_helper(ring);
567568
if (r)
568569
return r;
569570
amdgpu_fence_driver_force_completion(ring);
571+
drm_sched_wqueue_start(&ring->sched);
570572
return 0;
571573
}
572574

0 commit comments

Comments
 (0)