Skip to content

Commit 0692602

Browse files
committed
Merge tag 'amd-drm-next-6.19-2025-12-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.19-2025-12-02:

amdgpu:
- Unified MES fix
- SMU 11 unbalanced irq fix
- Fix for driver reloading on APUs
- pp_table sysfs fix
- Fix memory leak in fence handling
- HDMI fix
- DC cursor fixes
- eDP panel parsing fix
- Brightness fix
- DC analog fixes
- EDID retry fixes
- UserQ fixes
- RAS fixes
- IP discovery fix
- Add missing locking in amdgpu_ttm_access_memory_sdma()
- Smart Power OLED fix
- PRT and page fault fixes for GC 6-8
- VMID reservation fix
- ACP platform device fix
- Add missing vm fault handling for GC 11-12
- VPE fix

amdkfd:
- Partitioning fix

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20251202220101.2039347-1-alexander.deucher@amd.com
2 parents b3239df + 3925683 commit 0692602

65 files changed

Lines changed: 1972 additions & 184 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -302,17 +302,19 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
302302
adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
303303

304304
adev->acp.acp_cell[0].name = "acp_audio_dma";
305+
adev->acp.acp_cell[0].id = 0;
305306
adev->acp.acp_cell[0].num_resources = 3;
306307
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
307308
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
308309
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
309310

310311
adev->acp.acp_cell[1].name = "designware-i2s";
312+
adev->acp.acp_cell[1].id = 1;
311313
adev->acp.acp_cell[1].num_resources = 1;
312314
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
313315
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
314316
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
315-
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
317+
r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, 2, NULL, 0, NULL);
316318
if (r)
317319
goto failure;
318320
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
@@ -410,30 +412,34 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block)
410412
adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
411413

412414
adev->acp.acp_cell[0].name = "acp_audio_dma";
415+
adev->acp.acp_cell[0].id = 0;
413416
adev->acp.acp_cell[0].num_resources = 5;
414417
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
415418
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
416419
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
417420

418421
adev->acp.acp_cell[1].name = "designware-i2s";
422+
adev->acp.acp_cell[1].id = 1;
419423
adev->acp.acp_cell[1].num_resources = 1;
420424
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
421425
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
422426
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
423427

424428
adev->acp.acp_cell[2].name = "designware-i2s";
429+
adev->acp.acp_cell[2].id = 2;
425430
adev->acp.acp_cell[2].num_resources = 1;
426431
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
427432
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
428433
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
429434

430435
adev->acp.acp_cell[3].name = "designware-i2s";
436+
adev->acp.acp_cell[3].id = 3;
431437
adev->acp.acp_cell[3].num_resources = 1;
432438
adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
433439
adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
434440
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
435441

436-
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
442+
r = mfd_add_devices(adev->acp.parent, 0, adev->acp.acp_cell, ACP_DEVS, NULL, 0, NULL);
437443
if (r)
438444
goto failure;
439445

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2665,6 +2665,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
26652665
chip_name = "navi12";
26662666
break;
26672667
case CHIP_CYAN_SKILLFISH:
2668+
if (adev->discovery.bin)
2669+
return 0;
26682670
chip_name = "cyan_skillfish";
26692671
break;
26702672
}
@@ -3680,6 +3682,20 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
36803682
"failed to release exclusive mode on fini\n");
36813683
}
36823684

3685+
/*
3686+
* Driver reload on the APU can fail due to firmware validation because
3687+
* the PSP is always running, as it is shared across the whole SoC.
3688+
* This same issue does not occur on dGPU because it has a mechanism
3689+
* that checks whether the PSP is running. A solution for those issues
3690+
* in the APU is to trigger a GPU reset, but this should be done during
3691+
* the unload phase to avoid adding boot latency and screen flicker.
3692+
*/
3693+
if ((adev->flags & AMD_IS_APU) && !adev->gmc.is_app_apu) {
3694+
r = amdgpu_asic_reset(adev);
3695+
if (r)
3696+
dev_err(adev->dev, "asic reset on %s failed\n", __func__);
3697+
}
3698+
36833699
return 0;
36843700
}
36853701

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -597,6 +597,9 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
597597
/* reserve engine 5 for firmware */
598598
if (adev->enable_mes)
599599
vm_inv_engs[i] &= ~(1 << 5);
600+
/* reserve engine 6 for uni mes */
601+
if (adev->enable_uni_mes)
602+
vm_inv_engs[i] &= ~(1 << 6);
600603
/* reserve mmhub engine 3 for firmware */
601604
if (adev->enable_umsch_mm)
602605
vm_inv_engs[i] &= ~(1 << 3);

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,11 @@ enum amdgpu_memory_partition {
8686

8787
#define AMDGPU_MAX_MEM_RANGES 8
8888

89+
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_RETRY 0x80
90+
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_READ 0x40
91+
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_WRITE 0x20
92+
#define AMDGPU_GMC9_FAULT_SOURCE_DATA_EXE 0x10
93+
8994
/*
9095
* GMC page fault information
9196
*/

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
224224
kfree((*job)->hw_fence);
225225
err_job:
226226
kfree(*job);
227+
*job = NULL;
227228

228229
return r;
229230
}
@@ -245,7 +246,10 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
245246
if (r) {
246247
if (entity)
247248
drm_sched_job_cleanup(&(*job)->base);
249+
kfree((*job)->hw_vm_fence);
250+
kfree((*job)->hw_fence);
248251
kfree(*job);
252+
*job = NULL;
249253
}
250254

251255
return r;

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,8 @@ static void amdgpu_ras_critical_region_fini(struct amdgpu_device *adev);
150150

151151
#ifdef CONFIG_X86_MCE_AMD
152152
static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev);
153+
static void
154+
amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev);
153155
struct mce_notifier_adev_list {
154156
struct amdgpu_device *devs[MAX_GPU_INSTANCE];
155157
int num_gpu;
@@ -3954,7 +3956,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
39543956
mutex_unlock(&con->recovery_lock);
39553957

39563958
amdgpu_ras_critical_region_init(adev);
3957-
3959+
#ifdef CONFIG_X86_MCE_AMD
3960+
amdgpu_unregister_bad_pages_mca_notifier(adev);
3961+
#endif
39583962
return 0;
39593963
}
39603964
/* recovery end */
@@ -4988,6 +4992,28 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev)
49884992
notifier_registered = true;
49894993
}
49904994
}
4995+
static void amdgpu_unregister_bad_pages_mca_notifier(struct amdgpu_device *adev)
4996+
{
4997+
int i, j;
4998+
4999+
if (!notifier_registered && !mce_adev_list.num_gpu)
5000+
return;
5001+
for (i = 0, j = 0; i < mce_adev_list.num_gpu; i++) {
5002+
if (mce_adev_list.devs[i] == adev)
5003+
mce_adev_list.devs[i] = NULL;
5004+
if (!mce_adev_list.devs[i])
5005+
++j;
5006+
}
5007+
5008+
if (j == mce_adev_list.num_gpu) {
5009+
mce_adev_list.num_gpu = 0;
5010+
/* Unregister x86 notifier with MCE subsystem. */
5011+
if (notifier_registered) {
5012+
mce_unregister_decode_chain(&amdgpu_bad_page_nb);
5013+
notifier_registered = false;
5014+
}
5015+
}
5016+
}
49915017
#endif
49925018

49935019
struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev)

drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -520,9 +520,14 @@ static ssize_t amdgpu_ras_cper_debugfs_read(struct file *f, char __user *buf,
520520
return -ENOMEM;
521521

522522
if (!(*offset)) {
523+
/* Need at least 12 bytes for the header on the first read */
524+
if (size < ring_header_size)
525+
return -EINVAL;
526+
523527
if (copy_to_user(buf, ring_header, ring_header_size))
524528
return -EFAULT;
525529
buf += ring_header_size;
530+
size -= ring_header_size;
526531
}
527532

528533
r = amdgpu_ras_mgr_handle_ras_cmd(ring->adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1329,7 +1329,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
13291329
mem->mem_type == AMDGPU_PL_MMIO_REMAP)) {
13301330
flags |= AMDGPU_PTE_SYSTEM;
13311331

1332-
if (ttm->caching == ttm_cached)
1332+
if (ttm && ttm->caching == ttm_cached)
13331333
flags |= AMDGPU_PTE_SNOOPED;
13341334
}
13351335

@@ -1486,6 +1486,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
14861486
if (r)
14871487
goto out;
14881488

1489+
mutex_lock(&adev->mman.gtt_window_lock);
14891490
amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
14901491
src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
14911492
src_mm.start;
@@ -1500,6 +1501,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
15001501
WARN_ON(job->ibs[0].length_dw > num_dw);
15011502

15021503
fence = amdgpu_job_submit(job);
1504+
mutex_unlock(&adev->mman.gtt_window_lock);
15031505

15041506
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
15051507
r = -ETIMEDOUT;

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1069,7 +1069,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
10691069
}
10701070

10711071
/* Prepare a TLB flush fence to be attached to PTs */
1072-
if (!params->unlocked && vm->is_compute_context) {
1072+
if (!params->unlocked) {
10731073
amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
10741074

10751075
/* Makes sure no PD/PT is freed before the flush */
@@ -2093,7 +2093,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
20932093
struct amdgpu_bo *bo = before->bo_va->base.bo;
20942094

20952095
amdgpu_vm_it_insert(before, &vm->va);
2096-
if (before->flags & AMDGPU_PTE_PRT_FLAG(adev))
2096+
if (before->flags & AMDGPU_VM_PAGE_PRT)
20972097
amdgpu_vm_prt_get(adev);
20982098

20992099
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
@@ -2108,7 +2108,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
21082108
struct amdgpu_bo *bo = after->bo_va->base.bo;
21092109

21102110
amdgpu_vm_it_insert(after, &vm->va);
2111-
if (after->flags & AMDGPU_PTE_PRT_FLAG(adev))
2111+
if (after->flags & AMDGPU_VM_PAGE_PRT)
21122112
amdgpu_vm_prt_get(adev);
21132113

21142114
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
@@ -2916,8 +2916,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
29162916
switch (args->in.op) {
29172917
case AMDGPU_VM_OP_RESERVE_VMID:
29182918
/* We only have requirement to reserve vmid from gfxhub */
2919-
amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
2920-
break;
2919+
return amdgpu_vmid_alloc_reserved(adev, vm, AMDGPU_GFXHUB(0));
29212920
case AMDGPU_VM_OP_UNRESERVE_VMID:
29222921
amdgpu_vmid_free_reserved(adev, vm, AMDGPU_GFXHUB(0));
29232922
break;

drivers/gpu/drm/amd/amdgpu/cik_ih.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,9 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
156156
/* enable irqs */
157157
cik_ih_enable_interrupts(adev);
158158

159+
if (adev->irq.ih_soft.ring_size)
160+
adev->irq.ih_soft.enabled = true;
161+
159162
return 0;
160163
}
161164

@@ -192,6 +195,9 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
192195

193196
wptr = le32_to_cpu(*ih->wptr_cpu);
194197

198+
if (ih == &adev->irq.ih_soft)
199+
goto out;
200+
195201
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
196202
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
197203
/* When a ring buffer overflow happen start parsing interrupt
@@ -211,6 +217,8 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
211217
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
212218
WREG32(mmIH_RB_CNTL, tmp);
213219
}
220+
221+
out:
214222
return (wptr & ih->ptr_mask);
215223
}
216224

@@ -306,6 +314,10 @@ static int cik_ih_sw_init(struct amdgpu_ip_block *ip_block)
306314
if (r)
307315
return r;
308316

317+
r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true);
318+
if (r)
319+
return r;
320+
309321
r = amdgpu_irq_init(adev);
310322

311323
return r;

0 commit comments

Comments
 (0)