Skip to content

Commit 504f3ce

Browse files
committed
Merge tag 'amd-drm-next-6.20-2026-01-23' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.20-2026-01-23:

amdgpu:
- GC 12 fix
- UserQ fixes
- Misc error path fixes
- IP discovery fixes
- Misc cleanups
- HDMI fixes
- Documentation update
- Panel replay fixes
- Panel type handling fixes
- DCN 3.1.x fixes
- DC analog display fix
- SMU 6 fixes
- VCN 4.0.3 queue reset fix
- VCN 5.0.1 queue reset fix
- GPUVM TLB flush fix
- RAS fixes
- DC idle optimization fix

amdkfd:
- MQD fix for GC 9.4.3 and 9.5
- GC 11 cooperative launch fix
- GC 12.1 trap handler fixes
- Misc cleanups

radeon:
- Use drm_* logging helpers for VCE

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patch.msgid.link/20260123173537.17221-1-alexander.deucher@amd.com
2 parents b2c6627 + 1412482 commit 504f3ce

65 files changed

Lines changed: 1434 additions & 1188 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

Documentation/gpu/amdgpu/apu-asic-info-table.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,6 @@ Ryzen AI 300 series, Strix Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0, 14.0.0
1616
Ryzen AI 330 series, Krackan Point, 3.6.0, 11.5.3, 4.0.5, 6.1.3, 14.0.5, 14.0.5
1717
Ryzen AI 350 series, Krackan Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4, 14.0.4
1818
Ryzen AI Max 300 series, Strix Halo, 3.5.1, 11.5.1, 4.0.6, 6.1.1, 14.0.1, 14.0.1
19+
Ryzen AI 9 475 / 470 / 465, Gorgon Point, 3.5.0, 11.5.0, 4.0.5, 6.1.0, 14.0.0, 14.0.0
20+
Ryzen AI 7 450, Gorgon Point, 3.5.0, 11.5.2, 4.0.5, 6.1.2, 14.0.4, 14.0.4
21+
Ryzen AI 5 440 / 435, Gorgon Point, 3.6.0, 11.5.3, 4.0.5, 6.1.3, 14.0.5, 14.0.5

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ int amdgpu_amdkfd_alloc_kernel_mem(struct amdgpu_device *adev, size_t size,
334334
bp.size = size;
335335
bp.byte_align = PAGE_SIZE;
336336
bp.domain = domain;
337-
bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
337+
bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
338+
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
338339
bp.type = ttm_bo_type_kernel;
339340
bp.resv = NULL;
340341
bp.bo_ptr_size = sizeof(struct amdgpu_bo);

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,21 @@ static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size
380380
return checksum;
381381
}
382382

383-
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
383+
static inline bool amdgpu_discovery_verify_checksum(struct amdgpu_device *adev,
384+
uint8_t *data, uint32_t size,
384385
uint16_t expected)
385386
{
386-
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
387+
uint16_t calculated;
388+
389+
calculated = amdgpu_discovery_calculate_checksum(data, size);
390+
391+
if (calculated != expected) {
392+
dev_err(adev->dev, "Discovery checksum failed: calc 0x%04x != exp 0x%04x, size %u.\n",
393+
calculated, expected, size);
394+
return false;
395+
}
396+
397+
return true;
387398
}
388399

389400
static inline bool amdgpu_discovery_verify_binary_signature(uint8_t *binary)
@@ -439,7 +450,7 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
439450
return -EINVAL;
440451
}
441452

442-
if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
453+
if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
443454
le32_to_cpu(nhdr->size_bytes),
444455
checksum)) {
445456
dev_dbg(adev->dev, "invalid nps info data table checksum\n");
@@ -529,7 +540,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
529540
size = le16_to_cpu(bhdr->binary_size) - offset;
530541
checksum = le16_to_cpu(bhdr->binary_checksum);
531542

532-
if (!amdgpu_discovery_verify_checksum(discovery_bin + offset, size,
543+
if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset, size,
533544
checksum)) {
534545
dev_err(adev->dev, "invalid ip discovery binary checksum\n");
535546
r = -EINVAL;
@@ -549,7 +560,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
549560
goto out;
550561
}
551562

552-
if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
563+
if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
553564
le16_to_cpu(ihdr->size),
554565
checksum)) {
555566
dev_err(adev->dev, "invalid ip discovery data table checksum\n");
@@ -572,7 +583,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
572583
goto out;
573584
}
574585

575-
if (!amdgpu_discovery_verify_checksum(discovery_bin + offset,
586+
if (!amdgpu_discovery_verify_checksum(adev, discovery_bin + offset,
576587
le32_to_cpu(ghdr->size),
577588
checksum)) {
578589
dev_err(adev->dev, "invalid gc data table checksum\n");
@@ -595,7 +606,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
595606
goto out;
596607
}
597608

598-
if (!amdgpu_discovery_verify_checksum(
609+
if (!amdgpu_discovery_verify_checksum(adev,
599610
discovery_bin + offset,
600611
sizeof(struct harvest_table), checksum)) {
601612
dev_err(adev->dev, "invalid harvest data table checksum\n");
@@ -618,7 +629,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
618629
goto out;
619630
}
620631

621-
if (!amdgpu_discovery_verify_checksum(
632+
if (!amdgpu_discovery_verify_checksum(adev,
622633
discovery_bin + offset,
623634
le32_to_cpu(vhdr->size_bytes), checksum)) {
624635
dev_err(adev->dev, "invalid vcn data table checksum\n");
@@ -641,7 +652,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
641652
goto out;
642653
}
643654

644-
if (!amdgpu_discovery_verify_checksum(
655+
if (!amdgpu_discovery_verify_checksum(adev,
645656
discovery_bin + offset,
646657
le32_to_cpu(mhdr->size_bytes), checksum)) {
647658
dev_err(adev->dev, "invalid mall data table checksum\n");
@@ -1867,7 +1878,7 @@ static int amdgpu_discovery_refresh_nps_info(struct amdgpu_device *adev,
18671878
sizeof(*nps_data), false);
18681879

18691880
nhdr = (struct nps_info_header *)(nps_data);
1870-
if (!amdgpu_discovery_verify_checksum((uint8_t *)nps_data,
1881+
if (!amdgpu_discovery_verify_checksum(adev, (uint8_t *)nps_data,
18711882
le32_to_cpu(nhdr->size_bytes),
18721883
checksum)) {
18731884
dev_err(adev->dev, "nps data refresh, checksum mismatch\n");

drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -708,12 +708,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring)
708708
*/
709709

710710
/**
711-
* amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence
711+
* amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
712712
*
713-
* @af: fence of the ring to signal
713+
* @af: fence of the ring to update
714714
*
715715
*/
716-
void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *af)
716+
void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
717717
{
718718
struct dma_fence *unprocessed;
719719
struct dma_fence __rcu **ptr;
@@ -763,7 +763,7 @@ void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
763763
}
764764

765765
static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
766-
u64 start_wptr, u32 end_wptr)
766+
u64 start_wptr, u64 end_wptr)
767767
{
768768
unsigned int first_idx = start_wptr & ring->buf_mask;
769769
unsigned int last_idx = end_wptr & ring->buf_mask;

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1298,7 +1298,8 @@ int amdgpu_kiq_hdp_flush(struct amdgpu_device *adev)
12981298
failed_unlock:
12991299
spin_unlock_irqrestore(&kiq->ring_lock, flags);
13001300
failed_kiq_hdp_flush:
1301-
dev_err(adev->dev, "failed to flush HDP via KIQ\n");
1301+
if (!amdgpu_in_reset(adev))
1302+
dev_err(adev->dev, "failed to flush HDP via KIQ\n");
13021303
return r < 0 ? r : -EIO;
13031304
}
13041305

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -781,8 +781,10 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
781781

782782
if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
783783

784-
if (!adev->gmc.gmc_funcs->flush_gpu_tlb_pasid)
785-
return 0;
784+
if (!adev->gmc.gmc_funcs->flush_gpu_tlb_pasid) {
785+
r = 0;
786+
goto error_unlock_reset;
787+
}
786788

787789
if (adev->gmc.flush_tlb_needs_extra_type_2)
788790
adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,

drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
302302
if (job && job->vmid)
303303
amdgpu_vmid_reset(adev, ring->vm_hub, job->vmid);
304304
amdgpu_ring_undo(ring);
305-
return r;
305+
goto free_fence;
306306
}
307307
*f = &af->base;
308308
/* get a ref for the job */

drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
218218
if (!entity)
219219
return 0;
220220

221-
return drm_sched_job_init(&(*job)->base, entity, 1, owner,
222-
drm_client_id);
221+
r = drm_sched_job_init(&(*job)->base, entity, 1, owner, drm_client_id);
222+
if (!r)
223+
return 0;
224+
225+
kfree((*job)->hw_vm_fence);
223226

224227
err_fence:
225228
kfree((*job)->hw_fence);

drivers/gpu/drm/amd/amdgpu/amdgpu_job.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ enum amdgpu_ib_pool_type;
5959
#define AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB (18446744073709551604ULL)
6060
#define AMDGPU_KERNEL_JOB_ID_KFD_GART_MAP (18446744073709551603ULL)
6161
#define AMDGPU_KERNEL_JOB_ID_VCN_RING_TEST (18446744073709551602ULL)
62+
#define AMDGPU_KERNEL_JOB_ID_GFX_RING_TEST (18446744073709551601ULL)
63+
#define AMDGPU_KERNEL_JOB_ID_SDMA_RING_TEST (18446744073709551600ULL)
64+
#define AMDGPU_KERNEL_JOB_ID_VPE_RING_TEST (18446744073709551599ULL)
65+
#define AMDGPU_KERNEL_JOB_ID_RUN_SHADER (18446744073709551598ULL)
6266

6367
struct amdgpu_job {
6468
struct drm_sched_job base;

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,6 +2784,10 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
27842784
if (!data->bps[i].ts)
27852785
continue;
27862786

2787+
/* U64_MAX is used to mark the record as invalid */
2788+
if (data->bps[i].retired_page == U64_MAX)
2789+
continue;
2790+
27872791
bps[r].bp = data->bps[i].retired_page;
27882792
r++;
27892793
if (r >= count)
@@ -3090,6 +3094,8 @@ static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev,
30903094

30913095
if (amdgpu_ras_check_bad_page_unlock(con,
30923096
bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
3097+
/* set to U64_MAX to mark it as invalid */
3098+
data->bps[data->count].retired_page = U64_MAX;
30933099
data->count++;
30943100
data->space_left--;
30953101
continue;

0 commit comments

Comments
 (0)