Skip to content

Commit cf99b26

Browse files
committed
Merge tag 'amd-drm-next-6.18-2025-09-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.18-2025-09-09: amdgpu: - Add CRIU support for gem objects - SI UVD fix - SI DPM fixes - Misc code cleanups - RAS updates - GPUVM debugfs fixes - Cyan Skillfish updates - UserQ updates - OEM i2c fix - SMU 13.0.x updates - DPCD probe quirk fix - Make vbios build number available in sysfs - HDCP updates - Brightness curve fixes - eDP updates - Vblank fixes - DCN 3.5 PG fix - PBN calculation fix amdkfd: - Add CRIU support for gem objects - Flexible array fix - P2P topology fix - APU memlimit fixes - Misc code cleanups UAPI: - Add CRIU support for gem objects Proposed userspace: checkpoint-restore/criu#2613 radeon: - Use dev_warn_once() in CS parsers Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://lore.kernel.org/r/20250909161928.942785-1-alexander.deucher@amd.com
2 parents 8d04ea1 + 2fd653b commit cf99b26

106 files changed

Lines changed: 1810 additions & 1027 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

drivers/gpu/drm/amd/amdgpu/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ amdgpu-y += \
8484
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
8585
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
8686
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
87-
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o
87+
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o \
88+
cyan_skillfish_reg_init.o
8889

8990
# add DF block
9091
amdgpu-y += \

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,9 +1149,6 @@ struct amdgpu_device {
11491149
/* for userq and VM fences */
11501150
struct amdgpu_seq64 seq64;
11511151

1152-
/* KFD */
1153-
struct amdgpu_kfd_dev kfd;
1154-
11551152
/* UMC */
11561153
struct amdgpu_umc umc;
11571154

@@ -1314,6 +1311,11 @@ struct amdgpu_device {
13141311
struct mutex userq_mutex;
13151312
bool userq_halt_for_enforce_isolation;
13161313
struct amdgpu_uid *uid_info;
1314+
1315+
/* KFD
1316+
* Must be last --ends in a flexible-array member.
1317+
*/
1318+
struct amdgpu_kfd_dev kfd;
13171319
};
13181320

13191321
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,13 @@ struct amdgpu_kfd_dev {
107107
bool init_complete;
108108
struct work_struct reset_work;
109109

110-
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
111-
struct dev_pagemap pgmap;
112-
113110
/* Client for KFD BO GEM handle allocations */
114111
struct drm_client_dev client;
112+
113+
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping
114+
* Must be last --ends in a flexible-array member.
115+
*/
116+
struct dev_pagemap pgmap;
115117
};
116118

117119
enum kgd_engine_type {

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
213213
spin_lock(&kfd_mem_limit.mem_limit_lock);
214214

215215
if (kfd_mem_limit.system_mem_used + system_mem_needed >
216-
kfd_mem_limit.max_system_mem_limit)
216+
kfd_mem_limit.max_system_mem_limit) {
217217
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
218+
if (!no_system_mem_limit) {
219+
ret = -ENOMEM;
220+
goto release;
221+
}
222+
}
218223

219-
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
220-
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
221-
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
222-
kfd_mem_limit.max_ttm_mem_limit) ||
223-
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
224-
vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
224+
if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
225+
kfd_mem_limit.max_ttm_mem_limit) {
225226
ret = -ENOMEM;
226227
goto release;
227228
}
228229

230+
/*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
231+
* carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
232+
* VRAM check since the ttm_mem_limit check already covers this allocation
233+
*/
234+
235+
if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
236+
uint64_t vram_available =
237+
vram_size - reserved_for_pt - reserved_for_ras -
238+
atomic64_read(&adev->vram_pin_size);
239+
if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
240+
ret = -ENOMEM;
241+
goto release;
242+
}
243+
}
244+
229245
/* Update memory accounting by decreasing available system
230246
* memory, TTM memory and GPU memory as computed above
231247
*/
@@ -1627,11 +1643,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
16271643
uint64_t vram_available, system_mem_available, ttm_mem_available;
16281644

16291645
spin_lock(&kfd_mem_limit.mem_limit_lock);
1630-
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1631-
- adev->kfd.vram_used_aligned[xcp_id]
1632-
- atomic64_read(&adev->vram_pin_size)
1633-
- reserved_for_pt
1634-
- reserved_for_ras;
1646+
if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
1647+
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1648+
- adev->kfd.vram_used_aligned[xcp_id];
1649+
else
1650+
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
1651+
- adev->kfd.vram_used_aligned[xcp_id]
1652+
- atomic64_read(&adev->vram_pin_size)
1653+
- reserved_for_pt
1654+
- reserved_for_ras;
16351655

16361656
if (adev->apu_prefer_gtt) {
16371657
system_mem_available = no_system_mem_limit ?

drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,16 +1816,43 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev,
18161816
return sysfs_emit(buf, "%s\n", ctx->vbios_pn);
18171817
}
18181818

1819+
static ssize_t amdgpu_atombios_get_vbios_build(struct device *dev,
1820+
struct device_attribute *attr,
1821+
char *buf)
1822+
{
1823+
struct drm_device *ddev = dev_get_drvdata(dev);
1824+
struct amdgpu_device *adev = drm_to_adev(ddev);
1825+
struct atom_context *ctx = adev->mode_info.atom_context;
1826+
1827+
return sysfs_emit(buf, "%s\n", ctx->build_num);
1828+
}
1829+
18191830
static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
18201831
NULL);
1832+
static DEVICE_ATTR(vbios_build, 0444, amdgpu_atombios_get_vbios_build, NULL);
18211833

18221834
static struct attribute *amdgpu_vbios_version_attrs[] = {
1823-
&dev_attr_vbios_version.attr,
1824-
NULL
1835+
&dev_attr_vbios_version.attr, &dev_attr_vbios_build.attr, NULL
18251836
};
18261837

1838+
static umode_t amdgpu_vbios_version_attrs_is_visible(struct kobject *kobj,
1839+
struct attribute *attr,
1840+
int index)
1841+
{
1842+
struct device *dev = kobj_to_dev(kobj);
1843+
struct drm_device *ddev = dev_get_drvdata(dev);
1844+
struct amdgpu_device *adev = drm_to_adev(ddev);
1845+
struct atom_context *ctx = adev->mode_info.atom_context;
1846+
1847+
if (attr == &dev_attr_vbios_build.attr && !strlen(ctx->build_num))
1848+
return 0;
1849+
1850+
return attr->mode;
1851+
}
1852+
18271853
const struct attribute_group amdgpu_vbios_version_attr_group = {
1828-
.attrs = amdgpu_vbios_version_attrs
1854+
.attrs = amdgpu_vbios_version_attrs,
1855+
.is_visible = amdgpu_vbios_version_attrs_is_visible,
18291856
};
18301857

18311858
int amdgpu_atombios_sysfs_init(struct amdgpu_device *adev)

drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,7 +1196,10 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector)
11961196
}
11971197

11981198
/**
1199-
* Returns the maximum supported HDMI (TMDS) pixel clock in KHz.
1199+
* amdgpu_max_hdmi_pixel_clock - Return max supported HDMI (TMDS) pixel clock
1200+
* @adev: pointer to amdgpu_device
1201+
*
1202+
* Return: maximum supported HDMI (TMDS) pixel clock in KHz.
12001203
*/
12011204
static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
12021205
{
@@ -1209,8 +1212,14 @@ static int amdgpu_max_hdmi_pixel_clock(const struct amdgpu_device *adev)
12091212
}
12101213

12111214
/**
1212-
* Validates the given display mode on DVI and HDMI connectors,
1213-
* including analog signals on DVI-I.
1215+
* amdgpu_connector_dvi_mode_valid - Validate a mode on DVI/HDMI connectors
1216+
* @connector: DRM connector to validate the mode on
1217+
* @mode: display mode to validate
1218+
*
1219+
* Validate the given display mode on DVI and HDMI connectors, including
1220+
* analog signals on DVI-I.
1221+
*
1222+
* Return: drm_mode_status indicating whether the mode is valid.
12141223
*/
12151224
static enum drm_mode_status amdgpu_connector_dvi_mode_valid(struct drm_connector *connector,
12161225
const struct drm_display_mode *mode)

drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
6868
hdr->error_severity = sev;
6969

7070
hdr->valid_bits.platform_id = 1;
71-
hdr->valid_bits.partition_id = 1;
7271
hdr->valid_bits.timestamp = 1;
7372

7473
amdgpu_cper_get_timestamp(&hdr->timestamp);
@@ -174,7 +173,7 @@ int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
174173
struct cper_sec_nonstd_err *section;
175174
bool poison;
176175

177-
poison = (sev == CPER_SEV_NON_FATAL_CORRECTED) ? false : true;
176+
poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
178177
section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
179178
section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
180179
NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));
@@ -220,7 +219,10 @@ int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev
220219
section->hdr.valid_bits.err_context_cnt = 1;
221220

222221
section->info.error_type = RUNTIME;
222+
section->info.valid_bits.ms_chk = 1;
223223
section->info.ms_chk_bits.err_type_valid = 1;
224+
section->info.ms_chk_bits.err_type = 1;
225+
section->info.ms_chk_bits.pcc = 1;
224226
section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
225227
section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);
226228

drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2136,12 +2136,14 @@ static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
21362136
struct drm_file *file;
21372137
struct amdgpu_fpriv *fpriv;
21382138
struct amdgpu_bo *root_bo;
2139+
struct amdgpu_device *adev;
21392140
int r;
21402141

21412142
file = m->private;
21422143
if (!file)
21432144
return -EINVAL;
21442145

2146+
adev = drm_to_adev(file->minor->dev);
21452147
fpriv = file->driver_priv;
21462148
if (!fpriv || !fpriv->vm.root.bo)
21472149
return -ENODEV;
@@ -2153,7 +2155,11 @@ static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
21532155
return -EINVAL;
21542156
}
21552157

2156-
seq_printf(m, "gpu_address: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo));
2158+
seq_printf(m, "pd_address: 0x%llx\n", amdgpu_gmc_pd_addr(fpriv->vm.root.bo));
2159+
seq_printf(m, "max_pfn: 0x%llx\n", adev->vm_manager.max_pfn);
2160+
seq_printf(m, "num_level: 0x%x\n", adev->vm_manager.num_level);
2161+
seq_printf(m, "block_size: 0x%x\n", adev->vm_manager.block_size);
2162+
seq_printf(m, "fragment_size: 0x%x\n", adev->vm_manager.fragment_size);
21572163

21582164
amdgpu_bo_unreserve(root_bo);
21592165
amdgpu_bo_unref(&root_bo);

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
9595
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
9696
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
9797
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
98+
MODULE_FIRMWARE("amdgpu/cyan_skillfish_gpu_info.bin");
9899

99100
#define AMDGPU_RESUME_MS 2000
100101
#define AMDGPU_MAX_RETRY_LIMIT 2
@@ -2629,6 +2630,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
26292630
return 0;
26302631
chip_name = "navi12";
26312632
break;
2633+
case CHIP_CYAN_SKILLFISH:
2634+
chip_name = "cyan_skillfish";
2635+
break;
26322636
}
26332637

26342638
err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2124,14 +2124,17 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
21242124
case IP_VERSION(11, 0, 5):
21252125
case IP_VERSION(11, 0, 9):
21262126
case IP_VERSION(11, 0, 7):
2127-
case IP_VERSION(11, 0, 8):
21282127
case IP_VERSION(11, 0, 11):
21292128
case IP_VERSION(11, 0, 12):
21302129
case IP_VERSION(11, 0, 13):
21312130
case IP_VERSION(11, 5, 0):
21322131
case IP_VERSION(11, 5, 2):
21332132
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
21342133
break;
2134+
case IP_VERSION(11, 0, 8):
2135+
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2)
2136+
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
2137+
break;
21352138
case IP_VERSION(12, 0, 0):
21362139
case IP_VERSION(12, 0, 1):
21372140
amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block);
@@ -2746,6 +2749,36 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
27462749
adev->ip_versions[UVD_HWIP][1] = IP_VERSION(2, 6, 0);
27472750
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
27482751
break;
2752+
case CHIP_CYAN_SKILLFISH:
2753+
if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) {
2754+
r = amdgpu_discovery_reg_base_init(adev);
2755+
if (r)
2756+
return -EINVAL;
2757+
2758+
amdgpu_discovery_harvest_ip(adev);
2759+
amdgpu_discovery_get_gfx_info(adev);
2760+
amdgpu_discovery_get_mall_info(adev);
2761+
amdgpu_discovery_get_vcn_info(adev);
2762+
} else {
2763+
cyan_skillfish_reg_base_init(adev);
2764+
adev->sdma.num_instances = 2;
2765+
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
2766+
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
2767+
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);
2768+
adev->ip_versions[HDP_HWIP][0] = IP_VERSION(5, 0, 1);
2769+
adev->ip_versions[SDMA0_HWIP][0] = IP_VERSION(5, 0, 1);
2770+
adev->ip_versions[SDMA1_HWIP][1] = IP_VERSION(5, 0, 1);
2771+
adev->ip_versions[DF_HWIP][0] = IP_VERSION(3, 5, 0);
2772+
adev->ip_versions[NBIO_HWIP][0] = IP_VERSION(2, 1, 1);
2773+
adev->ip_versions[UMC_HWIP][0] = IP_VERSION(8, 1, 1);
2774+
adev->ip_versions[MP0_HWIP][0] = IP_VERSION(11, 0, 8);
2775+
adev->ip_versions[MP1_HWIP][0] = IP_VERSION(11, 0, 8);
2776+
adev->ip_versions[THM_HWIP][0] = IP_VERSION(11, 0, 1);
2777+
adev->ip_versions[SMUIO_HWIP][0] = IP_VERSION(11, 0, 8);
2778+
adev->ip_versions[GC_HWIP][0] = IP_VERSION(10, 1, 3);
2779+
adev->ip_versions[UVD_HWIP][0] = IP_VERSION(2, 0, 3);
2780+
}
2781+
break;
27492782
default:
27502783
r = amdgpu_discovery_reg_base_init(adev);
27512784
if (r) {

0 commit comments

Comments
 (0)