Skip to content

Commit 466d617

Browse files
committed
Merge tag 'drm-fixes-2026-02-27' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie: "Regular fixes pull, amdxdna and amdgpu are the main ones, with a couple of intel fixes, then a scattering of fixes across drivers, nothing too major. i915/display: - Fix Panel Replay stuck with X during mode transitions on Panther Lake xe: - W/a fix for multi-cast registers - Fix xe_sync initialization issues amdgpu: - UserQ fixes - DC fix - RAS fixes - VCN 5 fix - Slot reset fix - Remove MES workaround that's no longer needed amdxdna: - deadlock fix - NULL ptr deref fix - suspend failure fix - OOB access fix - buffer overflow fix - input sanitization fix - firmware loading fix dw-dp: - An error handling fix ethosu: - A binary shift overflow fix imx: - An error handling fix logicvc: - A dt node reference leak fix nouveau: - A WARN_ON removal samsung-dsim: - A memory leak fix tiny: - sharp-memory: NULL pointer deref fix vmwgfx: - A reference count and error handling fix" * tag 'drm-fixes-2026-02-27' of https://gitlab.freedesktop.org/drm/kernel: (39 commits) drm/amd: Disable MES LR compute W/A drm/amdgpu: Fix error handling in slot reset drm/amdgpu/vcn5: Add SMU dpm interface type drm/amdgpu: Fix locking bugs in error paths drm/amdgpu: Unlock a mutex before destroying it drm/amd/display: Use GFP_ATOMIC in dc_create_stream_for_sink drm/amdgpu: add upper bound check on user inputs in wait ioctl drm/amdgpu: add upper bound check on user inputs in signal ioctl drm/amdgpu/userq: Do not allow userspace to trivially triger kernel warnings drm/amdgpu/userq: Fix reference leak in amdgpu_userq_wait_ioctl accel/amdxdna: Use a different name for latest firmware drm/client: Do not destroy NULL modes drm/gpusvm: Fix drm_gpusvm_pages_valid_unlocked() kernel-doc drm/xe/sync: Fix user fence leak on alloc failure drm/xe/sync: Cleanup partially initialized sync on parse failure drm/xe/wa: Steer RMW of MCR registers while building default LRC accel/amdxdna: Validate command buffer payload count accel/amdxdna: Prevent ubuf size overflow accel/amdxdna: Fix 
out-of-bounds memset in command slot handling accel/amdxdna: Fix command hang on suspended hardware context ...
2 parents a75cb86 + 103d53e commit 466d617

40 files changed

Lines changed: 298 additions & 159 deletions

drivers/accel/amdxdna/aie2_ctx.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@
2323
#include "amdxdna_pci_drv.h"
2424
#include "amdxdna_pm.h"
2525

26-
static bool force_cmdlist;
26+
static bool force_cmdlist = true;
2727
module_param(force_cmdlist, bool, 0600);
28-
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
28+
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default true)");
2929

3030
#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
3131

@@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct
5353
{
5454
drm_sched_stop(&hwctx->priv->sched, bad_job);
5555
aie2_destroy_context(xdna->dev_handle, hwctx);
56+
drm_sched_start(&hwctx->priv->sched, 0);
5657
}
5758

5859
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
@@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
8081
}
8182

8283
out:
83-
drm_sched_start(&hwctx->priv->sched, 0);
8484
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
8585
return ret;
8686
}
@@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
297297
struct dma_fence *fence;
298298
int ret;
299299

300-
if (!hwctx->priv->mbox_chann)
300+
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
301+
if (ret)
302+
return NULL;
303+
304+
if (!hwctx->priv->mbox_chann) {
305+
amdxdna_pm_suspend_put(hwctx->client->xdna);
301306
return NULL;
307+
}
302308

303-
if (!mmget_not_zero(job->mm))
309+
if (!mmget_not_zero(job->mm)) {
310+
amdxdna_pm_suspend_put(hwctx->client->xdna);
304311
return ERR_PTR(-ESRCH);
312+
}
305313

306314
kref_get(&job->refcnt);
307315
fence = dma_fence_get(job->fence);
308316

309-
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
310-
if (ret)
311-
goto out;
312-
313317
if (job->drv_cmd) {
314318
switch (job->drv_cmd->opcode) {
315319
case SYNC_DEBUG_BO:
@@ -497,7 +501,7 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
497501

498502
if (AIE2_FEATURE_ON(xdna->dev_handle, AIE2_TEMPORAL_ONLY)) {
499503
ret = aie2_destroy_context(xdna->dev_handle, hwctx);
500-
if (ret)
504+
if (ret && ret != -ENODEV)
501505
XDNA_ERR(xdna, "Destroy temporal only context failed, ret %d", ret);
502506
} else {
503507
ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
@@ -629,7 +633,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
629633
goto free_entity;
630634
}
631635

632-
ret = amdxdna_pm_resume_get(xdna);
636+
ret = amdxdna_pm_resume_get_locked(xdna);
633637
if (ret)
634638
goto free_col_list;
635639

@@ -760,7 +764,7 @@ static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size
760764
if (!hwctx->cus)
761765
return -ENOMEM;
762766

763-
ret = amdxdna_pm_resume_get(xdna);
767+
ret = amdxdna_pm_resume_get_locked(xdna);
764768
if (ret)
765769
goto free_cus;
766770

@@ -1070,6 +1074,8 @@ void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
10701074

10711075
ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
10721076
true, MAX_SCHEDULE_TIMEOUT);
1073-
if (!ret || ret == -ERESTARTSYS)
1077+
if (!ret)
10741078
XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
1079+
else if (ret == -ERESTARTSYS)
1080+
XDNA_DBG(xdna, "Wait for bo interrupted by signal");
10751081
}

drivers/accel/amdxdna/aie2_message.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,8 +216,10 @@ static int aie2_destroy_context_req(struct amdxdna_dev_hdl *ndev, u32 id)
216216

217217
req.context_id = id;
218218
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
219-
if (ret)
219+
if (ret && ret != -ENODEV)
220220
XDNA_WARN(xdna, "Destroy context failed, ret %d", ret);
221+
else if (ret == -ENODEV)
222+
XDNA_DBG(xdna, "Destroy context: device already stopped");
221223

222224
return ret;
223225
}
@@ -318,6 +320,9 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
318320
struct amdxdna_dev *xdna = ndev->xdna;
319321
int ret;
320322

323+
if (!hwctx->priv->mbox_chann)
324+
return 0;
325+
321326
xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
322327
ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
323328
xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
@@ -694,11 +699,11 @@ aie2_cmdlist_fill_npu_cf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *siz
694699
u32 cmd_len;
695700
void *cmd;
696701

697-
memset(npu_slot, 0, sizeof(*npu_slot));
698702
cmd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
699703
if (*size < sizeof(*npu_slot) + cmd_len)
700704
return -EINVAL;
701705

706+
memset(npu_slot, 0, sizeof(*npu_slot));
702707
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
703708
if (npu_slot->cu_idx == INVALID_CU_IDX)
704709
return -EINVAL;
@@ -719,7 +724,6 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
719724
u32 cmd_len;
720725
u32 arg_sz;
721726

722-
memset(npu_slot, 0, sizeof(*npu_slot));
723727
sn = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
724728
arg_sz = cmd_len - sizeof(*sn);
725729
if (cmd_len < sizeof(*sn) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -728,6 +732,7 @@ aie2_cmdlist_fill_npu_dpu(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
728732
if (*size < sizeof(*npu_slot) + arg_sz)
729733
return -EINVAL;
730734

735+
memset(npu_slot, 0, sizeof(*npu_slot));
731736
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
732737
if (npu_slot->cu_idx == INVALID_CU_IDX)
733738
return -EINVAL;
@@ -751,7 +756,6 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t
751756
u32 cmd_len;
752757
u32 arg_sz;
753758

754-
memset(npu_slot, 0, sizeof(*npu_slot));
755759
pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
756760
arg_sz = cmd_len - sizeof(*pd);
757761
if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -760,6 +764,7 @@ aie2_cmdlist_fill_npu_preempt(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t
760764
if (*size < sizeof(*npu_slot) + arg_sz)
761765
return -EINVAL;
762766

767+
memset(npu_slot, 0, sizeof(*npu_slot));
763768
npu_slot->cu_idx = amdxdna_cmd_get_cu_idx(cmd_bo);
764769
if (npu_slot->cu_idx == INVALID_CU_IDX)
765770
return -EINVAL;
@@ -787,7 +792,6 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
787792
u32 cmd_len;
788793
u32 arg_sz;
789794

790-
memset(npu_slot, 0, sizeof(*npu_slot));
791795
pd = amdxdna_cmd_get_payload(cmd_bo, &cmd_len);
792796
arg_sz = cmd_len - sizeof(*pd);
793797
if (cmd_len < sizeof(*pd) || arg_sz > MAX_NPU_ARGS_SIZE)
@@ -796,6 +800,7 @@ aie2_cmdlist_fill_npu_elf(struct amdxdna_gem_obj *cmd_bo, void *slot, size_t *si
796800
if (*size < sizeof(*npu_slot) + arg_sz)
797801
return -EINVAL;
798802

803+
memset(npu_slot, 0, sizeof(*npu_slot));
799804
npu_slot->type = EXEC_NPU_TYPE_ELF;
800805
npu_slot->inst_buf_addr = pd->inst_buf;
801806
npu_slot->save_buf_addr = pd->save_buf;

drivers/accel/amdxdna/aie2_pci.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ static int aie2_max_col = XRS_MAX_COL;
3232
module_param(aie2_max_col, uint, 0600);
3333
MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
3434

35+
static char *npu_fw[] = {
36+
"npu_7.sbin",
37+
"npu.sbin"
38+
};
39+
3540
/*
3641
* The management mailbox channel is allocated by firmware.
3742
* The related register and ring buffer information is on SRAM BAR.
@@ -323,6 +328,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
323328
return;
324329
}
325330

331+
aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
326332
aie2_mgmt_fw_fini(ndev);
327333
xdna_mailbox_stop_channel(ndev->mgmt_chann);
328334
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
@@ -406,15 +412,15 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
406412
goto stop_psp;
407413
}
408414

409-
ret = aie2_pm_init(ndev);
415+
ret = aie2_mgmt_fw_init(ndev);
410416
if (ret) {
411-
XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
417+
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
412418
goto destroy_mgmt_chann;
413419
}
414420

415-
ret = aie2_mgmt_fw_init(ndev);
421+
ret = aie2_pm_init(ndev);
416422
if (ret) {
417-
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
423+
XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
418424
goto destroy_mgmt_chann;
419425
}
420426

@@ -451,7 +457,6 @@ static int aie2_hw_suspend(struct amdxdna_dev *xdna)
451457
{
452458
struct amdxdna_client *client;
453459

454-
guard(mutex)(&xdna->dev_lock);
455460
list_for_each_entry(client, &xdna->client_list, node)
456461
aie2_hwctx_suspend(client);
457462

@@ -489,6 +494,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
489494
struct psp_config psp_conf;
490495
const struct firmware *fw;
491496
unsigned long bars = 0;
497+
char *fw_full_path;
492498
int i, nvec, ret;
493499

494500
if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
@@ -503,7 +509,19 @@ static int aie2_init(struct amdxdna_dev *xdna)
503509
ndev->priv = xdna->dev_info->dev_priv;
504510
ndev->xdna = xdna;
505511

506-
ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
512+
for (i = 0; i < ARRAY_SIZE(npu_fw); i++) {
513+
fw_full_path = kasprintf(GFP_KERNEL, "%s%s", ndev->priv->fw_path, npu_fw[i]);
514+
if (!fw_full_path)
515+
return -ENOMEM;
516+
517+
ret = firmware_request_nowarn(&fw, fw_full_path, &pdev->dev);
518+
kfree(fw_full_path);
519+
if (!ret) {
520+
XDNA_INFO(xdna, "Load firmware %s%s", ndev->priv->fw_path, npu_fw[i]);
521+
break;
522+
}
523+
}
524+
507525
if (ret) {
508526
XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
509527
ndev->priv->fw_path, ret);
@@ -951,7 +969,7 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
951969
if (!drm_dev_enter(&xdna->ddev, &idx))
952970
return -ENODEV;
953971

954-
ret = amdxdna_pm_resume_get(xdna);
972+
ret = amdxdna_pm_resume_get_locked(xdna);
955973
if (ret)
956974
goto dev_exit;
957975

@@ -1044,7 +1062,7 @@ static int aie2_get_array(struct amdxdna_client *client,
10441062
if (!drm_dev_enter(&xdna->ddev, &idx))
10451063
return -ENODEV;
10461064

1047-
ret = amdxdna_pm_resume_get(xdna);
1065+
ret = amdxdna_pm_resume_get_locked(xdna);
10481066
if (ret)
10491067
goto dev_exit;
10501068

@@ -1134,7 +1152,7 @@ static int aie2_set_state(struct amdxdna_client *client,
11341152
if (!drm_dev_enter(&xdna->ddev, &idx))
11351153
return -ENODEV;
11361154

1137-
ret = amdxdna_pm_resume_get(xdna);
1155+
ret = amdxdna_pm_resume_get_locked(xdna);
11381156
if (ret)
11391157
goto dev_exit;
11401158

drivers/accel/amdxdna/aie2_pm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
3131
{
3232
int ret;
3333

34-
ret = amdxdna_pm_resume_get(ndev->xdna);
34+
ret = amdxdna_pm_resume_get_locked(ndev->xdna);
3535
if (ret)
3636
return ret;
3737

drivers/accel/amdxdna/amdxdna_ctx.c

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,10 @@ void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
104104

105105
if (size) {
106106
count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
107-
if (unlikely(count <= num_masks)) {
107+
if (unlikely(count <= num_masks ||
108+
count * sizeof(u32) +
109+
offsetof(struct amdxdna_cmd, data[0]) >
110+
abo->mem.size)) {
108111
*size = 0;
109112
return NULL;
110113
}
@@ -266,9 +269,9 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
266269
struct amdxdna_drm_config_hwctx *args = data;
267270
struct amdxdna_dev *xdna = to_xdna_dev(dev);
268271
struct amdxdna_hwctx *hwctx;
269-
int ret, idx;
270272
u32 buf_size;
271273
void *buf;
274+
int ret;
272275
u64 val;
273276

274277
if (XDNA_MBZ_DBG(xdna, &args->pad, sizeof(args->pad)))
@@ -310,20 +313,17 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
310313
return -EINVAL;
311314
}
312315

313-
mutex_lock(&xdna->dev_lock);
314-
idx = srcu_read_lock(&client->hwctx_srcu);
316+
guard(mutex)(&xdna->dev_lock);
315317
hwctx = xa_load(&client->hwctx_xa, args->handle);
316318
if (!hwctx) {
317319
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
318320
ret = -EINVAL;
319-
goto unlock_srcu;
321+
goto free_buf;
320322
}
321323

322324
ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
323325

324-
unlock_srcu:
325-
srcu_read_unlock(&client->hwctx_srcu, idx);
326-
mutex_unlock(&xdna->dev_lock);
326+
free_buf:
327327
kfree(buf);
328328
return ret;
329329
}
@@ -334,7 +334,7 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
334334
struct amdxdna_hwctx *hwctx;
335335
struct amdxdna_gem_obj *abo;
336336
struct drm_gem_object *gobj;
337-
int ret, idx;
337+
int ret;
338338

339339
if (!xdna->dev_info->ops->hwctx_sync_debug_bo)
340340
return -EOPNOTSUPP;
@@ -345,17 +345,15 @@ int amdxdna_hwctx_sync_debug_bo(struct amdxdna_client *client, u32 debug_bo_hdl)
345345

346346
abo = to_xdna_obj(gobj);
347347
guard(mutex)(&xdna->dev_lock);
348-
idx = srcu_read_lock(&client->hwctx_srcu);
349348
hwctx = xa_load(&client->hwctx_xa, abo->assigned_hwctx);
350349
if (!hwctx) {
351350
ret = -EINVAL;
352-
goto unlock_srcu;
351+
goto put_obj;
353352
}
354353

355354
ret = xdna->dev_info->ops->hwctx_sync_debug_bo(hwctx, debug_bo_hdl);
356355

357-
unlock_srcu:
358-
srcu_read_unlock(&client->hwctx_srcu, idx);
356+
put_obj:
359357
drm_gem_object_put(gobj);
360358
return ret;
361359
}

0 commit comments

Comments
 (0)