Skip to content

Commit dfb3142

Browse files
committed
Merge tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
 "Weekly fixes pull. There is one mm fix in here for a HMM livelock
  triggered by the xe driver tests. Otherwise it's a pretty wide range
  of fixes across the board, ttm UAF regression fix, amdgpu fixes,
  nouveau doesn't crash my laptop anymore fix, and a fair bit of misc.

  Seems about right for rc3.

  mm:
   - mm: Fix a hmm_range_fault() livelock / starvation problem

  pagemap:
   - Revert "drm/pagemap: Disable device-to-device migration"

  ttm:
   - fix function return breaking reclaim
   - fix build failure on PREEMPT_RT
   - fix bo->resource UAF

  dma-buf:
   - include ioctl.h in uapi header

  sched:
   - fix kernel doc warning

  amdgpu:
   - LUT fixes
   - VCN5 fix
   - Dispclk fix
   - SMU 13.x fix
   - Fix race in VM acquire
   - PSP 15.x fix
   - UserQ fix

  amdxdna:
   - fix invalid payload for failed command
   - fix NULL ptr dereference
   - fix major fw version check
   - avoid inconsistent fw state on error

  i915/display:
   - Fix for Lenovo T14 G7 display not refreshing

  xe:
   - Do not preempt fence signaling CS instructions
   - Some leak and finalization fixes
   - Workaround fix

  nouveau:
   - avoid runtime suspend oops when using dp aux

  panthor:
   - fix gem_sync argument ordering

  solomon:
   - fix incorrect display output

  renesas:
   - fix DSI divider programming

  ethosu:
   - fix job submit error clean-up refcount
   - fix NPU_OP_ELEMENTWISE validation
   - handle possible underflows in IFM size calcs"

* tag 'drm-fixes-2026-03-07' of https://gitlab.freedesktop.org/drm/kernel: (38 commits)
  accel: ethosu: Handle possible underflow in IFM size calculations
  accel: ethosu: Fix NPU_OP_ELEMENTWISE validation with scalar
  accel: ethosu: Fix job submit error clean-up refcount underflows
  accel/amdxdna: Split mailbox channel create function
  drm/panthor: Correct the order of arguments passed to gem_sync
  Revert "drm/syncobj: Fix handle <-> fd ioctls with dirty stack"
  drm/ttm: Fix bo resource use-after-free
  nouveau/dpcd: return EBUSY for aux xfer if the device is asleep
  accel/amdxdna: Fix major version check on NPU1 platform
  drm/amdgpu/userq: refcount userqueues to avoid any race conditions
  drm/amdgpu/userq: Consolidate wait ioctl exit path
  drm/amdgpu/psp: Use Indirect access address for GFX to PSP mailbox
  drm/amdgpu: Fix use-after-free race in VM acquire
  drm/amd/pm: remove invalid gpu_metrics.energy_accumulator on smu v13.0.x
  drm/xe: Fix memory leak in xe_vm_madvise_ioctl
  drm/xe/reg_sr: Fix leak on xa_store failure
  drm/xe/xe2_hpg: Correct implementation of Wa_16025250150
  drm/xe/gsc: Fix GSC proxy cleanup on early initialization failure
  Revert "drm/pagemap: Disable device-to-device migration"
  drm/i915/psr: Fix for Panel Replay X granularity DPCD register handling
  ...
2 parents 3593e67 + 96bfe9f commit dfb3142

51 files changed

Lines changed: 511 additions & 286 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

drivers/accel/amdxdna/aie2_ctx.c

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -186,13 +186,13 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
186186
cmd_abo = job->cmd_bo;
187187

188188
if (unlikely(job->job_timeout)) {
189-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
189+
amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
190190
ret = -EINVAL;
191191
goto out;
192192
}
193193

194194
if (unlikely(!data) || unlikely(size != sizeof(u32))) {
195-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
195+
amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
196196
ret = -EINVAL;
197197
goto out;
198198
}
@@ -202,7 +202,7 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
202202
if (status == AIE2_STATUS_SUCCESS)
203203
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
204204
else
205-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
205+
amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ERROR);
206206

207207
out:
208208
aie2_sched_notify(job);
@@ -244,13 +244,13 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
244244
cmd_abo = job->cmd_bo;
245245

246246
if (unlikely(job->job_timeout)) {
247-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
247+
amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_TIMEOUT);
248248
ret = -EINVAL;
249249
goto out;
250250
}
251251

252252
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
253-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
253+
amdxdna_cmd_set_error(cmd_abo, job, 0, ERT_CMD_STATE_ABORT);
254254
ret = -EINVAL;
255255
goto out;
256256
}
@@ -270,19 +270,12 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
270270
fail_cmd_idx, fail_cmd_status);
271271

272272
if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
273-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
273+
amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ABORT);
274274
ret = -EINVAL;
275-
goto out;
275+
} else {
276+
amdxdna_cmd_set_error(cmd_abo, job, fail_cmd_idx, ERT_CMD_STATE_ERROR);
276277
}
277-
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
278278

279-
if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
280-
struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
281-
282-
cc->error_index = fail_cmd_idx;
283-
if (cc->error_index >= cc->command_count)
284-
cc->error_index = 0;
285-
}
286279
out:
287280
aie2_sched_notify(job);
288281
return ret;

drivers/accel/amdxdna/aie2_message.c

Lines changed: 28 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,8 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
4040
return -ENODEV;
4141

4242
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
43-
if (ret == -ETIME) {
44-
xdna_mailbox_stop_channel(ndev->mgmt_chann);
45-
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
46-
ndev->mgmt_chann = NULL;
47-
}
43+
if (ret == -ETIME)
44+
aie2_destroy_mgmt_chann(ndev);
4845

4946
if (!ret && *hdl->status != AIE2_STATUS_SUCCESS) {
5047
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
@@ -296,20 +293,29 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
296293
}
297294

298295
intr_reg = i2x.mb_head_ptr_reg + 4;
299-
hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
300-
intr_reg, ret);
296+
hwctx->priv->mbox_chann = xdna_mailbox_alloc_channel(ndev->mbox);
301297
if (!hwctx->priv->mbox_chann) {
302298
XDNA_ERR(xdna, "Not able to create channel");
303299
ret = -EINVAL;
304300
goto del_ctx_req;
305301
}
302+
303+
ret = xdna_mailbox_start_channel(hwctx->priv->mbox_chann, &x2i, &i2x,
304+
intr_reg, ret);
305+
if (ret) {
306+
XDNA_ERR(xdna, "Not able to create channel");
307+
ret = -EINVAL;
308+
goto free_channel;
309+
}
306310
ndev->hwctx_num++;
307311

308312
XDNA_DBG(xdna, "Mailbox channel irq: %d, msix_id: %d", ret, resp.msix_id);
309313
XDNA_DBG(xdna, "Created fw ctx %d pasid %d", hwctx->fw_ctx_id, hwctx->client->pasid);
310314

311315
return 0;
312316

317+
free_channel:
318+
xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
313319
del_ctx_req:
314320
aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
315321
return ret;
@@ -325,7 +331,7 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
325331

326332
xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
327333
ret = aie2_destroy_context_req(ndev, hwctx->fw_ctx_id);
328-
xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
334+
xdna_mailbox_free_channel(hwctx->priv->mbox_chann);
329335
XDNA_DBG(xdna, "Destroyed fw ctx %d", hwctx->fw_ctx_id);
330336
hwctx->priv->mbox_chann = NULL;
331337
hwctx->fw_ctx_id = -1;
@@ -914,6 +920,20 @@ void aie2_msg_init(struct amdxdna_dev_hdl *ndev)
914920
ndev->exec_msg_ops = &legacy_exec_message_ops;
915921
}
916922

923+
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev)
924+
{
925+
struct amdxdna_dev *xdna = ndev->xdna;
926+
927+
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
928+
929+
if (!ndev->mgmt_chann)
930+
return;
931+
932+
xdna_mailbox_stop_channel(ndev->mgmt_chann);
933+
xdna_mailbox_free_channel(ndev->mgmt_chann);
934+
ndev->mgmt_chann = NULL;
935+
}
936+
917937
static inline struct amdxdna_gem_obj *
918938
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
919939
{

drivers/accel/amdxdna/aie2_pci.c

Lines changed: 37 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -330,9 +330,7 @@ static void aie2_hw_stop(struct amdxdna_dev *xdna)
330330

331331
aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, NULL);
332332
aie2_mgmt_fw_fini(ndev);
333-
xdna_mailbox_stop_channel(ndev->mgmt_chann);
334-
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
335-
ndev->mgmt_chann = NULL;
333+
aie2_destroy_mgmt_chann(ndev);
336334
drmm_kfree(&xdna->ddev, ndev->mbox);
337335
ndev->mbox = NULL;
338336
aie2_psp_stop(ndev->psp_hdl);
@@ -363,10 +361,29 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
363361
}
364362
pci_set_master(pdev);
365363

364+
mbox_res.ringbuf_base = ndev->sram_base;
365+
mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
366+
mbox_res.mbox_base = ndev->mbox_base;
367+
mbox_res.mbox_size = MBOX_SIZE(ndev);
368+
mbox_res.name = "xdna_mailbox";
369+
ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
370+
if (!ndev->mbox) {
371+
XDNA_ERR(xdna, "failed to create mailbox device");
372+
ret = -ENODEV;
373+
goto disable_dev;
374+
}
375+
376+
ndev->mgmt_chann = xdna_mailbox_alloc_channel(ndev->mbox);
377+
if (!ndev->mgmt_chann) {
378+
XDNA_ERR(xdna, "failed to alloc channel");
379+
ret = -ENODEV;
380+
goto disable_dev;
381+
}
382+
366383
ret = aie2_smu_init(ndev);
367384
if (ret) {
368385
XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
369-
goto disable_dev;
386+
goto free_channel;
370387
}
371388

372389
ret = aie2_psp_start(ndev->psp_hdl);
@@ -381,18 +398,6 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
381398
goto stop_psp;
382399
}
383400

384-
mbox_res.ringbuf_base = ndev->sram_base;
385-
mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
386-
mbox_res.mbox_base = ndev->mbox_base;
387-
mbox_res.mbox_size = MBOX_SIZE(ndev);
388-
mbox_res.name = "xdna_mailbox";
389-
ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
390-
if (!ndev->mbox) {
391-
XDNA_ERR(xdna, "failed to create mailbox device");
392-
ret = -ENODEV;
393-
goto stop_psp;
394-
}
395-
396401
mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
397402
if (mgmt_mb_irq < 0) {
398403
ret = mgmt_mb_irq;
@@ -401,52 +406,55 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
401406
}
402407

403408
xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
404-
ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
405-
&ndev->mgmt_x2i,
406-
&ndev->mgmt_i2x,
407-
xdna_mailbox_intr_reg,
408-
mgmt_mb_irq);
409-
if (!ndev->mgmt_chann) {
410-
XDNA_ERR(xdna, "failed to create management mailbox channel");
409+
ret = xdna_mailbox_start_channel(ndev->mgmt_chann,
410+
&ndev->mgmt_x2i,
411+
&ndev->mgmt_i2x,
412+
xdna_mailbox_intr_reg,
413+
mgmt_mb_irq);
414+
if (ret) {
415+
XDNA_ERR(xdna, "failed to start management mailbox channel");
411416
ret = -EINVAL;
412417
goto stop_psp;
413418
}
414419

415420
ret = aie2_mgmt_fw_init(ndev);
416421
if (ret) {
417422
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
418-
goto destroy_mgmt_chann;
423+
goto stop_fw;
419424
}
420425

421426
ret = aie2_pm_init(ndev);
422427
if (ret) {
423428
XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
424-
goto destroy_mgmt_chann;
429+
goto stop_fw;
425430
}
426431

427432
ret = aie2_mgmt_fw_query(ndev);
428433
if (ret) {
429434
XDNA_ERR(xdna, "failed to query fw, ret %d", ret);
430-
goto destroy_mgmt_chann;
435+
goto stop_fw;
431436
}
432437

433438
ret = aie2_error_async_events_alloc(ndev);
434439
if (ret) {
435440
XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
436-
goto destroy_mgmt_chann;
441+
goto stop_fw;
437442
}
438443

439444
ndev->dev_status = AIE2_DEV_START;
440445

441446
return 0;
442447

443-
destroy_mgmt_chann:
448+
stop_fw:
449+
aie2_suspend_fw(ndev);
444450
xdna_mailbox_stop_channel(ndev->mgmt_chann);
445-
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
446451
stop_psp:
447452
aie2_psp_stop(ndev->psp_hdl);
448453
fini_smu:
449454
aie2_smu_fini(ndev);
455+
free_channel:
456+
xdna_mailbox_free_channel(ndev->mgmt_chann);
457+
ndev->mgmt_chann = NULL;
450458
disable_dev:
451459
pci_disable_device(pdev);
452460

drivers/accel/amdxdna/aie2_pci.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -303,6 +303,7 @@ int aie2_get_array_async_error(struct amdxdna_dev_hdl *ndev,
303303

304304
/* aie2_message.c */
305305
void aie2_msg_init(struct amdxdna_dev_hdl *ndev);
306+
void aie2_destroy_mgmt_chann(struct amdxdna_dev_hdl *ndev);
306307
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
307308
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
308309
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);

drivers/accel/amdxdna/amdxdna_ctx.c

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,33 @@ u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
135135
return INVALID_CU_IDX;
136136
}
137137

138+
int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo,
139+
struct amdxdna_sched_job *job, u32 cmd_idx,
140+
enum ert_cmd_state error_state)
141+
{
142+
struct amdxdna_client *client = job->hwctx->client;
143+
struct amdxdna_cmd *cmd = abo->mem.kva;
144+
struct amdxdna_cmd_chain *cc = NULL;
145+
146+
cmd->header &= ~AMDXDNA_CMD_STATE;
147+
cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, error_state);
148+
149+
if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN) {
150+
cc = amdxdna_cmd_get_payload(abo, NULL);
151+
cc->error_index = (cmd_idx < cc->command_count) ? cmd_idx : 0;
152+
abo = amdxdna_gem_get_obj(client, cc->data[0], AMDXDNA_BO_CMD);
153+
if (!abo)
154+
return -EINVAL;
155+
cmd = abo->mem.kva;
156+
}
157+
158+
memset(cmd->data, 0xff, abo->mem.size - sizeof(*cmd));
159+
if (cc)
160+
amdxdna_gem_put_obj(abo);
161+
162+
return 0;
163+
}
164+
138165
/*
139166
* This should be called in close() and remove(). DO NOT call in other syscalls.
140167
* This guarantee that when hwctx and resources will be released, if user

drivers/accel/amdxdna/amdxdna_ctx.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,9 @@ amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
167167

168168
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
169169
u32 amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
170+
int amdxdna_cmd_set_error(struct amdxdna_gem_obj *abo,
171+
struct amdxdna_sched_job *job, u32 cmd_idx,
172+
enum ert_cmd_state error_state);
170173

171174
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
172175
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);

0 commit comments

Comments
 (0)