Skip to content

Commit 681d787

Browse files
committed
Merge tag 'amd-drm-fixes-7.0-2026-03-04' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes
amd-drm-fixes-7.0-2026-03-04: amdgpu: - LUT fixes - VCN5 fix - Dispclk fix - SMU 13.x fix - Fix race in VM acquire - PSP 15.x fix - UserQ fix Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patch.msgid.link/20260304204837.1937266-1-alexander.deucher@amd.com
2 parents 11439c4 + 65b5c32 commit 681d787

14 files changed

Lines changed: 172 additions & 88 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,10 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
14391439
*process_info = info;
14401440
}
14411441

1442-
vm->process_info = *process_info;
1442+
if (cmpxchg(&vm->process_info, NULL, *process_info) != NULL) {
1443+
ret = -EINVAL;
1444+
goto already_acquired;
1445+
}
14431446

14441447
/* Validate page directory and attach eviction fence */
14451448
ret = amdgpu_bo_reserve(vm->root.bo, true);
@@ -1479,6 +1482,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
14791482
amdgpu_bo_unreserve(vm->root.bo);
14801483
reserve_pd_fail:
14811484
vm->process_info = NULL;
1485+
already_acquired:
14821486
if (info) {
14831487
dma_fence_put(&info->eviction_fence->base);
14841488
*process_info = NULL;

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

Lines changed: 81 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
446446
return ret;
447447
}
448448

449-
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
450-
int queue_id)
449+
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
451450
{
452451
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
453452
struct amdgpu_device *adev = uq_mgr->adev;
@@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
461460
uq_funcs->mqd_destroy(queue);
462461
amdgpu_userq_fence_driver_free(queue);
463462
/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
464-
xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id);
465463
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
466464
queue->userq_mgr = NULL;
467465
list_del(&queue->userq_va_list);
@@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
470468
up_read(&adev->reset_domain->sem);
471469
}
472470

473-
static struct amdgpu_usermode_queue *
474-
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
475-
{
476-
return xa_load(&uq_mgr->userq_xa, qid);
477-
}
478-
479471
void
480472
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
481473
struct amdgpu_eviction_fence_mgr *evf_mgr)
@@ -625,22 +617,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
625617
}
626618

627619
static int
628-
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
620+
amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
629621
{
630-
struct amdgpu_fpriv *fpriv = filp->driver_priv;
631-
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
632622
struct amdgpu_device *adev = uq_mgr->adev;
633-
struct amdgpu_usermode_queue *queue;
634623
int r = 0;
635624

636625
cancel_delayed_work_sync(&uq_mgr->resume_work);
637626
mutex_lock(&uq_mgr->userq_mutex);
638-
queue = amdgpu_userq_find(uq_mgr, queue_id);
639-
if (!queue) {
640-
drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
641-
mutex_unlock(&uq_mgr->userq_mutex);
642-
return -EINVAL;
643-
}
644627
amdgpu_userq_wait_for_last_fence(queue);
645628
/* Cancel any pending hang detection work and cleanup */
646629
if (queue->hang_detect_fence) {
@@ -672,14 +655,45 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
672655
drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
673656
queue->state = AMDGPU_USERQ_STATE_HUNG;
674657
}
675-
amdgpu_userq_cleanup(queue, queue_id);
658+
amdgpu_userq_cleanup(queue);
676659
mutex_unlock(&uq_mgr->userq_mutex);
677660

678661
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
679662

680663
return r;
681664
}
682665

666+
static void amdgpu_userq_kref_destroy(struct kref *kref)
667+
{
668+
int r;
669+
struct amdgpu_usermode_queue *queue =
670+
container_of(kref, struct amdgpu_usermode_queue, refcount);
671+
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
672+
673+
r = amdgpu_userq_destroy(uq_mgr, queue);
674+
if (r)
675+
drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r);
676+
}
677+
678+
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
679+
{
680+
struct amdgpu_usermode_queue *queue;
681+
682+
xa_lock(&uq_mgr->userq_xa);
683+
queue = xa_load(&uq_mgr->userq_xa, qid);
684+
if (queue)
685+
kref_get(&queue->refcount);
686+
xa_unlock(&uq_mgr->userq_xa);
687+
688+
return queue;
689+
}
690+
691+
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
692+
{
693+
if (queue)
694+
kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
695+
}
696+
683697
static int amdgpu_userq_priority_permit(struct drm_file *filp,
684698
int priority)
685699
{
@@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
834848
goto unlock;
835849
}
836850

851+
/* drop this refcount during queue destroy */
852+
kref_init(&queue->refcount);
853+
837854
/* Wait for mode-1 reset to complete */
838855
down_read(&adev->reset_domain->sem);
839856
r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
@@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
9851002
struct drm_file *filp)
9861003
{
9871004
union drm_amdgpu_userq *args = data;
988-
int r;
1005+
struct amdgpu_fpriv *fpriv = filp->driver_priv;
1006+
struct amdgpu_usermode_queue *queue;
1007+
int r = 0;
9891008

9901009
if (!amdgpu_userq_enabled(dev))
9911010
return -ENOTSUPP;
@@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
10001019
drm_file_err(filp, "Failed to create usermode queue\n");
10011020
break;
10021021

1003-
case AMDGPU_USERQ_OP_FREE:
1004-
r = amdgpu_userq_destroy(filp, args->in.queue_id);
1005-
if (r)
1006-
drm_file_err(filp, "Failed to destroy usermode queue\n");
1022+
case AMDGPU_USERQ_OP_FREE: {
1023+
xa_lock(&fpriv->userq_mgr.userq_xa);
1024+
queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id);
1025+
xa_unlock(&fpriv->userq_mgr.userq_xa);
1026+
if (!queue)
1027+
return -ENOENT;
1028+
1029+
amdgpu_userq_put(queue);
10071030
break;
1031+
}
10081032

10091033
default:
10101034
drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
@@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
10231047

10241048
/* Resume all the queues for this process */
10251049
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1050+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1051+
if (!queue)
1052+
continue;
1053+
10261054
if (!amdgpu_userq_buffer_vas_mapped(queue)) {
10271055
drm_file_err(uq_mgr->file,
10281056
"trying restore queue without va mapping\n");
10291057
queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
1058+
amdgpu_userq_put(queue);
10301059
continue;
10311060
}
10321061

10331062
r = amdgpu_userq_restore_helper(queue);
10341063
if (r)
10351064
ret = r;
1065+
1066+
amdgpu_userq_put(queue);
10361067
}
10371068

10381069
if (ret)
@@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
12661297
amdgpu_userq_detect_and_reset_queues(uq_mgr);
12671298
/* Try to unmap all the queues in this process ctx */
12681299
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1300+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1301+
if (!queue)
1302+
continue;
12691303
r = amdgpu_userq_preempt_helper(queue);
12701304
if (r)
12711305
ret = r;
1306+
amdgpu_userq_put(queue);
12721307
}
12731308

12741309
if (ret)
@@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
13011336
int ret;
13021337

13031338
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1339+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1340+
if (!queue)
1341+
continue;
1342+
13041343
struct dma_fence *f = queue->last_fence;
13051344

1306-
if (!f || dma_fence_is_signaled(f))
1345+
if (!f || dma_fence_is_signaled(f)) {
1346+
amdgpu_userq_put(queue);
13071347
continue;
1348+
}
13081349
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
13091350
if (ret <= 0) {
13101351
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
13111352
f->context, f->seqno);
1353+
amdgpu_userq_put(queue);
13121354
return -ETIMEDOUT;
13131355
}
1356+
amdgpu_userq_put(queue);
13141357
}
13151358

13161359
return 0;
@@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
13611404
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
13621405
{
13631406
struct amdgpu_usermode_queue *queue;
1364-
unsigned long queue_id;
1407+
unsigned long queue_id = 0;
1408+
1409+
for (;;) {
1410+
xa_lock(&userq_mgr->userq_xa);
1411+
queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX,
1412+
XA_PRESENT);
1413+
if (queue)
1414+
__xa_erase(&userq_mgr->userq_xa, queue_id);
1415+
xa_unlock(&userq_mgr->userq_xa);
13651416

1366-
cancel_delayed_work_sync(&userq_mgr->resume_work);
1417+
if (!queue)
1418+
break;
13671419

1368-
mutex_lock(&userq_mgr->userq_mutex);
1369-
amdgpu_userq_detect_and_reset_queues(userq_mgr);
1370-
xa_for_each(&userq_mgr->userq_xa, queue_id, queue) {
1371-
amdgpu_userq_wait_for_last_fence(queue);
1372-
amdgpu_userq_unmap_helper(queue);
1373-
amdgpu_userq_cleanup(queue, queue_id);
1420+
amdgpu_userq_put(queue);
13741421
}
13751422

13761423
xa_destroy(&userq_mgr->userq_xa);
1377-
mutex_unlock(&userq_mgr->userq_mutex);
13781424
mutex_destroy(&userq_mgr->userq_mutex);
13791425
}
13801426

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ struct amdgpu_usermode_queue {
7474
struct dentry *debugfs_queue;
7575
struct delayed_work hang_detect_work;
7676
struct dma_fence *hang_detect_fence;
77+
struct kref refcount;
7778

7879
struct list_head userq_va_list;
7980
};
@@ -112,6 +113,9 @@ struct amdgpu_db_info {
112113
struct amdgpu_userq_obj *db_obj;
113114
};
114115

116+
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid);
117+
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue);
118+
115119
int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
116120

117121
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,

drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
466466
struct drm_amdgpu_userq_signal *args = data;
467467
struct drm_gem_object **gobj_write = NULL;
468468
struct drm_gem_object **gobj_read = NULL;
469-
struct amdgpu_usermode_queue *queue;
469+
struct amdgpu_usermode_queue *queue = NULL;
470470
struct amdgpu_userq_fence *userq_fence;
471471
struct drm_syncobj **syncobj = NULL;
472472
u32 *bo_handles_write, num_write_bo_handles;
@@ -553,7 +553,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
553553
}
554554

555555
/* Retrieve the user queue */
556-
queue = xa_load(&userq_mgr->userq_xa, args->queue_id);
556+
queue = amdgpu_userq_get(userq_mgr, args->queue_id);
557557
if (!queue) {
558558
r = -ENOENT;
559559
goto put_gobj_write;
@@ -648,6 +648,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
648648
free_syncobj_handles:
649649
kfree(syncobj_handles);
650650

651+
if (queue)
652+
amdgpu_userq_put(queue);
653+
651654
return r;
652655
}
653656

@@ -660,7 +663,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
660663
struct drm_amdgpu_userq_wait *wait_info = data;
661664
struct amdgpu_fpriv *fpriv = filp->driver_priv;
662665
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
663-
struct amdgpu_usermode_queue *waitq;
666+
struct amdgpu_usermode_queue *waitq = NULL;
664667
struct drm_gem_object **gobj_write;
665668
struct drm_gem_object **gobj_read;
666669
struct dma_fence **fences = NULL;
@@ -926,7 +929,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
926929
*/
927930
num_fences = dma_fence_dedup_array(fences, num_fences);
928931

929-
waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id);
932+
waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id);
930933
if (!waitq) {
931934
r = -EINVAL;
932935
goto free_fences;
@@ -983,32 +986,14 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
983986
r = -EFAULT;
984987
goto free_fences;
985988
}
986-
987-
kfree(fences);
988-
kfree(fence_info);
989989
}
990990

991-
drm_exec_fini(&exec);
992-
for (i = 0; i < num_read_bo_handles; i++)
993-
drm_gem_object_put(gobj_read[i]);
994-
kfree(gobj_read);
995-
996-
for (i = 0; i < num_write_bo_handles; i++)
997-
drm_gem_object_put(gobj_write[i]);
998-
kfree(gobj_write);
999-
1000-
kfree(timeline_points);
1001-
kfree(timeline_handles);
1002-
kfree(syncobj_handles);
1003-
kfree(bo_handles_write);
1004-
kfree(bo_handles_read);
1005-
1006-
return 0;
1007-
1008991
free_fences:
1009-
while (num_fences-- > 0)
1010-
dma_fence_put(fences[num_fences]);
1011-
kfree(fences);
992+
if (fences) {
993+
while (num_fences-- > 0)
994+
dma_fence_put(fences[num_fences]);
995+
kfree(fences);
996+
}
1012997
free_fence_info:
1013998
kfree(fence_info);
1014999
exec_fini:
@@ -1032,5 +1017,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
10321017
free_bo_handles_read:
10331018
kfree(bo_handles_read);
10341019

1020+
if (waitq)
1021+
amdgpu_userq_put(waitq);
1022+
10351023
return r;
10361024
}

0 commit comments

Comments
 (0)