Skip to content

Commit 65b5c32

Browse files
amd-sukhatri authored and alexdeucher committed
drm/amdgpu/userq: refcount userqueues to avoid any race conditions
To avoid race condition and avoid UAF cases, implement kref based queues and protect the below operations using xa lock a. Getting a queue from xarray b. Increment/Decrement it's refcount Every time some one want to access a queue, always get via amdgpu_userq_get to make sure we have locks in place and get the object if active. A userqueue is destroyed on the last refcount is dropped which typically would be via IOCTL or during fini. v2: Add the missing drop in one the condition in the signal ioclt [Alex] v3: remove the queue from the xarray first in the free queue ioctl path [Christian] - Pass queue to the amdgpu_userq_put directly. - make amdgpu_userq_put xa_lock free since we are doing put for each get only and final put is done via destroy and we remove the queue from xa with lock. - use userq_put in fini too so cleanup is done fully. v4: Use xa_erase directly rather than doing load and erase in free ioctl. Also remove some of the error logs which could be exploited by the user to flood the logs [Christian] Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit 4952189) Cc: <stable@vger.kernel.org> # 048c1c4: drm/amdgpu/userq: Consolidate wait ioctl exit path Cc: <stable@vger.kernel.org>
1 parent 048c1c4 commit 65b5c32

3 files changed

Lines changed: 95 additions & 39 deletions

File tree

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

Lines changed: 81 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -446,8 +446,7 @@ static int amdgpu_userq_wait_for_last_fence(struct amdgpu_usermode_queue *queue)
446446
return ret;
447447
}
448448

449-
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
450-
int queue_id)
449+
static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue)
451450
{
452451
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
453452
struct amdgpu_device *adev = uq_mgr->adev;
@@ -461,7 +460,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
461460
uq_funcs->mqd_destroy(queue);
462461
amdgpu_userq_fence_driver_free(queue);
463462
/* Use interrupt-safe locking since IRQ handlers may access these XArrays */
464-
xa_erase_irq(&uq_mgr->userq_xa, (unsigned long)queue_id);
465463
xa_erase_irq(&adev->userq_doorbell_xa, queue->doorbell_index);
466464
queue->userq_mgr = NULL;
467465
list_del(&queue->userq_va_list);
@@ -470,12 +468,6 @@ static void amdgpu_userq_cleanup(struct amdgpu_usermode_queue *queue,
470468
up_read(&adev->reset_domain->sem);
471469
}
472470

473-
static struct amdgpu_usermode_queue *
474-
amdgpu_userq_find(struct amdgpu_userq_mgr *uq_mgr, int qid)
475-
{
476-
return xa_load(&uq_mgr->userq_xa, qid);
477-
}
478-
479471
void
480472
amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *uq_mgr,
481473
struct amdgpu_eviction_fence_mgr *evf_mgr)
@@ -625,22 +617,13 @@ amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
625617
}
626618

627619
static int
628-
amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
620+
amdgpu_userq_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
629621
{
630-
struct amdgpu_fpriv *fpriv = filp->driver_priv;
631-
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
632622
struct amdgpu_device *adev = uq_mgr->adev;
633-
struct amdgpu_usermode_queue *queue;
634623
int r = 0;
635624

636625
cancel_delayed_work_sync(&uq_mgr->resume_work);
637626
mutex_lock(&uq_mgr->userq_mutex);
638-
queue = amdgpu_userq_find(uq_mgr, queue_id);
639-
if (!queue) {
640-
drm_dbg_driver(adev_to_drm(uq_mgr->adev), "Invalid queue id to destroy\n");
641-
mutex_unlock(&uq_mgr->userq_mutex);
642-
return -EINVAL;
643-
}
644627
amdgpu_userq_wait_for_last_fence(queue);
645628
/* Cancel any pending hang detection work and cleanup */
646629
if (queue->hang_detect_fence) {
@@ -672,14 +655,45 @@ amdgpu_userq_destroy(struct drm_file *filp, int queue_id)
672655
drm_warn(adev_to_drm(uq_mgr->adev), "trying to destroy a HW mapping userq\n");
673656
queue->state = AMDGPU_USERQ_STATE_HUNG;
674657
}
675-
amdgpu_userq_cleanup(queue, queue_id);
658+
amdgpu_userq_cleanup(queue);
676659
mutex_unlock(&uq_mgr->userq_mutex);
677660

678661
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
679662

680663
return r;
681664
}
682665

666+
static void amdgpu_userq_kref_destroy(struct kref *kref)
667+
{
668+
int r;
669+
struct amdgpu_usermode_queue *queue =
670+
container_of(kref, struct amdgpu_usermode_queue, refcount);
671+
struct amdgpu_userq_mgr *uq_mgr = queue->userq_mgr;
672+
673+
r = amdgpu_userq_destroy(uq_mgr, queue);
674+
if (r)
675+
drm_file_err(uq_mgr->file, "Failed to destroy usermode queue %d\n", r);
676+
}
677+
678+
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
679+
{
680+
struct amdgpu_usermode_queue *queue;
681+
682+
xa_lock(&uq_mgr->userq_xa);
683+
queue = xa_load(&uq_mgr->userq_xa, qid);
684+
if (queue)
685+
kref_get(&queue->refcount);
686+
xa_unlock(&uq_mgr->userq_xa);
687+
688+
return queue;
689+
}
690+
691+
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
692+
{
693+
if (queue)
694+
kref_put(&queue->refcount, amdgpu_userq_kref_destroy);
695+
}
696+
683697
static int amdgpu_userq_priority_permit(struct drm_file *filp,
684698
int priority)
685699
{
@@ -834,6 +848,9 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
834848
goto unlock;
835849
}
836850

851+
/* drop this refcount during queue destroy */
852+
kref_init(&queue->refcount);
853+
837854
/* Wait for mode-1 reset to complete */
838855
down_read(&adev->reset_domain->sem);
839856
r = xa_err(xa_store_irq(&adev->userq_doorbell_xa, index, queue, GFP_KERNEL));
@@ -985,7 +1002,9 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
9851002
struct drm_file *filp)
9861003
{
9871004
union drm_amdgpu_userq *args = data;
988-
int r;
1005+
struct amdgpu_fpriv *fpriv = filp->driver_priv;
1006+
struct amdgpu_usermode_queue *queue;
1007+
int r = 0;
9891008

9901009
if (!amdgpu_userq_enabled(dev))
9911010
return -ENOTSUPP;
@@ -1000,11 +1019,16 @@ int amdgpu_userq_ioctl(struct drm_device *dev, void *data,
10001019
drm_file_err(filp, "Failed to create usermode queue\n");
10011020
break;
10021021

1003-
case AMDGPU_USERQ_OP_FREE:
1004-
r = amdgpu_userq_destroy(filp, args->in.queue_id);
1005-
if (r)
1006-
drm_file_err(filp, "Failed to destroy usermode queue\n");
1022+
case AMDGPU_USERQ_OP_FREE: {
1023+
xa_lock(&fpriv->userq_mgr.userq_xa);
1024+
queue = __xa_erase(&fpriv->userq_mgr.userq_xa, args->in.queue_id);
1025+
xa_unlock(&fpriv->userq_mgr.userq_xa);
1026+
if (!queue)
1027+
return -ENOENT;
1028+
1029+
amdgpu_userq_put(queue);
10071030
break;
1031+
}
10081032

10091033
default:
10101034
drm_dbg_driver(dev, "Invalid user queue op specified: %d\n", args->in.op);
@@ -1023,16 +1047,23 @@ amdgpu_userq_restore_all(struct amdgpu_userq_mgr *uq_mgr)
10231047

10241048
/* Resume all the queues for this process */
10251049
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1050+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1051+
if (!queue)
1052+
continue;
1053+
10261054
if (!amdgpu_userq_buffer_vas_mapped(queue)) {
10271055
drm_file_err(uq_mgr->file,
10281056
"trying restore queue without va mapping\n");
10291057
queue->state = AMDGPU_USERQ_STATE_INVALID_VA;
1058+
amdgpu_userq_put(queue);
10301059
continue;
10311060
}
10321061

10331062
r = amdgpu_userq_restore_helper(queue);
10341063
if (r)
10351064
ret = r;
1065+
1066+
amdgpu_userq_put(queue);
10361067
}
10371068

10381069
if (ret)
@@ -1266,9 +1297,13 @@ amdgpu_userq_evict_all(struct amdgpu_userq_mgr *uq_mgr)
12661297
amdgpu_userq_detect_and_reset_queues(uq_mgr);
12671298
/* Try to unmap all the queues in this process ctx */
12681299
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1300+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1301+
if (!queue)
1302+
continue;
12691303
r = amdgpu_userq_preempt_helper(queue);
12701304
if (r)
12711305
ret = r;
1306+
amdgpu_userq_put(queue);
12721307
}
12731308

12741309
if (ret)
@@ -1301,16 +1336,24 @@ amdgpu_userq_wait_for_signal(struct amdgpu_userq_mgr *uq_mgr)
13011336
int ret;
13021337

13031338
xa_for_each(&uq_mgr->userq_xa, queue_id, queue) {
1339+
queue = amdgpu_userq_get(uq_mgr, queue_id);
1340+
if (!queue)
1341+
continue;
1342+
13041343
struct dma_fence *f = queue->last_fence;
13051344

1306-
if (!f || dma_fence_is_signaled(f))
1345+
if (!f || dma_fence_is_signaled(f)) {
1346+
amdgpu_userq_put(queue);
13071347
continue;
1348+
}
13081349
ret = dma_fence_wait_timeout(f, true, msecs_to_jiffies(100));
13091350
if (ret <= 0) {
13101351
drm_file_err(uq_mgr->file, "Timed out waiting for fence=%llu:%llu\n",
13111352
f->context, f->seqno);
1353+
amdgpu_userq_put(queue);
13121354
return -ETIMEDOUT;
13131355
}
1356+
amdgpu_userq_put(queue);
13141357
}
13151358

13161359
return 0;
@@ -1361,20 +1404,23 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *f
13611404
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr)
13621405
{
13631406
struct amdgpu_usermode_queue *queue;
1364-
unsigned long queue_id;
1407+
unsigned long queue_id = 0;
1408+
1409+
for (;;) {
1410+
xa_lock(&userq_mgr->userq_xa);
1411+
queue = xa_find(&userq_mgr->userq_xa, &queue_id, ULONG_MAX,
1412+
XA_PRESENT);
1413+
if (queue)
1414+
__xa_erase(&userq_mgr->userq_xa, queue_id);
1415+
xa_unlock(&userq_mgr->userq_xa);
13651416

1366-
cancel_delayed_work_sync(&userq_mgr->resume_work);
1417+
if (!queue)
1418+
break;
13671419

1368-
mutex_lock(&userq_mgr->userq_mutex);
1369-
amdgpu_userq_detect_and_reset_queues(userq_mgr);
1370-
xa_for_each(&userq_mgr->userq_xa, queue_id, queue) {
1371-
amdgpu_userq_wait_for_last_fence(queue);
1372-
amdgpu_userq_unmap_helper(queue);
1373-
amdgpu_userq_cleanup(queue, queue_id);
1420+
amdgpu_userq_put(queue);
13741421
}
13751422

13761423
xa_destroy(&userq_mgr->userq_xa);
1377-
mutex_unlock(&userq_mgr->userq_mutex);
13781424
mutex_destroy(&userq_mgr->userq_mutex);
13791425
}
13801426

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ struct amdgpu_usermode_queue {
7474
struct dentry *debugfs_queue;
7575
struct delayed_work hang_detect_work;
7676
struct dma_fence *hang_detect_fence;
77+
struct kref refcount;
7778

7879
struct list_head userq_va_list;
7980
};
@@ -112,6 +113,9 @@ struct amdgpu_db_info {
112113
struct amdgpu_userq_obj *db_obj;
113114
};
114115

116+
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid);
117+
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue);
118+
115119
int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
116120

117121
int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,

drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
466466
struct drm_amdgpu_userq_signal *args = data;
467467
struct drm_gem_object **gobj_write = NULL;
468468
struct drm_gem_object **gobj_read = NULL;
469-
struct amdgpu_usermode_queue *queue;
469+
struct amdgpu_usermode_queue *queue = NULL;
470470
struct amdgpu_userq_fence *userq_fence;
471471
struct drm_syncobj **syncobj = NULL;
472472
u32 *bo_handles_write, num_write_bo_handles;
@@ -553,7 +553,7 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
553553
}
554554

555555
/* Retrieve the user queue */
556-
queue = xa_load(&userq_mgr->userq_xa, args->queue_id);
556+
queue = amdgpu_userq_get(userq_mgr, args->queue_id);
557557
if (!queue) {
558558
r = -ENOENT;
559559
goto put_gobj_write;
@@ -648,6 +648,9 @@ int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
648648
free_syncobj_handles:
649649
kfree(syncobj_handles);
650650

651+
if (queue)
652+
amdgpu_userq_put(queue);
653+
651654
return r;
652655
}
653656

@@ -660,7 +663,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
660663
struct drm_amdgpu_userq_wait *wait_info = data;
661664
struct amdgpu_fpriv *fpriv = filp->driver_priv;
662665
struct amdgpu_userq_mgr *userq_mgr = &fpriv->userq_mgr;
663-
struct amdgpu_usermode_queue *waitq;
666+
struct amdgpu_usermode_queue *waitq = NULL;
664667
struct drm_gem_object **gobj_write;
665668
struct drm_gem_object **gobj_read;
666669
struct dma_fence **fences = NULL;
@@ -926,7 +929,7 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
926929
*/
927930
num_fences = dma_fence_dedup_array(fences, num_fences);
928931

929-
waitq = xa_load(&userq_mgr->userq_xa, wait_info->waitq_id);
932+
waitq = amdgpu_userq_get(userq_mgr, wait_info->waitq_id);
930933
if (!waitq) {
931934
r = -EINVAL;
932935
goto free_fences;
@@ -1014,5 +1017,8 @@ int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
10141017
free_bo_handles_read:
10151018
kfree(bo_handles_read);
10161019

1020+
if (waitq)
1021+
amdgpu_userq_put(waitq);
1022+
10171023
return r;
10181024
}

0 commit comments

Comments
 (0)