Skip to content

Commit a7b2451

Browse files
amd-yahuil authored and alexdeucher committed
drm/amdkfd: Fix circular lock in nocpsch path
Calling free_mqd inside of destroy_queue_nocpsch_locked can cause a circular lock. destroy_queue_nocpsch_locked is called under the DQM lock, which is also taken in MMU notifiers, potentially in FS reclaim context. Taking another lock (the BO reservation lock, taken by free_mqd) while causing an FS reclaim inside the DQM lock creates a problematic circular lock dependency. Therefore, move free_mqd out of destroy_queue_nocpsch_locked and call it after unlocking DQM.

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent d760895 commit a7b2451

1 file changed

Lines changed: 13 additions & 5 deletions

File tree

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -486,9 +486,6 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
486486
if (retval == -ETIME)
487487
qpd->reset_wavefronts = true;
488488

489-
490-
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
491-
492489
list_del(&q->list);
493490
if (list_empty(&qpd->queues_list)) {
494491
if (qpd->reset_wavefronts) {
@@ -523,6 +520,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
523520
int retval;
524521
uint64_t sdma_val = 0;
525522
struct kfd_process_device *pdd = qpd_to_pdd(qpd);
523+
struct mqd_manager *mqd_mgr =
524+
dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
526525

527526
/* Get the SDMA queue stats */
528527
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
@@ -540,6 +539,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
540539
pdd->sdma_past_activity_counter += sdma_val;
541540
dqm_unlock(dqm);
542541

542+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
543+
543544
return retval;
544545
}
545546

@@ -1629,20 +1630,27 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
16291630
static int process_termination_nocpsch(struct device_queue_manager *dqm,
16301631
struct qcm_process_device *qpd)
16311632
{
1632-
struct queue *q, *next;
1633+
struct queue *q;
16331634
struct device_process_node *cur, *next_dpn;
16341635
int retval = 0;
16351636
bool found = false;
16361637

16371638
dqm_lock(dqm);
16381639

16391640
/* Clear all user mode queues */
1640-
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1641+
while (!list_empty(&qpd->queues_list)) {
1642+
struct mqd_manager *mqd_mgr;
16411643
int ret;
16421644

1645+
q = list_first_entry(&qpd->queues_list, struct queue, list);
1646+
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1647+
q->properties.type)];
16431648
ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
16441649
if (ret)
16451650
retval = ret;
1651+
dqm_unlock(dqm);
1652+
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1653+
dqm_lock(dqm);
16461654
}
16471655

16481656
/* Unregister process */

0 commit comments

Comments
 (0)