Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch | 134 |
1 file changed, 134 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch
new file mode 100644
index 00000000..3a7ddd9a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch
@@ -0,0 +1,134 @@
+From c61a07fd15a9ffb3c3c2f5337dd4e44b407e49b4 Mon Sep 17 00:00:00 2001
+From: Philip Yang <Philip.Yang@amd.com>
+Date: Wed, 5 Dec 2018 14:03:43 -0500
+Subject: [PATCH 2766/2940] drm/amdkfd: avoid HMM change cause circular lock
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+There is circular lock between gfx and kfd path with HMM change:
+lock(dqm) -> bo::reserve -> amdgpu_mn_lock
+
+To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
+locking between mmap_sem and bo::reserve. The locking order
+is: bo::reserve -> amdgpu_mn_lock(p->mn)
+
+Change-Id: Ifb2a8af2a17d2d78ae3df7aa782785964c22fd15
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+---
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 31 ++++++++++---------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 525dea08d208..602284ffd976 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -1196,28 +1196,27 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ 	int retval;
+ 	struct mqd_manager *mqd_mgr;
+ 
+-	retval = 0;
+-
+-	dqm_lock(dqm);
+-
+ 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+ 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
+ 				dqm->total_queue_count);
+ 		retval = -EPERM;
+-		goto out_unlock;
++		goto out;
+ 	}
+ 
+ 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ 		retval = allocate_sdma_queue(dqm, q);
+ 		if (retval)
+-			goto out_unlock;
++			goto out;
+ 	}
+ 
+ 	retval = allocate_doorbell(qpd, q);
+ 	if (retval)
+ 		goto out_deallocate_sdma_queue;
+ 
++	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
++	 * lock(dqm) -> bo::reserve
++	 */
+ 	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ 			q->properties.type)];
+ 	/*
+@@ -1228,9 +1227,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ 	q->properties.is_evicted = (q->properties.queue_size > 0 &&
+ 			q->properties.queue_percent > 0 &&
+ 			q->properties.queue_address != 0);
+-
+ 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+-
+ 	q->properties.tba_addr = qpd->tba_addr;
+ 	q->properties.tma_addr = qpd->tma_addr;
+ 	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+@@ -1238,6 +1235,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ 	if (retval)
+ 		goto out_deallocate_doorbell;
+ 
++	dqm_lock(dqm);
++
+ 	list_add(&q->list, &qpd->queues_list);
+ 	qpd->queue_count++;
+ 	if (q->properties.is_active) {
+@@ -1268,9 +1267,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ 		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ 		deallocate_sdma_queue(dqm, q);
+-out_unlock:
+-	dqm_unlock(dqm);
+-
++out:
+ 	return retval;
+ }
+ 
+@@ -1436,8 +1433,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ 		qpd->reset_wavefronts = true;
+ 	}
+ 
+-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+-
+ 	/*
+ 	 * Unconditionally decrement this counter, regardless of the queue's
+ 	 * type
+@@ -1448,6 +1443,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ 
+ 	dqm_unlock(dqm);
+ 
++	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
++	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
++
+ 	return retval;
+ 
+ failed_try_destroy_debugged_queue:
+@@ -1669,7 +1667,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ 		qpd->reset_wavefronts = false;
+ 	}
+ 
+-	/* lastly, free mqd resources */
++	dqm_unlock(dqm);
++
++	/* Lastly, free mqd resources.
++	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
++	 */
+ 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ 		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ 				q->properties.type)];
+@@ -1678,7 +1680,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ 		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ 	}
+ 
+-	dqm_unlock(dqm);
+ 	return retval;
+ }
+ 
+-- 
+2.17.1
+
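
As a rough illustration of the lock-ordering idea described in the commit message, the standalone C sketch below restates it with plain POSIX mutexes. It is not part of the patch: dqm_mutex, bo_reserve_mutex and the create_queue_before()/create_queue_after() bodies are hypothetical stand-ins for lock(dqm), bo::reserve and create_queue_cpsch(), chosen only to show why the bo::reserve-style work must happen before the dqm lock is taken.

/*
 * Minimal sketch, assuming nothing beyond POSIX threads.  The mutexes and
 * function bodies are placeholders, not the real amdkfd code.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dqm_mutex = PTHREAD_MUTEX_INITIALIZER;        /* lock(dqm) analogue */
static pthread_mutex_t bo_reserve_mutex = PTHREAD_MUTEX_INITIALIZER; /* bo::reserve analogue */

/* Ordering before the patch: the bo::reserve-style work (init_mqd) runs
 * while the dqm lock is held, so this path orders dqm -> bo::reserve.
 * A concurrent gfx path that takes bo::reserve first would close the
 * circular dependency; with only one thread, as here, nothing deadlocks. */
static void create_queue_before(void)
{
	pthread_mutex_lock(&dqm_mutex);
	pthread_mutex_lock(&bo_reserve_mutex);   /* init_mqd() analogue inside lock(dqm) */
	/* ... initialize MQD ... */
	pthread_mutex_unlock(&bo_reserve_mutex);
	/* ... queue list and counter updates ... */
	pthread_mutex_unlock(&dqm_mutex);
}

/* Ordering after the patch: do the bo::reserve-style work first, then take
 * the dqm lock only for the list/counter updates, so the dqm lock is never
 * held while bo::reserve is acquired. */
static void create_queue_after(void)
{
	pthread_mutex_lock(&bo_reserve_mutex);   /* init_mqd() analogue outside lock(dqm) */
	/* ... initialize MQD ... */
	pthread_mutex_unlock(&bo_reserve_mutex);

	pthread_mutex_lock(&dqm_mutex);
	/* ... add queue to list, bump counters ... */
	pthread_mutex_unlock(&dqm_mutex);
}

int main(void)
{
	create_queue_before();
	create_queue_after();
	puts("queue created without holding the dqm lock across bo::reserve");
	return 0;
}

The second ordering is what the patch enforces in create_queue_cpsch(), destroy_queue_cpsch() and process_termination_cpsch(): init_mqd() is called before dqm_lock() and uninit_mqd() after dqm_unlock(), so bo::reserve is never nested inside the dqm lock.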