aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch134
1 files changed, 134 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch
new file mode 100644
index 00000000..3a7ddd9a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2766-drm-amdkfd-avoid-HMM-change-cause-circular-lock.patch
@@ -0,0 +1,134 @@
+From c61a07fd15a9ffb3c3c2f5337dd4e44b407e49b4 Mon Sep 17 00:00:00 2001
+From: Philip Yang <Philip.Yang@amd.com>
+Date: Wed, 5 Dec 2018 14:03:43 -0500
+Subject: [PATCH 2766/2940] drm/amdkfd: avoid HMM change cause circular lock
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+There is a circular lock between the gfx and kfd paths with the HMM change:
+lock(dqm) -> bo::reserve -> amdgpu_mn_lock
+
+To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
+locking between mmap_sem and bo::reserve. The locking order
+is: bo::reserve -> amdgpu_mn_lock(p->mn)
+
+Change-Id: Ifb2a8af2a17d2d78ae3df7aa782785964c22fd15
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+---
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 31 ++++++++++---------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 525dea08d208..602284ffd976 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -1196,28 +1196,27 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ int retval;
+ struct mqd_manager *mqd_mgr;
+
+- retval = 0;
+-
+- dqm_lock(dqm);
+-
+ if (dqm->total_queue_count >= max_num_of_queues_per_device) {
+ pr_warn("Can't create new usermode queue because %d queues were already created\n",
+ dqm->total_queue_count);
+ retval = -EPERM;
+- goto out_unlock;
++ goto out;
+ }
+
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
+ retval = allocate_sdma_queue(dqm, q);
+ if (retval)
+- goto out_unlock;
++ goto out;
+ }
+
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
++ /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
++ * lock(dqm) -> bo::reserve
++ */
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
+ /*
+@@ -1228,9 +1227,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ q->properties.is_evicted = (q->properties.queue_size > 0 &&
+ q->properties.queue_percent > 0 &&
+ q->properties.queue_address != 0);
+-
+ dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+-
+ q->properties.tba_addr = qpd->tba_addr;
+ q->properties.tma_addr = qpd->tma_addr;
+ retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+@@ -1238,6 +1235,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ if (retval)
+ goto out_deallocate_doorbell;
+
++ dqm_lock(dqm);
++
+ list_add(&q->list, &qpd->queues_list);
+ qpd->queue_count++;
+ if (q->properties.is_active) {
+@@ -1268,9 +1267,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
+ if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+ deallocate_sdma_queue(dqm, q);
+-out_unlock:
+- dqm_unlock(dqm);
+-
++out:
+ return retval;
+ }
+
+@@ -1436,8 +1433,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ qpd->reset_wavefronts = true;
+ }
+
+- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+-
+ /*
+ * Unconditionally decrement this counter, regardless of the queue's
+ * type
+@@ -1448,6 +1443,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+
+ dqm_unlock(dqm);
+
++ /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
++ mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
++
+ return retval;
+
+ failed_try_destroy_debugged_queue:
+@@ -1669,7 +1667,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ qpd->reset_wavefronts = false;
+ }
+
+- /* lastly, free mqd resources */
++ dqm_unlock(dqm);
++
++ /* Lastly, free mqd resources.
++ * Do uninit_mqd() after dqm_unlock to avoid circular locking.
++ */
+ list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
+ q->properties.type)];
+@@ -1678,7 +1680,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+ mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+ }
+
+- dqm_unlock(dqm);
+ return retval;
+ }
+
+--
+2.17.1
+