aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch169
1 files changed, 169 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch
new file mode 100644
index 00000000..fa464a14
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1324-drm-amdkfd-Fix-a-bug-that-vmid-is-released-before-re.patch
@@ -0,0 +1,169 @@
+From 7696176f7336a289a5adb3f7e0366b29453a88f9 Mon Sep 17 00:00:00 2001
+From: Yong Zhao <Yong.Zhao@amd.com>
+Date: Fri, 28 Jul 2017 18:15:46 -0400
+Subject: [PATCH 1324/4131] drm/amdkfd: Fix a bug that vmid is released before
+ resetting wavefronts
+
+When no HWS is used, vmid is always released after the last queue is
+destroyed rather than when the process terminates. With the current code,
+when a process terminates with all queues destroyed and somehow we need
+to reset wavefronts, dbgdev_wave_reset_wavefronts() will fail because
+no vmid is bound to this process any more.
+
+With this commit, we will reset the wavefronts, if needed, just before
+releasing the vmid. As part of the change, the wavefronts reset handling
+is moved to DQM from PQM, resulting in clearer logic.
+
+Change-Id: Ib72b7dc1d910045130928a8e20729b884a55b335
+Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
+
+ Conflicts[4.12]:
+ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+
+ Conflicts:
+ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+---
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 24 +++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 +++++-----
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 -
+ .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 15 --------------
+ 4 files changed, 29 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 77cabd1..8dbbbeb 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -421,12 +421,26 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
+ KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+ KFD_HIQ_TIMEOUT,
+ q->pipe, q->queue);
++ if (retval == -ETIME)
++ qpd->reset_wavefronts = true;
+
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+
+ list_del(&q->list);
+- if (list_empty(&qpd->queues_list))
++ if (list_empty(&qpd->queues_list)) {
++ if (qpd->reset_wavefronts) {
++ pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
++ dqm->dev);
++ /* dbgdev_wave_reset_wavefronts has to be called before
++ * deallocate_vmid(), i.e. when vmid is still in use.
++ */
++ dbgdev_wave_reset_wavefronts(dqm->dev,
++ qpd->pqm->process);
++ qpd->reset_wavefronts = false;
++ }
++
+ deallocate_vmid(dqm, qpd, q);
++ }
+ if (q->properties.is_active)
+ dqm->queue_count--;
+
+@@ -1307,6 +1321,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
+ dqm->queue_count--;
+
+ retval = execute_queues_cpsch(dqm, false, false);
++ if (retval == -ETIME)
++ qpd->reset_wavefronts = true;
+
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+
+@@ -1533,6 +1549,12 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
+
+ retval = execute_queues_cpsch(dqm, true, true);
+
++ if (retval || qpd->reset_wavefronts) {
++ pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
++ dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
++ qpd->reset_wavefronts = false;
++ }
++
+ /* lastly, free mqd resources */
+ list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+ mqd = dqm->ops.get_mqd_manager(dqm,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index c8af486..fe0f482 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -544,6 +544,12 @@ struct qcm_process_device {
+ unsigned int vmid;
+ bool is_debug;
+ unsigned int evicted; /* eviction counter, 0=active */
++
++ /* This flag tells if we should reset all wavefronts on
++ * process termination
++ */
++ bool reset_wavefronts;
++
+ /*
+ * All the memory management data should be here too
+ */
+@@ -645,11 +651,6 @@ struct kfd_process_device {
+ /* GPUVM allocations storage */
+ struct idr alloc_idr;
+
+- /* This flag tells if we should reset all
+- * wavefronts on process termination
+- */
+- bool reset_wavefronts;
+-
+ /* Flag used to tell the pdd has dequeued from the dqm.
+ * This is used to prevent dev->dqm->ops.process_termination() from
+ * being called twice when it is already called in IOMMU callback
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 39d9e6d2..baf1f75 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -705,7 +705,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ pdd->qpd.dqm = dev->dqm;
+ pdd->qpd.pqm = &p->pqm;
+ pdd->qpd.evicted = 0;
+- pdd->reset_wavefronts = false;
+ pdd->process = p;
+ pdd->bound = PDD_UNBOUND;
+ pdd->already_dequeued = false;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+index 5394866..32e782d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+@@ -66,7 +66,6 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
+ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+ {
+ struct kfd_dev *dev = pdd->dev;
+- struct kfd_process *p = pdd->process;
+ int retval;
+
+ if (pdd->already_dequeued)
+@@ -74,16 +73,6 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+
+ retval = dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+ pdd->already_dequeued = true;
+- /* Checking pdd->reset_wavefronts may not be needed, because
+- * if reset_wavefronts was set to true before, which means unmapping
+- * failed, process_termination should fail too until we reset
+- * wavefronts. Now we put the check there to be safe.
+- */
+- if (retval || pdd->reset_wavefronts) {
+- pr_warn("Resetting wave fronts on dev %p\n", dev);
+- dbgdev_wave_reset_wavefronts(dev, p);
+- pdd->reset_wavefronts = false;
+- }
+ }
+
+ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
+@@ -337,10 +326,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
+ kfree(pqn->q->properties.cu_mask);
+ pqn->q->properties.cu_mask = NULL;
+ retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+- if (retval != 0) {
+- if (retval == -ETIME)
+- pdd->reset_wavefronts = true;
+- }
+ uninit_queue(pqn->q);
+ }
+
+--
+2.7.4
+