aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch312
1 files changed, 0 insertions, 312 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch
deleted file mode 100644
index 264ff29d..00000000
--- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch
+++ /dev/null
@@ -1,312 +0,0 @@
-From 45b36801b47893b0e57e8256e733f27d18c19a0a Mon Sep 17 00:00:00 2001
-From: Yong Zhao <yong.zhao@amd.com>
-Date: Mon, 30 May 2016 20:47:17 -0400
-Subject: [PATCH 1443/4131] drm/amdkfd: Fix suspend/resume issue on Carrizo
-
-When we do suspend/resume through "sudo pm-suspend" while there is
-HSA activity running, upon resume we will encounter HWS hanging, which
-is caused by memory read/write failures. The root cause is that when
-suspend, we neglected to unbind pasid from kfd device.
-
-Another major change is that the bind/unbinding is changed to be
-performed on a per process basis, instead of whether there are queues
-in dqm.
-
-There are some other small changes as well.
-
-Change-Id: If9ac972fc4309b688f6c1d07e27cede54814410e
-Signed-off-by: Yong Zhao <yong.zhao@amd.com>
-
- Conflicts:
- drivers/gpu/drm/amd/amdkfd/kfd_process.c
----
- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 26 +++---
- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 13 ---
- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 14 +++-
- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 94 ++++++++++++++++++----
- 4 files changed, 105 insertions(+), 42 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-index 8bf209d..aae4e5a 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-@@ -258,7 +258,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
- struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
-
- if (dev)
-- kfd_unbind_process_from_device(dev, pasid);
-+ kfd_process_iommu_unbind_callback(dev, pasid);
- }
-
- /*
-@@ -488,14 +488,18 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
- {
- BUG_ON(kfd == NULL);
-
-- if (kfd->init_complete) {
-- kfd->dqm->ops.stop(kfd->dqm);
-- if (kfd->device_info->is_need_iommu_device) {
-- amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
-- amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
-- amd_iommu_free_device(kfd->pdev);
-- }
-- }
-+ if (!kfd->init_complete)
-+ return;
-+
-+ kfd->dqm->ops.stop(kfd->dqm);
-+ if (!kfd->device_info->is_need_iommu_device)
-+ return;
-+
-+ kfd_unbind_processes_from_device(kfd);
-+
-+ amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
-+ amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
-+ amd_iommu_free_device(kfd->pdev);
- }
-
- int kgd2kfd_evict_bo(struct kfd_dev *dev, void *mem)
-@@ -533,6 +537,10 @@ static int kfd_resume(struct kfd_dev *kfd)
- iommu_pasid_shutdown_callback);
- amd_iommu_set_invalid_ppr_cb(kfd->pdev,
- iommu_invalid_ppr_cb);
-+
-+ err = kfd_bind_processes_to_device(kfd);
-+ if (err)
-+ return -ENXIO;
- }
-
- err = kfd->dqm->ops.start(kfd->dqm);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-index 5fb3c1e..6f01393 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
-@@ -863,7 +863,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
-
- static int start_cpsch(struct device_queue_manager *dqm)
- {
-- struct device_process_node *node;
- int retval;
-
- BUG_ON(!dqm);
-@@ -892,11 +891,6 @@ static int start_cpsch(struct device_queue_manager *dqm)
-
- init_interrupts(dqm);
-
-- list_for_each_entry(node, &dqm->queues, list)
-- if (node->qpd->pqm->process && dqm->dev)
-- kfd_bind_process_to_device(dqm->dev,
-- node->qpd->pqm->process);
--
- mutex_lock(&dqm->lock);
- execute_queues_cpsch(dqm, false);
- mutex_unlock(&dqm->lock);
-@@ -911,9 +905,6 @@ static int start_cpsch(struct device_queue_manager *dqm)
-
- static int stop_cpsch(struct device_queue_manager *dqm)
- {
-- struct device_process_node *node;
-- struct kfd_process_device *pdd;
--
- BUG_ON(!dqm);
-
- mutex_lock(&dqm->lock);
-@@ -922,10 +913,6 @@ static int stop_cpsch(struct device_queue_manager *dqm)
-
- mutex_unlock(&dqm->lock);
-
-- list_for_each_entry(node, &dqm->queues, list) {
-- pdd = qpd_to_pdd(node->qpd);
-- pdd->bound = false;
-- }
- kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
- pm_uninit(&dqm->packets);
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-index 1715c74..6727e4a 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-@@ -531,6 +531,12 @@ struct qcm_process_device {
- #define GET_GPU_ID(handle) (handle >> 32)
- #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
-
-+enum kfd_pdd_bound {
-+ PDD_UNBOUND = 0,
-+ PDD_BOUND,
-+ PDD_BOUND_SUSPENDED,
-+};
-+
- /* Data that is per-process-per device. */
- struct kfd_process_device {
- /*
-@@ -564,7 +570,7 @@ struct kfd_process_device {
- uint64_t sh_hidden_private_base_vmid;
-
- /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
-- bool bound;
-+ enum kfd_pdd_bound bound;
-
- /* VM context for GPUVM allocations */
- void *vm;
-@@ -658,8 +664,10 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
- struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
-
- struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
-- struct kfd_process *p);
--void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid);
-+ struct kfd_process *p);
-+int kfd_bind_processes_to_device(struct kfd_dev *dev);
-+void kfd_unbind_processes_from_device(struct kfd_dev *dev);
-+void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
- struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
- struct kfd_process *p);
- struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-index 2c371cd..3b312b7 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-@@ -319,8 +319,13 @@ static void kfd_process_wq_release(struct work_struct *work)
- pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
- pdd->dev->id, p->pasid);
-
-- if (pdd->dev->device_info->is_need_iommu_device)
-- amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
-+ if (pdd->dev->device_info->is_need_iommu_device) {
-+ if (pdd->bound == PDD_BOUND) {
-+ amd_iommu_unbind_pasid(pdd->dev->pdev,
-+ p->pasid);
-+ pdd->bound = PDD_UNBOUND;
-+ }
-+ }
-
- /*
- * Remove all handles from idr and release appropriate
-@@ -616,9 +621,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
-
- list_for_each_entry(pdd, &p->per_device_data, per_device_list)
- if (pdd->dev == dev)
-- break;
-+ return pdd;
-
-- return pdd;
-+ return NULL;
- }
-
- struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
-@@ -636,6 +641,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
- pdd->qpd.evicted = 0;
- pdd->reset_wavefronts = false;
- pdd->process = p;
-+ pdd->bound = PDD_UNBOUND;
- list_add(&pdd->per_device_list, &p->per_device_data);
-
- /* Init idr used for memory handle translation */
-@@ -672,21 +678,85 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
- return ERR_PTR(-ENOMEM);
- }
-
-- if (pdd->bound)
-+ if (pdd->bound == PDD_BOUND)
- return pdd;
-
-+ if (pdd->bound == PDD_BOUND_SUSPENDED) {
-+ pr_err("kfd: binding PDD_BOUND_SUSPENDED pdd is unexpected!\n");
-+ return ERR_PTR(-EINVAL);
-+ }
-+
- if (dev->device_info->is_need_iommu_device) {
- err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
- if (err < 0)
- return ERR_PTR(err);
- }
-
-- pdd->bound = true;
-+ pdd->bound = PDD_BOUND;
-
- return pdd;
- }
-
--void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
-+int kfd_bind_processes_to_device(struct kfd_dev *dev)
-+{
-+ struct kfd_process_device *pdd;
-+ struct kfd_process *p;
-+ unsigned int temp;
-+ int err = 0;
-+
-+ int idx = srcu_read_lock(&kfd_processes_srcu);
-+
-+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-+ down_write(&p->lock);
-+ pdd = kfd_get_process_device_data(dev, p);
-+ if (pdd->bound != PDD_BOUND_SUSPENDED) {
-+ up_write(&p->lock);
-+ continue;
-+ }
-+
-+ err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
-+ p->lead_thread);
-+ if (err < 0) {
-+ pr_err("unexpected pasid %d binding failure\n",
-+ p->pasid);
-+ up_write(&p->lock);
-+ break;
-+ }
-+
-+ pdd->bound = PDD_BOUND;
-+ up_write(&p->lock);
-+ }
-+
-+ srcu_read_unlock(&kfd_processes_srcu, idx);
-+
-+ return err;
-+}
-+
-+void kfd_unbind_processes_from_device(struct kfd_dev *dev)
-+{
-+ struct kfd_process_device *pdd;
-+ struct kfd_process *p;
-+ unsigned int temp, temp_bound, temp_pasid;
-+
-+ int idx = srcu_read_lock(&kfd_processes_srcu);
-+
-+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
-+ down_write(&p->lock);
-+ pdd = kfd_get_process_device_data(dev, p);
-+ temp_bound = pdd->bound;
-+ temp_pasid = p->pasid;
-+ if (pdd->bound == PDD_BOUND)
-+ pdd->bound = PDD_BOUND_SUSPENDED;
-+ up_write(&p->lock);
-+
-+ if (temp_bound == PDD_BOUND)
-+ amd_iommu_unbind_pasid(dev->pdev, temp_pasid);
-+ }
-+
-+ srcu_read_unlock(&kfd_processes_srcu, idx);
-+}
-+
-+void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
- {
- struct kfd_process *p;
- struct kfd_process_device *pdd;
-@@ -722,16 +792,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
- pdd->reset_wavefronts = false;
- }
-
-- /*
-- * Just mark pdd as unbound, because we still need it
-- * to call amd_iommu_unbind_pasid() in when the
-- * process exits.
-- * We don't call amd_iommu_unbind_pasid() here
-- * because the IOMMU called us.
-- */
-- if (pdd)
-- pdd->bound = false;
--
- up_write(&p->lock);
- }
-
---
-2.7.4
-