diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch | 312 |
1 files changed, 0 insertions, 312 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch deleted file mode 100644 index 264ff29d..00000000 --- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1443-drm-amdkfd-Fix-suspend-resume-issue-on-Carrizo.patch +++ /dev/null @@ -1,312 +0,0 @@ -From 45b36801b47893b0e57e8256e733f27d18c19a0a Mon Sep 17 00:00:00 2001 -From: Yong Zhao <yong.zhao@amd.com> -Date: Mon, 30 May 2016 20:47:17 -0400 -Subject: [PATCH 1443/4131] drm/amdkfd: Fix suspend/resume issue on Carrizo - -When we do suspend/resume through "sudo pm-suspend" while there is -HSA activity running, upon resume we will encounter HWS hanging, which -is caused by memory read/write failures. The root cause is that when -suspend, we neglected to unbind pasid from kfd device. - -Another major change is that the bind/unbinding is changed to be -performed on a per process basis, instead of whether there are queues -in dqm. - -There are some other small changes as well. - -Change-Id: If9ac972fc4309b688f6c1d07e27cede54814410e -Signed-off-by: Yong Zhao <yong.zhao@amd.com> - - Conflicts: - drivers/gpu/drm/amd/amdkfd/kfd_process.c ---- - drivers/gpu/drm/amd/amdkfd/kfd_device.c | 26 +++--- - .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 13 --- - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 14 +++- - drivers/gpu/drm/amd/amdkfd/kfd_process.c | 94 ++++++++++++++++++---- - 4 files changed, 105 insertions(+), 42 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c -index 8bf209d..aae4e5a 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c -@@ -258,7 +258,7 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); - - if (dev) -- kfd_unbind_process_from_device(dev, pasid); -+ kfd_process_iommu_unbind_callback(dev, pasid); - } - - /* -@@ -488,14 +488,18 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) - { - BUG_ON(kfd == NULL); - -- if (kfd->init_complete) { -- kfd->dqm->ops.stop(kfd->dqm); -- if (kfd->device_info->is_need_iommu_device) { -- amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); -- amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); -- amd_iommu_free_device(kfd->pdev); -- } -- } -+ if (!kfd->init_complete) -+ return; -+ -+ kfd->dqm->ops.stop(kfd->dqm); -+ if (!kfd->device_info->is_need_iommu_device) -+ return; -+ -+ kfd_unbind_processes_from_device(kfd); -+ -+ amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); -+ amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); -+ amd_iommu_free_device(kfd->pdev); - } - - int kgd2kfd_evict_bo(struct kfd_dev *dev, void *mem) -@@ -533,6 +537,10 @@ static int kfd_resume(struct kfd_dev *kfd) - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, - iommu_invalid_ppr_cb); -+ -+ err = kfd_bind_processes_to_device(kfd); -+ if (err) -+ return -ENXIO; - } - - err = kfd->dqm->ops.start(kfd->dqm); -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -index 5fb3c1e..6f01393 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c -@@ -863,7 +863,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm) - - static int start_cpsch(struct device_queue_manager *dqm) - { -- struct device_process_node *node; - int retval; - - BUG_ON(!dqm); -@@ -892,11 +891,6 @@ static int start_cpsch(struct device_queue_manager *dqm) - - init_interrupts(dqm); - -- list_for_each_entry(node, &dqm->queues, list) -- if (node->qpd->pqm->process && dqm->dev) -- kfd_bind_process_to_device(dqm->dev, -- node->qpd->pqm->process); -- - mutex_lock(&dqm->lock); - execute_queues_cpsch(dqm, false); - mutex_unlock(&dqm->lock); -@@ -911,9 +905,6 @@ static int start_cpsch(struct device_queue_manager *dqm) - - static int stop_cpsch(struct device_queue_manager *dqm) - { -- struct device_process_node *node; -- struct kfd_process_device *pdd; -- - BUG_ON(!dqm); - - mutex_lock(&dqm->lock); -@@ -922,10 +913,6 @@ static int stop_cpsch(struct device_queue_manager *dqm) - - mutex_unlock(&dqm->lock); - -- list_for_each_entry(node, &dqm->queues, list) { -- pdd = qpd_to_pdd(node->qpd); -- pdd->bound = false; -- } - kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets); - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -index 1715c74..6727e4a 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -@@ -531,6 +531,12 @@ struct qcm_process_device { - #define GET_GPU_ID(handle) (handle >> 32) - #define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF) - -+enum kfd_pdd_bound { -+ PDD_UNBOUND = 0, -+ PDD_BOUND, -+ PDD_BOUND_SUSPENDED, -+}; -+ - /* Data that is per-process-per device. */ - struct kfd_process_device { - /* -@@ -564,7 +570,7 @@ struct kfd_process_device { - uint64_t sh_hidden_private_base_vmid; - - /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ -- bool bound; -+ enum kfd_pdd_bound bound; - - /* VM context for GPUVM allocations */ - void *vm; -@@ -658,8 +664,10 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); - struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); - - struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, -- struct kfd_process *p); --void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); -+ struct kfd_process *p); -+int kfd_bind_processes_to_device(struct kfd_dev *dev); -+void kfd_unbind_processes_from_device(struct kfd_dev *dev); -+void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); - struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - struct kfd_process *p); - struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -index 2c371cd..3b312b7 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c -@@ -319,8 +319,13 @@ static void kfd_process_wq_release(struct work_struct *work) - pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", - pdd->dev->id, p->pasid); - -- if (pdd->dev->device_info->is_need_iommu_device) -- amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); -+ if (pdd->dev->device_info->is_need_iommu_device) { -+ if (pdd->bound == PDD_BOUND) { -+ amd_iommu_unbind_pasid(pdd->dev->pdev, -+ p->pasid); -+ pdd->bound = PDD_UNBOUND; -+ } -+ } - - /* - * Remove all handles from idr and release appropriate -@@ -616,9 +621,9 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, - - list_for_each_entry(pdd, &p->per_device_data, per_device_list) - if (pdd->dev == dev) -- break; -+ return pdd; - -- return pdd; -+ return NULL; - } - - struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, -@@ -636,6 +641,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, - pdd->qpd.evicted = 0; - pdd->reset_wavefronts = false; - pdd->process = p; -+ pdd->bound = PDD_UNBOUND; - list_add(&pdd->per_device_list, &p->per_device_data); - - /* Init idr used for memory handle translation */ -@@ -672,21 +678,85 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, - return ERR_PTR(-ENOMEM); - } - -- if (pdd->bound) -+ if (pdd->bound == PDD_BOUND) - return pdd; - -+ if (pdd->bound == PDD_BOUND_SUSPENDED) { -+ pr_err("kfd: binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); -+ return ERR_PTR(-EINVAL); -+ } -+ - if (dev->device_info->is_need_iommu_device) { - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); - if (err < 0) - return ERR_PTR(err); - } - -- pdd->bound = true; -+ pdd->bound = PDD_BOUND; - - return pdd; - } - --void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) -+int kfd_bind_processes_to_device(struct kfd_dev *dev) -+{ -+ struct kfd_process_device *pdd; -+ struct kfd_process *p; -+ unsigned int temp; -+ int err = 0; -+ -+ int idx = srcu_read_lock(&kfd_processes_srcu); -+ -+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { -+ down_write(&p->lock); -+ pdd = kfd_get_process_device_data(dev, p); -+ if (pdd->bound != PDD_BOUND_SUSPENDED) { -+ up_write(&p->lock); -+ continue; -+ } -+ -+ err = amd_iommu_bind_pasid(dev->pdev, p->pasid, -+ p->lead_thread); -+ if (err < 0) { -+ pr_err("unexpected pasid %d binding failure\n", -+ p->pasid); -+ up_write(&p->lock); -+ break; -+ } -+ -+ pdd->bound = PDD_BOUND; -+ up_write(&p->lock); -+ } -+ -+ srcu_read_unlock(&kfd_processes_srcu, idx); -+ -+ return err; -+} -+ -+void kfd_unbind_processes_from_device(struct kfd_dev *dev) -+{ -+ struct kfd_process_device *pdd; -+ struct kfd_process *p; -+ unsigned int temp, temp_bound, temp_pasid; -+ -+ int idx = srcu_read_lock(&kfd_processes_srcu); -+ -+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { -+ down_write(&p->lock); -+ pdd = kfd_get_process_device_data(dev, p); -+ temp_bound = pdd->bound; -+ temp_pasid = p->pasid; -+ if (pdd->bound == PDD_BOUND) -+ pdd->bound = PDD_BOUND_SUSPENDED; -+ up_write(&p->lock); -+ -+ if (temp_bound == PDD_BOUND) -+ amd_iommu_unbind_pasid(dev->pdev, temp_pasid); -+ } -+ -+ srcu_read_unlock(&kfd_processes_srcu, idx); -+} -+ -+void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) - { - struct kfd_process *p; - struct kfd_process_device *pdd; -@@ -722,16 +792,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) - pdd->reset_wavefronts = false; - } - -- /* -- * Just mark pdd as unbound, because we still need it -- * to call amd_iommu_unbind_pasid() in when the -- * process exits. -- * We don't call amd_iommu_unbind_pasid() here -- * because the IOMMU called us. -- */ -- if (pdd) -- pdd->bound = false; -- - up_write(&p->lock); - } - --- -2.7.4 - |