about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch 141
1 files changed, 141 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch
new file mode 100644
index 00000000..3fbb184b
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch
@@ -0,0 +1,141 @@
+From c861d6b46c53645ff531e90e774bcca8de72c2d0 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Wed, 17 Jan 2018 11:39:31 -0500
+Subject: [PATCH 3184/4131] drm/amdgpu: Fix potential deadlock when restoring
+ BOs
+
+Locking the PD reservation object when querying the PD address while
+restarting the queues can lead to a deadlock with an eviction waiting
+for the same restore to finish.
+
+Avoid that by remembering the PD address every time we validate the
+page directory. That way the address can be queried later without
+locking the PD reservation again.
+
+Change-Id: I00498db93ae9a61a43861c1a2363f99402922720
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+
+Conflicts:
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 47 ++++++++----------------
+ 2 files changed, 17 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index 0dbba0f..be9b009 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -126,6 +126,8 @@ struct amdkfd_vm {
+ struct amdgpu_device *adev;
+ /* Points to the KFD process VM info*/
+ struct amdkfd_process_info *process_info;
++
++ uint64_t pd_phys_addr;
+ };
+
+ int amdgpu_amdkfd_init(void);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index cb8b4ab..2409b56 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -382,17 +382,18 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
++static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
+ {
+- struct amdgpu_bo *pd = vm->root.base.bo;
++ struct amdgpu_bo *pd = vm->base.root.base.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ struct amdgpu_vm_parser param;
++ uint64_t addr, flags = AMDGPU_PTE_VALID;
+ int ret;
+
+ param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ param.wait = false;
+
+- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
++ ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
+ &param);
+ if (ret) {
+ pr_err("amdgpu: failed to validate PT BOs\n");
+@@ -404,7 +405,12 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+ pr_err("amdgpu: failed to validate PD\n");
+ return ret;
+ }
+- if (vm->use_cpu_for_update) {
++
++ addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
++ amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
++ vm->pd_phys_addr = addr;
++
++ if (vm->base.use_cpu_for_update) {
+ ret = amdgpu_bo_kmap(pd, NULL);
+ if (ret) {
+ pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+@@ -519,7 +525,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+ goto err_alloc_pts;
+ }
+
+- ret = vm_validate_pt_pd_bos(avm);
++ ret = vm_validate_pt_pd_bos(kvm);
+ if (ret != 0) {
+ pr_err("validate_pt_pd_bos() failed\n");
+ goto err_alloc_pts;
+@@ -1366,31 +1372,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ return ret;
+ }
+
+-static u64 get_vm_pd_gpu_offset(void *vm)
+-{
+- struct amdgpu_vm *avm = (struct amdgpu_vm *) vm;
+- struct amdgpu_device *adev =
+- amdgpu_ttm_adev(avm->root.base.bo->tbo.bdev);
+- u64 offset;
+- uint64_t flags = AMDGPU_PTE_VALID;
+-
+- BUG_ON(avm == NULL);
+-
+- amdgpu_bo_reserve(avm->root.base.bo, false);
+-
+- offset = amdgpu_bo_gpu_offset(avm->root.base.bo);
+-
+- amdgpu_bo_unreserve(avm->root.base.bo);
+-
+- /* On some ASICs the FB doesn't start at 0. Adjust FB offset
+- * to an actual MC address.
+- */
+- if (adev->gmc.gmc_funcs->get_vm_pde)
+- amdgpu_gmc_get_vm_pde(adev, -1, &offset, &flags);
+-
+- return offset;
+-}
+-
+ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+ void **process_info,
+ struct dma_fence **ef)
+@@ -1513,7 +1494,9 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+
+ uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+ {
+- return get_vm_pd_gpu_offset(vm) >> AMDGPU_GPU_PAGE_SHIFT;
++ struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
++
++ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+ }
+
+ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+@@ -1920,7 +1903,7 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+- ret = vm_validate_pt_pd_bos(&peer_vm->base);
++ ret = vm_validate_pt_pd_bos(peer_vm);
+ if (ret)
+ return ret;
+ }
+--
+2.7.4
+