diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch | 141 |
1 files changed, 141 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch new file mode 100644 index 00000000..3fbb184b --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch @@ -0,0 +1,141 @@ +From c861d6b46c53645ff531e90e774bcca8de72c2d0 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Wed, 17 Jan 2018 11:39:31 -0500 +Subject: [PATCH 3184/4131] drm/amdgpu: Fix potential deadlock when restoring + BOs + +Locking the PD reservation object when querying the PD address while +restarting the queues can lead to a deadlock with an eviction waiting +for the same restore to finish. + +Avoid that by remembering the PD address every time we validate the +page directory. That way the address can be queried later without +locking the PD reservation again. 
+ +Change-Id: I00498db93ae9a61a43861c1a2363f99402922720 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> + +Conflicts: + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 + + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 47 ++++++++---------------- + 2 files changed, 17 insertions(+), 32 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 0dbba0f..be9b009 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -126,6 +126,8 @@ struct amdkfd_vm { + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; ++ ++ uint64_t pd_phys_addr; + }; + + int amdgpu_amdkfd_init(void); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index cb8b4ab..2409b56 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -382,17 +382,18 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) + * again. Page directories are only updated after updating page + * tables. 
+ */ +-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) ++static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) + { +- struct amdgpu_bo *pd = vm->root.base.bo; ++ struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; ++ uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + +- ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, ++ ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + &param); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); +@@ -404,7 +405,12 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } +- if (vm->use_cpu_for_update) { ++ ++ addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); ++ amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); ++ vm->pd_phys_addr = addr; ++ ++ if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); +@@ -519,7 +525,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + goto err_alloc_pts; + } + +- ret = vm_validate_pt_pd_bos(avm); ++ ret = vm_validate_pt_pd_bos(kvm); + if (ret != 0) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; +@@ -1366,31 +1372,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + return ret; + } + +-static u64 get_vm_pd_gpu_offset(void *vm) +-{ +- struct amdgpu_vm *avm = (struct amdgpu_vm *) vm; +- struct amdgpu_device *adev = +- amdgpu_ttm_adev(avm->root.base.bo->tbo.bdev); +- u64 offset; +- uint64_t flags = AMDGPU_PTE_VALID; +- +- BUG_ON(avm == NULL); +- +- amdgpu_bo_reserve(avm->root.base.bo, false); +- +- offset = amdgpu_bo_gpu_offset(avm->root.base.bo); +- +- amdgpu_bo_unreserve(avm->root.base.bo); +- +- /* On some ASICs the FB doesn't start at 0. Adjust FB offset +- * to an actual MC address. 
+- */ +- if (adev->gmc.gmc_funcs->get_vm_pde) +- amdgpu_gmc_get_vm_pde(adev, -1, &offset, &flags); +- +- return offset; +-} +- + int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +@@ -1513,7 +1494,9 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) + + uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) + { +- return get_vm_pd_gpu_offset(vm) >> AMDGPU_GPU_PAGE_SHIFT; ++ struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; ++ ++ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; + } + + int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +@@ -1920,7 +1903,7 @@ static int process_validate_vms(struct amdkfd_process_info *process_info) + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { +- ret = vm_validate_pt_pd_bos(&peer_vm->base); ++ ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } +-- +2.7.4 + |