Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1548-drm-amdkfd-Move-resume_mm-quiesce_mm-out-of-reserve-.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1548-drm-amdkfd-Move-resume_mm-quiesce_mm-out-of-reserve-.patch | 202
1 file changed, 202 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1548-drm-amdkfd-Move-resume_mm-quiesce_mm-out-of-reserve-.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1548-drm-amdkfd-Move-resume_mm-quiesce_mm-out-of-reserve-.patch
new file mode 100644
index 00000000..547e3cd2
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1548-drm-amdkfd-Move-resume_mm-quiesce_mm-out-of-reserve-.patch
@@ -0,0 +1,202 @@
+From 8597c8ba04362ffa14c9914ec84e0a969153e78e Mon Sep 17 00:00:00 2001
+From: Yong Zhao <yong.zhao@amd.com>
+Date: Mon, 24 Oct 2016 17:16:01 -0400
+Subject: [PATCH 1548/4131] drm/amdkfd: Move resume_mm/quiesce_mm out of
+ reserve/unreserve scope
+
+resume_mm/quiesce_mm should be called without bo and VMs reserved.
+Otherwise, deadlock could happen, as the PD BO of the VM could be
+reserved for the second time when get_process_page_dir() is called.
+
+Change-Id: I6e268e34d43edee5a4beb29ca1ee55de41825787
+Signed-off-by: Yong Zhao <yong.zhao@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 83 +++++++++++++++++-------
+ 1 file changed, 58 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index ddb9cab..6581539 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -1061,6 +1061,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ 	struct kfd_bo_va_list *bo_va_entry = NULL;
+ 	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
+ 	struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
++	int num_to_quiesce = 0;
+ 
+ 	BUG_ON(kgd == NULL);
+ 	BUG_ON(mem == NULL);
+@@ -1126,14 +1127,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ 		if (entry->bo_va->vm == vm && !entry->is_mapped) {
+ 			if (mem->evicted) {
+ 				/* If the BO is evicted, just mark the
+-				 * mapping as mapped and stop the GPU's
+-				 * queues until the BO is restored. */
+-				ret = kgd2kfd->quiesce_mm(adev->kfd,
+-						current->mm);
+-				if (ret != 0)
+-					goto quiesce_failed;
++				 * mapping as mapped and the GPU's queues
++				 * will be stopped later.
++				 */
+ 				entry->is_mapped = true;
+ 				mem->mapped_to_gpu_memory++;
++				num_to_quiesce++;
+ 				continue;
+ 			}
+ 
+@@ -1158,11 +1157,23 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ 				true);
+ 	unreserve_bo_and_vms(&ctx, true);
+ 
++	while (num_to_quiesce--) {
++		/* Now stop the GPU's queues while bo and VMs are unreserved.
++		 * quiesce_mm() is reference counted, and that is why we can
++		 * call it multiple times.
++		 */
++		ret = kgd2kfd->quiesce_mm(adev->kfd, current->mm);
++		if (ret != 0) {
++			pr_err("quiesce_mm() failed\n");
++			reserve_bo_and_vm(mem, vm, &ctx);
++			goto map_bo_to_gpuvm_failed;
++		}
++	}
++
+ 	mutex_unlock(&mem->lock);
+-	return 0;
++	return ret;
+ 
+ map_bo_to_gpuvm_failed:
+-quiesce_failed:
+ update_user_pages_failed:
+ 	if (bo_va_entry_aql)
+ 		remove_bo_from_vm(adev, bo_va_entry_aql);
+@@ -1349,6 +1360,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ 	unsigned mapped_before;
+ 	int ret = 0;
+ 	struct bo_vm_reservation_context ctx;
++	int num_to_resume = 0;
+ 
+ 	BUG_ON(kgd == NULL);
+ 	BUG_ON(mem == NULL);
+@@ -1381,14 +1393,12 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ 		if (entry->bo_va->vm == vm && entry->is_mapped) {
+ 			if (mem->evicted) {
+ 				/* If the BO is evicted, just mark the
+-				 * mapping as unmapped and allow the
+-				 * GPU's queues to resume. */
+-				ret = kgd2kfd->resume_mm(adev->kfd,
+-						current->mm);
+-				if (ret != 0)
+-					goto unreserve_out;
++				 * mapping as unmapped and the GPU's queues
++				 * will be resumed later.
++				 */
+ 				entry->is_mapped = false;
+ 				mem->mapped_to_gpu_memory--;
++				num_to_resume++;
+ 				continue;
+ 			}
+ 
+@@ -1430,6 +1440,18 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ 
+ unreserve_out:
+ 	unreserve_bo_and_vms(&ctx, false);
++
++	while (num_to_resume--) {
++		/* Now resume GPU's queues while bo and VMs are unreserved.
++		 * resume_mm() is reference counted, and that is why we can
++		 * call it multiple times.
++		 */
++		ret = kgd2kfd->resume_mm(adev->kfd, current->mm);
++		if (ret != 0) {
++			pr_err("resume_mm() failed.\n");
++			break;
++		}
++	}
+ out:
+ 	mutex_unlock(&mem->lock);
+ 	return ret;
+@@ -1694,7 +1716,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, int dma_buf_fd,
+ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ {
+ 	struct kfd_bo_va_list *entry;
+-	unsigned n_evicted;
++	unsigned int n_evicted = 0, n_unmapped = 0;
+ 	int r = 0;
+ 	struct bo_vm_reservation_context ctx;
+ 
+@@ -1708,11 +1730,6 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ 	 * queues of the affected GPUs are quiesced first. Count the
+ 	 * number of evicted mappings so we can roll back if something
+ 	 * goes wrong. */
+-	n_evicted = 0;
+-
+-	r = reserve_bo_and_cond_vms(mem, NULL, VA_MAPPED, &ctx);
+-	if (unlikely(r != 0))
+-		return r;
+ 
+ 	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+ 		struct amdgpu_device *adev;
+@@ -1728,16 +1745,31 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ 			goto fail;
+ 		}
+ 
++		n_evicted++;
++	}
++
++	r = reserve_bo_and_cond_vms(mem, NULL, VA_MAPPED, &ctx);
++	if (unlikely(r != 0))
++		goto fail;
++
++	list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
++		struct amdgpu_device *adev;
++
++		if (!entry->is_mapped)
++			continue;
++
++		adev = (struct amdgpu_device *)entry->kgd_dev;
++
+ 		r = unmap_bo_from_gpuvm(adev, mem->bo,
+ 				entry->bo_va, &ctx.sync);
+ 		if (r != 0) {
+ 			pr_err("failed unmap va 0x%llx\n",
+ 					mem->va);
+-			kgd2kfd->resume_mm(adev->kfd, mm);
++			unreserve_bo_and_vms(&ctx, true);
+ 			goto fail;
+ 		}
+ 
+-		n_evicted++;
++		n_unmapped++;
+ 	}
+ 
+ 	unreserve_bo_and_vms(&ctx, true);
+@@ -1745,7 +1777,6 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ 	return 0;
+ 
+ fail:
+-	unreserve_bo_and_vms(&ctx, true);
+ 	/* To avoid hangs and keep state consistent, roll back partial
+ 	 * eviction by restoring queues and marking mappings as
+ 	 * unmapped. Access to now unmapped buffers will fault. */
+@@ -1757,12 +1788,14 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
+ 		if (!entry->is_mapped)
+ 			continue;
+ 
+-		entry->is_mapped = false;
++		if (n_unmapped) {
++			entry->is_mapped = false;
++			n_unmapped--;
++		}
+ 
+ 		adev = (struct amdgpu_device *)entry->kgd_dev;
+ 		if (kgd2kfd->resume_mm(adev->kfd, mm))
+ 			pr_err("Failed to resume KFD\n");
+-
+ 		n_evicted--;
+ 	}
+ 
+-- 
+2.7.4
+
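The patch above follows a classic lock-ordering pattern: while the reservation is held, only count the work to be done, then drop the reservation before making the calls that would retake it. Below is a minimal userspace sketch of that pattern, not the kernel API; every name in it (reservation_lock, quiesce_mm, map_memory_to_gpu, quiesce_refcount) is an illustrative stand-in. It models the deadlock the commit message describes: quiesce_mm() internally takes the same reservation (in the kernel, via get_process_page_dir() reserving the PD BO), so calling it with the reservation already held would self-deadlock on a non-recursive lock.

	/* sketch.c -- build with: gcc -pthread sketch.c
	 * Hypothetical model of "quiesce outside the reservation".
	 * None of these names are the real amdgpu/amdkfd symbols.
	 */
	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t reservation_lock = PTHREAD_MUTEX_INITIALIZER;
	static int quiesce_refcount;	/* models the refcounted quiesce state */

	/* Models kgd2kfd->quiesce_mm(): it takes the reservation internally,
	 * so it must NOT be called while the caller already holds it. */
	static int quiesce_mm(void)
	{
		pthread_mutex_lock(&reservation_lock);	/* deadlocks if already held */
		quiesce_refcount++;
		pthread_mutex_unlock(&reservation_lock);
		return 0;
	}

	/* Models the fixed map-to-GPU flow: count under the lock, act after. */
	static int map_memory_to_gpu(int evicted_mappings)
	{
		int num_to_quiesce = 0;
		int ret = 0;

		pthread_mutex_lock(&reservation_lock);	/* "reserve BO and VMs" */
		/* Under the reservation, only *count* the mappings whose queues
		 * must be stopped; the old code called quiesce_mm() right here,
		 * which is where the double-reservation deadlock came from. */
		num_to_quiesce = evicted_mappings;
		pthread_mutex_unlock(&reservation_lock);	/* unreserve first... */

		while (num_to_quiesce--) {	/* ...then quiesce, once per mapping */
			ret = quiesce_mm();
			if (ret != 0)
				break;
		}
		return ret;
	}

	int main(void)
	{
		map_memory_to_gpu(3);
		printf("quiesce refcount: %d\n", quiesce_refcount);	/* prints 3 */
		return 0;
	}

Because the quiesce side is reference counted, issuing one deferred call per mapping after unreserving is equivalent to the old inline calls; that is the same argument the patch's in-code comment makes. The patch's error path re-reserves (reserve_bo_and_vm()) before jumping to map_bo_to_gpuvm_failed for the mirror-image reason: the cleanup code there expects the reservation to be held.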