diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch | 134 |
1 files changed, 134 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch new file mode 100644 index 00000000..88f5ae2f --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch @@ -0,0 +1,134 @@ +From 7741229a1c0e20ffe2824b5122694c2822d86b1d Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 7 Dec 2018 15:18:43 +0100 +Subject: [PATCH 3800/4256] drm/amdgpu: add graceful VM fault handling v3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Next step towards HMM support. For now just silence the retry fault and +optionally redirect the request to the dummy page. + +v2: make sure the VM is not destroyed while we handle the fault. +v3: fix VM destroy check, cleanup comments + +Signed-off-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 73 ++++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 + + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++ + 3 files changed, 79 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 144cb2e0e9aa..f0daa5e20f3d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -3079,3 +3079,76 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) + } + } + } ++ ++/** ++ * amdgpu_vm_handle_fault - graceful handling of VM faults. ++ * @adev: amdgpu device pointer ++ * @pasid: PASID of the VM ++ * @addr: Address of the fault ++ * ++ * Try to gracefully handle a VM fault. Return true if the fault was handled and ++ * shouldn't be reported any more. ++ */ ++bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, ++ uint64_t addr) ++{ ++ struct amdgpu_bo *root; ++ uint64_t value, flags; ++ struct amdgpu_vm *vm; ++ long r; ++ ++ spin_lock(&adev->vm_manager.pasid_lock); ++ vm = idr_find(&adev->vm_manager.pasid_idr, pasid); ++ if (vm) ++ root = amdgpu_bo_ref(vm->root.base.bo); ++ else ++ root = NULL; ++ spin_unlock(&adev->vm_manager.pasid_lock); ++ ++ if (!root) ++ return false; ++ ++ r = amdgpu_bo_reserve(root, true); ++ if (r) ++ goto error_unref; ++ ++ /* Double check that the VM still exists */ ++ spin_lock(&adev->vm_manager.pasid_lock); ++ vm = idr_find(&adev->vm_manager.pasid_idr, pasid); ++ if (vm && vm->root.base.bo != root) ++ vm = NULL; ++ spin_unlock(&adev->vm_manager.pasid_lock); ++ if (!vm) ++ goto error_unlock; ++ ++ addr /= AMDGPU_GPU_PAGE_SIZE; ++ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | ++ AMDGPU_PTE_SYSTEM; ++ ++ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { ++ /* Redirect the access to the dummy page */ ++ value = adev->dummy_page_addr; ++ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | ++ AMDGPU_PTE_WRITEABLE; ++ } else { ++ /* Let the hw retry silently on the PTE */ ++ value = 0; ++ } ++ ++ r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, ++ flags, value, NULL, NULL); ++ if (r) ++ goto error_unlock; ++ ++ r = amdgpu_vm_update_pdes(adev, vm, true); ++ ++error_unlock: ++ amdgpu_bo_unreserve(root); ++ if (r < 0) ++ DRM_ERROR("Can't handle page fault (%ld)\n", r); ++ ++error_unref: ++ amdgpu_bo_unref(&root); ++ ++ return false; ++} +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +index 3f1335295c00..5fbb26a0e1d8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +@@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); + + void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, + struct amdgpu_task_info *task_info); ++bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, ++ uint64_t addr); + + void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); + +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index 3dde208fa0c6..fe63f64c4db3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -376,6 +376,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, + } + + /* If it's the first fault for this address, process it normally */ ++ if (retry_fault && !in_interrupt() && ++ amdgpu_vm_handle_fault(adev, entry->pasid, addr)) ++ return 1; /* This also prevents sending it to KFD */ ++ + if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to +-- +2.17.1 + |