aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch 134
1 files changed, 134 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch
new file mode 100644
index 00000000..88f5ae2f
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3800-drm-amdgpu-add-graceful-VM-fault-handling-v3.patch
@@ -0,0 +1,134 @@
+From 7741229a1c0e20ffe2824b5122694c2822d86b1d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 7 Dec 2018 15:18:43 +0100
+Subject: [PATCH 3800/4256] drm/amdgpu: add graceful VM fault handling v3
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Next step towards HMM support. For now just silence the retry fault and
+optionally redirect the request to the dummy page.
+
+v2: make sure the VM is not destroyed while we handle the fault.
+v3: fix VM destroy check, cleanup comments
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 73 ++++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +
+ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++
+ 3 files changed, 79 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 144cb2e0e9aa..f0daa5e20f3d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -3079,3 +3079,76 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
+ }
+ }
+ }
++
++/**
++ * amdgpu_vm_handle_fault - graceful handling of VM faults.
++ * @adev: amdgpu device pointer
++ * @pasid: PASID used to look up the faulting VM in the PASID IDR
++ * @addr: Address of the fault, divided by the GPU page size before mapping
++ *
++ * Try to gracefully handle a VM fault by remapping the single faulting page.
++ * Meant to return true once handled; for now every path returns false.
++ */
++bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
++ uint64_t addr)
++{
++ struct amdgpu_bo *root;
++ uint64_t value, flags;
++ struct amdgpu_vm *vm;
++ long r;
++
++ spin_lock(&adev->vm_manager.pasid_lock);
++ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
++ if (vm)
++ root = amdgpu_bo_ref(vm->root.base.bo);
++ else
++ root = NULL;
++ spin_unlock(&adev->vm_manager.pasid_lock);
++
++ if (!root)
++ return false;
++
++ r = amdgpu_bo_reserve(root, true);
++ if (r)
++ goto error_unref; /* skips both the unreserve and the error print */
++
++ /* Re-check under the lock that the PASID still maps to a VM owning root */
++ spin_lock(&adev->vm_manager.pasid_lock);
++ vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
++ if (vm && vm->root.base.bo != root)
++ vm = NULL;
++ spin_unlock(&adev->vm_manager.pasid_lock);
++ if (!vm)
++ goto error_unlock;
++
++ addr /= AMDGPU_GPU_PAGE_SIZE; /* fault address -> GPU page number */
++ flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
++ AMDGPU_PTE_SYSTEM;
++
++ if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
++ /* Point the PTE at the dummy page and allow read/write/execute */
++ value = adev->dummy_page_addr;
++ flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
++ AMDGPU_PTE_WRITEABLE;
++ } else {
++ /* Zero PTE value with no access bits: let the hw retry silently */
++ value = 0;
++ }
++
++ r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
++ flags, value, NULL, NULL);
++ if (r)
++ goto error_unlock;
++
++ r = amdgpu_vm_update_pdes(adev, vm, true); /* result checked below */
++
++error_unlock:
++ amdgpu_bo_unreserve(root);
++ if (r < 0)
++ DRM_ERROR("Can't handle page fault (%ld)\n", r);
++
++error_unref:
++ amdgpu_bo_unref(&root);
++
++ return false; /* NOTE(review): also returned after a successful update */
++}
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+index 3f1335295c00..5fbb26a0e1d8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+@@ -413,6 +413,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
+
+ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+ struct amdgpu_task_info *task_info);
++bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
++ uint64_t addr);
+
+ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+index 3dde208fa0c6..fe63f64c4db3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+@@ -376,6 +376,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
+ }
+
+ /* If it's the first fault for this address, process it normally */
++ if (retry_fault && !in_interrupt() &&
++ amdgpu_vm_handle_fault(adev, entry->pasid, addr))
++ return 1; /* This also prevents sending it to KFD */
++
+ if (!amdgpu_sriov_vf(adev)) {
+ /*
+ * Issue a dummy read to wait for the status register to
+--
+2.17.1
+