about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch 120
1 files changed, 0 insertions, 120 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch
deleted file mode 100644
index 3dd1057d..00000000
--- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1654-drm-amdgpu-Allow-get_user_pages-to-fail-in-restore-w.patch
+++ /dev/null
@@ -1,120 +0,0 @@
-From 7d8e00c65331476edd5611b4ba9d161ac59000d7 Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Wed, 5 Apr 2017 19:45:23 -0400
-Subject: [PATCH 1654/4131] drm/amdgpu: Allow get_user_pages to fail in restore
- worker
-
-Avoid stalling queues indefinitely if get_user_pages fails. This type
-of failure is indicative of a user mode bug where memory is freed that
-is still mapped to the GPU. Let the restore continue and update the
-page table with invalid entries for the failed BO. If the GPU tries to
-access it, the application will crash with a VM fault.
-
-Bug: SWDEV-117987
-Change-Id: Ica23b8d562e8268d80109192f1c5f0c16eb72de0
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 52 ++++++++++++++++++------
- 1 file changed, 40 insertions(+), 12 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index bde4f6a..3f1b1d9 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -523,7 +523,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
- up_read(&mm->mmap_sem);
- if (ret) {
-- pr_err("%s: Failed to get user pages\n", __func__);
-+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
- goto free_out;
- }
-
-@@ -1996,8 +1996,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
- mem->user_pages);
- if (ret) {
- mem->user_pages[0] = NULL;
-- pr_err("%s: Failed to get user pages\n", __func__);
-- goto unlock_mmap_out;
-+ pr_info("%s: Failed to get user pages: %d\n",
-+ __func__, ret);
-+ ret = 0;
-+ /* Pretend it succeeded. It will fail later
-+ * with a VM fault if the GPU tries to access
-+ * it. Better than hanging indefinitely with
-+ * stalled user mode queues.
-+ */
- }
-
- /* Mark the BO as valid unless it was invalidated
-@@ -2064,6 +2070,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
-
- amdgpu_sync_create(&sync);
-
-+ /* Avoid triggering eviction fences when unmapping invalid
-+ * userptr BOs (waits for all fences, doesn't use
-+ * FENCE_OWNER_VM)
-+ */
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->base.root.bo,
-+ process_info->eviction_fence,
-+ NULL, NULL);
-+
- ret = process_validate_vms(process_info);
- if (ret)
- goto unreserve_out;
-@@ -2076,15 +2092,17 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
-
- bo = mem->bo;
-
-- /* Copy pages array and validate the BO */
-- memcpy(bo->tbo.ttm->pages, mem->user_pages,
-- sizeof(struct page *) * bo->tbo.ttm->num_pages);
-- amdgpu_ttm_placement_from_domain(bo, mem->domain);
-- ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-- false, false);
-- if (ret) {
-- pr_err("%s: failed to validate BO\n", __func__);
-- goto unreserve_out;
-+ /* Copy pages array and validate the BO if we got user pages */
-+ if (mem->user_pages[0]) {
-+ memcpy(bo->tbo.ttm->pages, mem->user_pages,
-+ sizeof(struct page *) * bo->tbo.ttm->num_pages);
-+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
-+ ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-+ false, false);
-+ if (ret) {
-+ pr_err("%s: failed to validate BO\n", __func__);
-+ goto unreserve_out;
-+ }
- }
-
- /* Validate succeeded, now the BO owns the pages, free
-@@ -2097,6 +2115,12 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
- list_move_tail(&mem->validate_list.head,
- &process_info->userptr_valid_list);
-
-+ /* Update mapping. If the BO was not validated
-+ * (because we couldn't get user pages), this will
-+ * clear the page table entries, which will result in
-+ * VM faults if the GPU tries to access the invalid
-+ * memory.
-+ */
- list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
- if (!bo_va_entry->is_mapped)
- continue;
-@@ -2113,6 +2137,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
- }
- }
- unreserve_out:
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_bo_fence(peer_vm->base.root.bo,
-+ &process_info->eviction_fence->base, true);
- ttm_eu_backoff_reservation(&ticket, &resv_list);
- amdgpu_sync_wait(&sync);
- amdgpu_sync_free(&sync);
---
-2.7.4
-