aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1588-drm-amdgpu-Retry-failed-userptr-restore-when-the-map.patch
blob: 79168573c0b011580812efa217191f1936db8549 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
From 3e09aff00ca90b135406dd86642fde011937ce51 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 9 Feb 2017 17:59:45 -0500
Subject: [PATCH 1588/4131] drm/amdgpu: Retry failed userptr restore when the
 mapping is in flux

update_user_pages fails with -EDEADLK if the virtual address mapping
of the buffer is being updated while we're trying to get its pages.
This can happen when a large buffer is being updated after fork and
the update takes longer than the 1-jiffy delay between the first
evict and the restore. In that case, reschedule the restore after
another 1-jiffy delay, in the hope that the VM mapping has settled
down by then.

Change-Id: Ic0c917ae8877bbd15e1989ca1c365df1a637c361
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c       | 20 ++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  5 +++++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7d10b71..fff1b85 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -266,6 +266,7 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
 	struct kgd_mem *mem = container_of(dwork, struct kgd_mem, work);
 	struct amdgpu_device *adev;
 	struct mm_struct *mm;
+	int ret = 0;
 
 	mutex_lock(&mem->lock);
 
@@ -282,16 +283,27 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
 	 * was scheduled.
 	 */
 	if (mem->evicted == 1) {
-		if (amdgpu_amdkfd_gpuvm_restore_mem(mem, mm) != -EBUSY)
+		ret = amdgpu_amdkfd_gpuvm_restore_mem(mem, mm);
+		if (ret != -EBUSY && ret != -EDEADLK)
 			mem->evicted = 0;
 	}
 
-	BUG_ON(mem->mm != mm);
-	mem->mm = NULL;
+	/* If restore failed due to the VM being updated concurrently,
+	 * reschedule restore again in a jiffie
+	 */
+	if (ret == -EDEADLK && mem->evicted == 1) {
+		pr_err("Rescheduling restore\n");
+		mm = NULL;
+		schedule_delayed_work(&mem->work, 1);
+	} else {
+		BUG_ON(mem->mm != mm);
+		mem->mm = NULL;
+	}
 
 	mutex_unlock(&mem->lock);
 
-	mmput(mm);
+	if (mm)
+		mmput(mm);
 }
 
 int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8dffb3a..e1bea18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1934,6 +1934,11 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
 			if (ret == -ESRCH)
 				/* process terminating, fail quiet and fast */
 				return ret;
+			else if (ret == -EDEADLK)
+				/* Someone else is still updating the
+				 * VM, let's try again later
+				 */
+				return ret;
 			pr_err("get_user_pages failed. Probably userptr is freed. %d\n",
 			       ret);
 		}
-- 
2.7.4