1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
From 3e09aff00ca90b135406dd86642fde011937ce51 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 9 Feb 2017 17:59:45 -0500
Subject: [PATCH 1588/4131] drm/amdgpu: Retry failed userptr restore when the
mapping is in flux
update_user_pages fails with -EDEADLK if the virtual address mapping
of the buffer is being updated while we're trying to get its pages.
This can happen when a large buffer is being updated after fork,
taking longer than the 1 jiffy delay between the first evict and
restore. Reschedule restore after another 1 jiffy delay, hoping that
the VM mapping will have settled down by then.
Change-Id: Ic0c917ae8877bbd15e1989ca1c365df1a637c361
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 20 ++++++++++++++++----
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++++
2 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7d10b71..fff1b85 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -266,6 +266,7 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
struct kgd_mem *mem = container_of(dwork, struct kgd_mem, work);
struct amdgpu_device *adev;
struct mm_struct *mm;
+ int ret = 0;
mutex_lock(&mem->lock);
@@ -282,16 +283,27 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
* was scheduled.
*/
if (mem->evicted == 1) {
- if (amdgpu_amdkfd_gpuvm_restore_mem(mem, mm) != -EBUSY)
+ ret = amdgpu_amdkfd_gpuvm_restore_mem(mem, mm);
+ if (ret != -EBUSY && ret != -EDEADLK)
mem->evicted = 0;
}
- BUG_ON(mem->mm != mm);
- mem->mm = NULL;
+ /* If restore failed due to the VM being updated concurrently,
+ * reschedule restore again in a jiffie
+ */
+ if (ret == -EDEADLK && mem->evicted == 1) {
+ pr_err("Rescheduling restore\n");
+ mm = NULL;
+ schedule_delayed_work(&mem->work, 1);
+ } else {
+ BUG_ON(mem->mm != mm);
+ mem->mm = NULL;
+ }
mutex_unlock(&mem->lock);
- mmput(mm);
+ if (mm)
+ mmput(mm);
}
int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8dffb3a..e1bea18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1934,6 +1934,11 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
if (ret == -ESRCH)
/* process terminating, fail quiet and fast */
return ret;
+ else if (ret == -EDEADLK)
+ /* Someone else is still updating the
+ * VM, let's try again later
+ */
+ return ret;
pr_err("get_user_pages failed. Probably userptr is freed. %d\n",
ret);
}
--
2.7.4
|