path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1381-drm-amdgpu-Add-MMU-notifier-for-HSA-userptr-buffers.patch
From 4e57129b464425b2fb9618d3fdaca75a22cbc577 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 15 Mar 2016 02:37:53 -0400
Subject: [PATCH 1381/4131] drm/amdgpu: Add MMU notifier for HSA userptr
 buffers

Temporarily evicts userptr buffers when the kernel invalidates the
corresponding user VM mapping. Buffer eviction calls into KFD to
quiesce the queues accessing the affected VM. After the invalidation,
the evicted buffers are restored, which pins them again, presumably
with an updated user mapping. A short delay between eviction and
restore avoids excessive unpinning and repinning of the same buffer.
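
In outline, the new HSA notifier path looks as follows (a condensed
sketch; only the helper names and the 1-jiffy delay are taken from
the diff below, the loop over registered BOs is abbreviated):

    invalidate_range_start_hsa(mn, mm, start, end):
        for each registered BO whose userptr overlaps [start, end):
            amdgpu_amdkfd_evict_mem(bo->adev, bo->kfd_bo, mm)

    invalidate_range_end_hsa(mn, mm, start, end):
        for each registered BO whose userptr overlaps [start, end):
            amdgpu_amdkfd_schedule_restore_mem(bo->adev, bo->kfd_bo,
                                               mm, 1 /* jiffy delay */)

Notifier contexts are keyed per (mm, type) pair via
AMDGPU_MN_KEY(mm, type) = (unsigned long)mm + type, relying on the
alignment of mm pointers to keep the keys unique.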

Change-Id: I889ee367605dd0121fe4dab6a24e55441094588a
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

 Conflicts:
	drivers/gpu/drm/amd/amdgpu/amdgpu.h
	drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c     | 143 ++++++++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    |   2 +-
 4 files changed, 132 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d8c5c45..ab784c9 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -391,6 +391,8 @@ struct amdgpu_gem_object {
 	struct amdgpu_bo		*bo;
 };
 
+struct kgd_mem;
+
 #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_gem_object, base)->bo
 
 void amdgpu_gem_object_free(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 430c622..87bb00d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,19 @@
 #include <drm/drm.h>
 
 #include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+
+enum amdgpu_mn_type {
+	AMDGPU_MN_TYPE_GFX,
+	AMDGPU_MN_TYPE_HSA,
+};
 
 struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
 	struct mmu_notifier	mn;
+	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
 	struct work_struct	work;
@@ -81,6 +88,9 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 		list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
 			bo->mn = NULL;
 			list_del_init(&bo->mn_list);
+			if (rmn->type == AMDGPU_MN_TYPE_HSA)
+				amdgpu_amdkfd_cancel_restore_mem(
+					adev, bo->kfd_bo);
 		}
 		kfree(node);
 	}
@@ -147,7 +157,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 }
 
 /**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
  * @mn: our notifier
  * @mn: the mm this callback is about
@@ -157,10 +167,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * We block for all BOs between start and end to be idle and
  * unmap them by move them into system domain again.
  */
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long start,
-					     unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
 {
 	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -183,22 +193,122 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 	mutex_unlock(&rmn->lock);
 }
 
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
-	.release = amdgpu_mn_release,
-	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
+/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct amdgpu_mn_node *node;
+		struct amdgpu_bo *bo;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		it = interval_tree_iter_next(it, start, end);
+
+		list_for_each_entry(bo, &node->bos, mn_list) {
+			struct kgd_mem *mem = bo->kfd_bo;
+
+			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start, end))
+				amdgpu_amdkfd_evict_mem(bo->adev, mem, mm);
+		}
+	}
+}
+
+/**
+ * amdgpu_mn_invalidate_range_end_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * Restore BOs between start and end. Once the last BO is restored,
+ * the queues can be reenabled. Restoring a BO can itself trigger
+ * another recursive MMU notifier. Therefore this needs to be
+ * scheduled in a worker thread. Adding a slight delay (1 jiffy)
+ * avoids excessive repeated evictions.
+ */
+static void amdgpu_mn_invalidate_range_end_hsa(struct mmu_notifier *mn,
+					       struct mm_struct *mm,
+					       unsigned long start,
+					       unsigned long end)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct amdgpu_mn_node *node;
+		struct amdgpu_bo *bo;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		it = interval_tree_iter_next(it, start, end);
+
+		list_for_each_entry(bo, &node->bos, mn_list) {
+			struct kgd_mem *mem = bo->kfd_bo;
+
+			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start, end))
+				amdgpu_amdkfd_schedule_restore_mem(bo->adev,
+								   mem, mm, 1);
+		}
+	}
+}
+
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+	[AMDGPU_MN_TYPE_GFX] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+	},
+	[AMDGPU_MN_TYPE_HSA] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end_hsa,
+	},
 };
 
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type. */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
 /**
  * amdgpu_mn_get - create notifier context
  *
  * @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
  *
  * Creates a notifier context for current->mm.
  */
-static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				       enum amdgpu_mn_type type)
 {
 	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *rmn;
+	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
 	struct hlist_node *node;
@@ -215,11 +325,11 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 #endif
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-	hash_for_each_possible(adev->mn_hash, rmn, node, node, (unsigned long)mm)
+	hash_for_each_possible(adev->mn_hash, rmn, node, node, key)
 #else
-	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
+	hash_for_each_possible(adev->mn_hash, rmn, node, key)
 #endif
-		if (rmn->mm == mm)
+		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
 			goto release_locks;
 
 	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -230,7 +340,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 
 	rmn->adev = adev;
 	rmn->mm = mm;
-	rmn->mn.ops = &amdgpu_mn_ops;
+	rmn->type = type;
+	rmn->mn.ops = &amdgpu_mn_ops[type];
 	mutex_init(&rmn->lock);
 	rmn->objects = RB_ROOT;
 
@@ -238,7 +349,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	if (r)
 		goto free_rmn;
 
-	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
 
 release_locks:
 	up_write(&mm->mmap_sem);
@@ -267,12 +378,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	enum amdgpu_mn_type type =
+		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 	struct amdgpu_mn *rmn;
 	struct amdgpu_mn_node *node = NULL;
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	rmn = amdgpu_mn_get(adev);
+	rmn = amdgpu_mn_get(adev, type);
 	if (IS_ERR(rmn))
 		return PTR_ERR(rmn);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index fabddbb..e53b70a 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -83,7 +83,7 @@ struct amdgpu_bo {
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	struct amdgpu_mn		*mn;
-	bool				is_kfd_bo;
+	struct kgd_mem			*kfd_bo;
 	struct kfd_process_device	*pdd;
 
 	union {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index cb6aaea..2b866be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -286,7 +286,7 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 	 * Don't verify access for KFD BO as it doesn't necessary has
 	 * KGD file pointer
 	 */
-	if (!abo || abo->is_kfd_bo || !filp)
+	if (!abo || abo->kfd_bo || !filp)
 		return 0;
 	file_priv = filp->private_data;
 
-- 
2.7.4