aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch
blob: 673438630c1b76bed98d7e52efb4e2502e8bd59a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
From 4c5de602a4b2e3a49fe96f5ab6403697209e0121 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Thu, 22 Mar 2018 17:25:54 -0400
Subject: [PATCH 4160/5725] drm/amdkfd: CMA: Refactor CMA code

This is similar to process_vm_rw() functions. This refactoring is also
helpful for the special handling of userptr BOs (upcoming commits).

This commit does not change any functionality.

v2: Fix potential fence leak

Change-Id: Ic8f9c6a7599d2beac54d768831618df0207f10e9
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>

Conflicts:
      drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 329 +++++++++++++++++--------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  17 ++
 2 files changed, 203 insertions(+), 143 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index fd62468..ebb862b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1706,22 +1706,164 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
 	return r;
 }
 
+/* Update cma_iter.cur_bo with KFD BO that is associated with
+ * cma_iter.array.va_addr
+ */
+static int kfd_cma_iter_update_bo(struct cma_iter *ci)
+{
+	struct kfd_memory_range *arr = ci->array;
+	uint64_t va_end = arr->va_addr + arr->size - 1;
+
+	mutex_lock(&ci->p->mutex);
+	ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr,
+								va_end);
+	mutex_unlock(&ci->p->mutex);
+
+	if (!ci->cur_bo || va_end > ci->cur_bo->it.last) {
+		pr_err("CMA failed. Range out of bounds\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+/* Advance iter by @size bytes. */
+static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size)
+{
+	int ret = 0;
+
+	ci->offset += size;
+	if (WARN_ON(size > ci->total || ci->offset > ci->array->size))
+		return -EFAULT;
+	ci->total -= size;
+	/* If current range is copied, move to next range if available. */
+	if (ci->offset == ci->array->size) {
+
+		/* End of all ranges */
+		if (!(--ci->nr_segs))
+			return 0;
+
+		ci->array++;
+		ci->offset = 0;
+		ret = kfd_cma_iter_update_bo(ci);
+		if (ret)
+			return ret;
+	}
+	ci->bo_offset = (ci->array->va_addr + ci->offset) -
+			ci->cur_bo->it.start;
+	return ret;
+}
+
+static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
+				struct kfd_process *p, struct cma_iter *ci)
+{
+	int ret;
+	int nr;
+
+	if (!arr || !segs)
+		return -EINVAL;
+
+	memset(ci, 0, sizeof(*ci));
+	ci->array = arr;
+	ci->nr_segs = segs;
+	ci->p = p;
+	ci->offset = 0;
+	for (nr = 0; nr < segs; nr++)
+		ci->total += arr[nr].size;
+
+	/* Valid but size is 0. So copied will also be 0 */
+	if (!ci->total)
+		return 0;
+
+	ret = kfd_cma_iter_update_bo(ci);
+	if (!ret)
+		ci->bo_offset = arr->va_addr - ci->cur_bo->it.start;
+	return ret;
+}
+
+static bool kfd_cma_iter_end(struct cma_iter *ci)
+{
+	if (!(ci->nr_segs) || !(ci->total))
+		return true;
+	return false;
+}
+
+/* Copy single range from source iterator @si to destination iterator @di.
+ * @si will move to next range and @di will move by bytes copied.
+ * @return : 0 for success or -ve for failure
+ * @f: The last fence if any
+ * @copied: out: number of bytes copied
+ */
+static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di,
+				bool cma_write, struct dma_fence **f,
+				uint64_t *copied)
+{
+	int err = 0;
+	uint64_t copy_size, n;
+	uint64_t size = si->array->size;
+	struct kfd_bo *src_bo = si->cur_bo;
+	struct dma_fence *lfence = NULL;
+
+	if (!src_bo || !di || !copied)
+		return -EINVAL;
+	*copied = 0;
+	if (f)
+		*f = NULL;
+
+	while (size && !kfd_cma_iter_end(di)) {
+		struct dma_fence *fence = NULL;
+		struct kfd_bo *dst_bo = di->cur_bo;
+
+		copy_size = min(size, (di->array->size - di->offset));
+
+		/* Check both BOs belong to same device */
+		if (src_bo->dev->kgd != dst_bo->dev->kgd) {
+			pr_err("CMA fail. Not same dev\n");
+			return -EINVAL;
+		}
+
+		err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd,
+			src_bo->mem, si->bo_offset, dst_bo->mem, di->bo_offset,
+			copy_size, &fence, &n);
+		if (err) {
+			pr_err("GPU CMA %d failed\n", err);
+			break;
+		}
+
+		if (fence) {
+			dma_fence_put(lfence);
+			lfence = fence;
+		}
+		size -= n;
+		*copied += n;
+		err = kfd_cma_iter_advance(si, n);
+		if (err)
+			break;
+		err = kfd_cma_iter_advance(di, n);
+		if (err)
+			break;
+	}
+
+	if (f)
+		*f = dma_fence_get(lfence);
+	dma_fence_put(lfence);
+
+	return err;
+}
+
 static int kfd_ioctl_cross_memory_copy(struct file *filep,
 				       struct kfd_process *local_p, void *data)
 {
 	struct kfd_ioctl_cross_memory_copy_args *args = data;
 	struct kfd_memory_range *src_array, *dst_array;
-	struct kfd_bo *src_bo, *dst_bo;
-	struct kfd_process *remote_p, *src_p, *dst_p;
+	struct kfd_process *remote_p;
 	struct task_struct *remote_task;
 	struct mm_struct *remote_mm;
 	struct pid *remote_pid;
-	struct dma_fence *fence = NULL, *lfence = NULL;
-	uint64_t dst_va_addr;
-	uint64_t copied, total_copied = 0;
-	uint64_t src_offset, dst_offset, dst_va_addr_end;
+	struct dma_fence *lfence = NULL;
+	uint64_t copied = 0, total_copied = 0;
+	struct cma_iter di, si;
 	const char *cma_op;
-	int i, j = 0, err = 0;
+	int err = 0;
 
 	/* Check parameters */
 	if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 ||
@@ -1787,160 +1929,61 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
 		err = -EINVAL;
 		goto kfd_process_fail;
 	}
-
+	/* Initialise cma_iter @si & @di with source & destination range. */
 	if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) {
-		src_p = local_p;
-		dst_p = remote_p;
 		cma_op = "WRITE";
 		pr_debug("CMA WRITE: local -> remote\n");
+		err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
+					remote_p, &di);
+		if (err)
+			goto kfd_process_fail;
+		err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
+					local_p, &si);
+		if (err)
+			goto kfd_process_fail;
 	} else {
-		src_p = remote_p;
-		dst_p = local_p;
 		cma_op = "READ";
 		pr_debug("CMA READ: remote -> local\n");
-	}
 
+		err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
+					local_p, &di);
+		if (err)
+			goto kfd_process_fail;
+		err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
+					remote_p, &si);
+		if (err)
+			goto kfd_process_fail;
+	}
 
-	/* For each source kfd_range:
-	 * - Find the BO. Each range has to be within the same BO.
-	 * - Copy this range to single or multiple destination BOs.
-	 * - dst_va_addr - will point to next va address into which data will
-	 *                 be copied.
-	 * - dst_bo & src_bo - the current destination and source BOs
-	 * - src_offset & dst_offset - offset into the respective BOs from
-	 *                             data will be sourced or copied
+	/* Copy one si range at a time into di. After each call to
+	 * kfd_copy_single_range() si will move to next range. di will be
+	 * incremented by bytes copied
 	 */
-	dst_va_addr = dst_array[0].va_addr;
-	dst_va_addr_end = dst_va_addr + dst_array[0].size - 1;
-	mutex_lock(&dst_p->mutex);
-	dst_bo = kfd_process_find_bo_from_interval(dst_p,
-			dst_va_addr,
-			dst_va_addr_end);
-	mutex_unlock(&dst_p->mutex);
-	if (!dst_bo || dst_va_addr_end > dst_bo->it.last) {
-		pr_err("CMA %s failed. Invalid dst range\n", cma_op);
-		err = -EFAULT;
-		goto kfd_process_fail;
-	}
-	dst_offset = dst_va_addr - dst_bo->it.start;
-
-	for (i = 0; i < args->src_mem_array_size; i++) {
-		uint64_t src_va_addr_end = src_array[i].va_addr +
-					   src_array[i].size - 1;
-		uint64_t src_size_to_copy = src_array[i].size;
-
-		mutex_lock(&src_p->mutex);
-		src_bo = kfd_process_find_bo_from_interval(src_p,
-				src_array[i].va_addr,
-				src_va_addr_end);
-		mutex_unlock(&src_p->mutex);
-		if (!src_bo || src_va_addr_end > src_bo->it.last) {
-			pr_err("CMA %s failed. Invalid src range\n", cma_op);
-			err = -EFAULT;
-			break;
-		}
+	while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) {
+		struct dma_fence *fence = NULL;
 
-		src_offset = src_array[i].va_addr - src_bo->it.start;
+		err = kfd_copy_single_range(&si, &di,
+					KFD_IS_CROSS_MEMORY_WRITE(args->flags),
+					&fence, &copied);
+		total_copied += copied;
 
-		/* Copy src_bo to one or multiple dst_bo(s) based on size and
-		 * and current copy location.
-		 */
-		while (j < args->dst_mem_array_size) {
-			uint64_t copy_size;
-			int64_t space_left;
-
-			/* Find the current copy_size. This will be smaller of
-			 * the following
-			 * - space left in the current dest memory range
-			 * - data left to copy from source range
-			 */
-			space_left = (dst_array[j].va_addr + dst_array[j].size)
-					- dst_va_addr;
-			copy_size = (src_size_to_copy < space_left) ?
-					src_size_to_copy : space_left;
-
-			/* Check both BOs belong to same device */
-			if (src_bo->dev->kgd != dst_bo->dev->kgd) {
-				pr_err("CMA %s fail. Not same dev\n", cma_op);
-				err = -EINVAL;
-				break;
-			}
+		if (err)
+			break;
 
-			/* Store prev fence. Release it when a later fence is
-			 * created
-			 */
+		/* Release old fence if a later fence is created. If no
+	 * new fence is created, then keep the previous fence
+		 */
+		if (fence) {
+			dma_fence_put(lfence);
 			lfence = fence;
-			fence = NULL;
-
-			err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(
-				src_bo->dev->kgd,
-				src_bo->mem, src_offset,
-				dst_bo->mem, dst_offset,
-				copy_size,
-				&fence, &copied);
-
-			if (err) {
-				pr_err("GPU CMA %s failed\n", cma_op);
-				break;
-			}
-
-			/* Later fence available. Release old fence */
-			if (fence && lfence) {
-				dma_fence_put(lfence);
-				lfence = NULL;
-			}
-
-			total_copied += copied;
-			src_size_to_copy -= copied;
-			space_left -= copied;
-			dst_va_addr += copied;
-			dst_offset += copied;
-			src_offset += copied;
-			if (dst_va_addr > dst_bo->it.last + 1) {
-				pr_err("CMA %s fail. Mem overflow\n", cma_op);
-				err = -EFAULT;
-				break;
-			}
-
-			/* If the cur dest range is full move to next one */
-			if (space_left <= 0) {
-				if (++j >= args->dst_mem_array_size)
-					break;
-
-				dst_va_addr = dst_array[j].va_addr;
-				dst_va_addr_end = dst_va_addr +
-						  dst_array[j].size - 1;
-				dst_bo = kfd_process_find_bo_from_interval(
-						dst_p,
-						dst_va_addr,
-						dst_va_addr_end);
-				if (!dst_bo ||
-				    dst_va_addr_end > dst_bo->it.last) {
-					pr_err("CMA %s failed. Invalid dst range\n",
-					       cma_op);
-					err = -EFAULT;
-					break;
-				}
-				dst_offset = dst_va_addr - dst_bo->it.start;
-			}
-
-			/* If the cur src range is done, move to next one */
-			if (src_size_to_copy <= 0)
-				break;
 		}
-		if (err)
-			break;
 	}
 
 	/* Wait for the last fence irrespective of error condition */
-	if (fence) {
-		if (dma_fence_wait_timeout(fence, false, msecs_to_jiffies(1000))
-			< 0)
+	if (lfence) {
+		if (dma_fence_wait_timeout(lfence, false,
+					   msecs_to_jiffies(1000)) < 0)
 			pr_err("CMA %s failed. BO timed out\n", cma_op);
-		dma_fence_put(fence);
-	} else if (lfence) {
-		pr_debug("GPU copy fail. But wait for prev DMA to finish\n");
-		dma_fence_wait_timeout(lfence, true, msecs_to_jiffies(1000));
 		dma_fence_put(lfence);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0a019a6..da61ae8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -303,6 +303,23 @@ struct kfd_bo {
 	struct kfd_ipc_obj *kfd_ipc_obj;
 };
 
+/* Similar to iov_iter */
+struct cma_iter {
+	/* points to current entry of range array */
+	struct kfd_memory_range *array;
+	/* total number of entries in the initial array */
+	unsigned long nr_segs;
+	/* total amount of data pointed by kfd array*/
+	unsigned long total;
+	/* offset into the entry pointed by cma_iter.array */
+	unsigned long offset;
+	struct kfd_process *p;
+	/* current kfd_bo associated with cma_iter.array.va_addr */
+	struct kfd_bo *cur_bo;
+	/* offset w.r.t cur_bo */
+	unsigned long bo_offset;
+};
+
 /* KGD2KFD callbacks */
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
-- 
2.7.4