aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3439-drm-amdkfd-Create-KFD-VMs-on-demand.patch
blob: c54abf5a948b75e58b428aa8deabacf39f7f092d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
From 30272a1b4da44c7866dad126b7026551498f6be2 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 23 Feb 2018 18:02:55 -0500
Subject: [PATCH 3439/4131] drm/amdkfd: Create KFD VMs on demand

Instead of creating all VMs on process creation, create them when
a process is bound to a device. This will later allow registering
an existing VM from a DRM render node FD at runtime, before the
process is bound to the device. This way the render node VM can be
used for KFD instead of creating our own redundant VM.

Change-Id: I62f912e41af9c223752f199c22822c4ff6638ed4
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 242 +++++++++++++++++--------------
 1 file changed, 137 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7c6bcbd..91aac82 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -68,7 +68,6 @@ static struct kfd_process *find_process(const struct task_struct *thread,
 static void kfd_process_ref_release(struct kref *ref);
 static struct kfd_process *create_process(const struct task_struct *thread,
 					struct file *filep);
-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
 
 static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
@@ -175,47 +174,31 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
 	return err;
 }
 
-/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
- *	The memory reserved is for KFD to submit IB to AMDGPU from kernel.
- *	If the memory is reserved successfully, ib_kaddr_assigned will have
- *	the CPU/kernel address. Check ib_kaddr_assigned before accessing the
- *	memory.
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ *	process for IB usage The memory reserved is for KFD to submit
+ *	IB to AMDGPU from kernel.  If the memory is reserved
+ *	successfully, ib_kaddr_assigned will have the CPU/kernel
+ *	address. Check ib_kaddr_assigned before accessing the memory.
  */
-static int kfd_process_reserve_ib_mem(struct kfd_process *p)
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
 {
-	int ret = 0;
-	struct kfd_process_device *pdd = NULL;
-	struct kfd_dev *kdev = NULL;
-	struct qcm_process_device *qpd = NULL;
-	void *kaddr;
+	struct qcm_process_device *qpd = &pdd->qpd;
 	uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
-			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
-			 ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+	void *kaddr;
+	int ret;
 
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		kdev = pdd->dev;
-		qpd = &pdd->qpd;
-		if (qpd->ib_kaddr)
-			continue;
+	if (qpd->ib_kaddr || !qpd->ib_base)
+		return 0;
 
-		if (qpd->ib_base) { /* is dGPU */
-			ret = kfd_process_alloc_gpuvm(p, kdev,
-				qpd->ib_base, PAGE_SIZE,
-				&kaddr, pdd, flags);
-			if (!ret)
-				qpd->ib_kaddr = kaddr;
-			else
-				/* In case of error, the kfd_bos for some pdds
-				 * which are already allocated successfully
-				 * will be freed in upper level function
-				 * i.e. create_process().
-				 */
-				return ret;
-		} else {
-			/* FIXME: Support APU */
-			continue;
-		}
-	}
+	/* ib_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd->process, pdd->dev,
+				      qpd->ib_base, PAGE_SIZE,
+				      &kaddr, pdd, flags);
+	if (ret)
+		return ret;
+
+	qpd->ib_kaddr = kaddr;
 
 	return 0;
 }
@@ -317,33 +300,42 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
 	return p;
 }
 
-static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
+static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
 {
-	struct kfd_process_device *pdd, *peer_pdd;
+	struct kfd_process *p = pdd->process;
 	struct kfd_bo *buf_obj;
 	int id;
 
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		/*
-		 * Remove all handles from idr and release appropriate
-		 * local memory object
-		 */
-		idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) {
-			list_for_each_entry(peer_pdd, &p->per_device_data,
-					per_device_list) {
-				peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
-						peer_pdd->dev->kgd,
-						buf_obj->mem, peer_pdd->vm);
-			}
-
-			run_rdma_free_callback(buf_obj);
-			pdd->dev->kfd2kgd->free_memory_of_gpu(
-					pdd->dev->kgd, buf_obj->mem);
-			kfd_process_device_remove_obj_handle(pdd, id);
+	/*
+	 * Remove all handles from idr and release appropriate
+	 * local memory object
+	 */
+	idr_for_each_entry(&pdd->alloc_idr, buf_obj, id) {
+		struct kfd_process_device *peer_pdd;
+
+		list_for_each_entry(peer_pdd, &p->per_device_data,
+				    per_device_list) {
+			if (!peer_pdd->vm)
+				continue;
+			peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu(
+				peer_pdd->dev->kgd, buf_obj->mem, peer_pdd->vm);
 		}
+
+		run_rdma_free_callback(buf_obj);
+		pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd,
+						      buf_obj->mem);
+		kfd_process_device_remove_obj_handle(pdd, id);
 	}
 }
 
+static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		kfd_process_device_free_bos(pdd);
+}
+
 static void kfd_process_destroy_pdds(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd, *temp;
@@ -472,52 +464,29 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
 };
 
-static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
-	int ret;
 	unsigned long  offset;
-	struct kfd_process_device *pdd = NULL;
-	struct kfd_dev *dev = NULL;
-	struct qcm_process_device *qpd = NULL;
-	void *kaddr;
-	uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
-			 ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
-			 ALLOC_MEM_FLAGS_READONLY |
-			 ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+	struct kfd_process_device *pdd;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		dev = pdd->dev;
-		qpd = &pdd->qpd;
-		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
+		struct kfd_dev *dev = pdd->dev;
+		struct qcm_process_device *qpd = &pdd->qpd;
+
+		if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
 			continue;
-		if (qpd->cwsr_base) {
-			/* cwsr_base is only set for DGPU */
-			ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base,
-				KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags);
-			if (!ret) {
-				qpd->cwsr_kaddr = kaddr;
-				qpd->tba_addr = qpd->cwsr_base;
-			} else
-				/* In case of error, the kfd_bos for some pdds
-				 * which are already allocated successfully
-				 * will be freed in upper level function
-				 * i.e. create_process().
-				 */
-				return ret;
-		} else {
-			offset = (dev->id |
-				KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
-			qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
+
+		offset = (dev->id | KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
+		qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
 				KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
 				MAP_SHARED, offset);
 
-			if (IS_ERR_VALUE(qpd->tba_addr)) {
-				pr_err("Failure to set tba address. error -%d.\n",
-					(int)qpd->tba_addr);
-				qpd->tba_addr = 0;
-				qpd->cwsr_kaddr = NULL;
-				return -ENOMEM;
-			}
+		if (IS_ERR_VALUE(qpd->tba_addr)) {
+			pr_err("Failure to set tba address. error -%d.\n",
+			       (int)qpd->tba_addr);
+			qpd->tba_addr = 0;
+			qpd->cwsr_kaddr = NULL;
+			return -ENOMEM;
 		}
 
 		memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
@@ -530,6 +499,38 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 	return 0;
 }
 
+static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+	struct qcm_process_device *qpd = &pdd->qpd;
+	uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
+		ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_READONLY |
+		ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+	void *kaddr;
+	int ret;
+
+	if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base)
+		return 0;
+
+	/* cwsr_base is only set for dGPU */
+	ret = kfd_process_alloc_gpuvm(pdd->process, dev, qpd->cwsr_base,
+				      KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd,
+				      flags);
+	if (ret)
+		return ret;
+
+	qpd->cwsr_kaddr = kaddr;
+	qpd->tba_addr = qpd->cwsr_base;
+
+	memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+	qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+	pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+		 qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+
+	return 0;
+}
+
 static struct kfd_process *create_process(const struct task_struct *thread,
 					struct file *filep)
 {
@@ -586,10 +587,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
 	process->last_restore_timestamp = get_jiffies_64();
 
-	err = kfd_process_reserve_ib_mem(process);
-	if (err)
-		goto err_reserve_ib_mem;
-	err = kfd_process_init_cwsr(process, filep);
+	err = kfd_process_init_cwsr_apu(process, filep);
 	if (err)
 		goto err_init_cwsr;
 
@@ -601,7 +599,6 @@ static struct kfd_process *create_process(const struct task_struct *thread,
 	return process;
 
 err_init_cwsr:
-err_reserve_ib_mem:
 	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
@@ -685,12 +682,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 		goto err_create_pdd;
 	}
 
-	/* Create the GPUVM context for this specific device */
-	if (dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
-					&p->process_info, &p->ef)) {
-		pr_err("Failed to create process VM object\n");
-		goto err_create_pdd;
-	}
 	return pdd;
 
 err_create_pdd:
@@ -701,6 +692,43 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	return NULL;
 }
 
+static int kfd_process_device_init_vm(struct kfd_process_device *pdd)
+{
+	struct kfd_process *p;
+	struct kfd_dev *dev;
+	int ret;
+
+	if (pdd->vm)
+		return 0;
+
+	p = pdd->process;
+	dev = pdd->dev;
+
+	ret = dev->kfd2kgd->create_process_vm(dev->kgd, &pdd->vm,
+					      &p->process_info, &p->ef);
+	if (ret) {
+		pr_err("Failed to create process VM object\n");
+		return ret;
+	}
+
+	ret = kfd_process_device_reserve_ib_mem(pdd);
+	if (ret)
+		goto err_reserve_ib_mem;
+	ret = kfd_process_device_init_cwsr_dgpu(pdd);
+	if (ret)
+		goto err_init_cwsr;
+
+	return 0;
+
+err_init_cwsr:
+err_reserve_ib_mem:
+	kfd_process_device_free_bos(pdd);
+	dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);
+	pdd->vm = NULL;
+
+	return ret;
+}
+
 /*
  * Direct the IOMMU to bind the process (specifically the pasid->mm)
  * to the device.
@@ -724,6 +752,10 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 	if (err)
 		return ERR_PTR(err);
 
+	err = kfd_process_device_init_vm(pdd);
+	if (err)
+		return ERR_PTR(err);
+
 	return pdd;
 }
 
-- 
2.7.4