From 69736d7a9a0eff5bed67c00629edc2d67147b9f6 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Tue, 9 Aug 2016 13:06:34 -0400
Subject: [PATCH 1171/4131] drm/amdkfd: Adjust kernel managed memory allocation
 method

This commit simplifies the kernel-managed memory allocation function.
In the new code, we use kfd_processes_mutex to guarantee exclusive
access, which lets us drop p->lock when calling into kgd.
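
As a purely illustrative userspace sketch of the new locking shape
(not the driver code itself; all names here are hypothetical
stand-ins): creation runs entirely under one outer mutex, so the inner
per-process lock never has to be dropped and retaken around the
allocation call:

    #include <pthread.h>

    /* Hypothetical stand-in for kfd_processes_mutex. */
    static pthread_mutex_t processes_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct process {
            int mem_handle;
    };

    /* Analogue of kfd_process_alloc_gpuvm(): called only while
     * processes_mutex is held, so no p->lock is needed and nothing
     * has to be unlocked around the allocation. */
    static int alloc_gpuvm(struct process *p)
    {
            p->mem_handle = 1;      /* pretend allocation */
            return 0;
    }

    static int create_one_process(struct process *p)
    {
            int err;

            pthread_mutex_lock(&processes_mutex);
            err = alloc_gpuvm(p);
            pthread_mutex_unlock(&processes_mutex);
            return err;
    }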

Moreover, this fixes a bug where, if kfd_process_init_cwsr() or
kfd_process_reserve_ib_mem() failed, user space was never informed of
the error.
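
A minimal sketch of the goto-unwind shape adopted for this fix (the
stub names below are hypothetical, not the driver's functions): if
either setup step fails, everything allocated so far is freed and the
error is returned, so user space actually observes the failure:

    struct process;

    /* Stubs standing in for kfd_process_reserve_ib_mem() and
     * kfd_process_init_cwsr(); in the driver either one can fail. */
    static int reserve_ib_mem(struct process *p) { (void)p; return 0; }
    static int init_cwsr(struct process *p) { (void)p; return 0; }
    static void free_outstanding_bos(struct process *p) { (void)p; }

    static int setup_process(struct process *p)
    {
            int err;

            err = reserve_ib_mem(p);
            if (err)
                    goto err_unwind;
            err = init_cwsr(p);
            if (err)
                    goto err_unwind;
            return 0;

    err_unwind:
            /* Free partial allocations, then propagate the error. */
            free_outstanding_bos(p);
            return err;
    }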

Change-Id: I8b2d3cd0616d4795189d761690ae0648e74c9c4e
Signed-off-by: Yong Zhao <yong.zhao@amd.com>

 Conflicts:
	drivers/gpu/drm/amd/amdkfd/kfd_process.c
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 142 +++++++++++++++----------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 65099bb..ca4ed91 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -57,7 +57,8 @@ static struct workqueue_struct *kfd_process_wq;
 static struct kfd_process *find_process(const struct task_struct *thread,
 		bool lock);
 static void kfd_process_ref_release(struct kref *ref);
-static struct kfd_process *create_process(const struct task_struct *thread);
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep);
 static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
 
 
@@ -83,24 +84,23 @@ static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem,
 }
 
 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
- *	During the memory allocation of GPU, we can't hold the process lock.
- *	There's a chance someone else allocates the memory during the lock
- *	released time. In that case, -EINVAL is returned but kptr remains so
- *	the caller knows the memory is allocated (by someone else) and
- *	available to use.
+ *	This function should be only called right after the process
+ *	is created and when kfd_processes_mutex is still being held
+ *	to avoid concurrency. Because of that exclusiveness, we do
+ *	not need to take p->lock. Because kfd_processes_mutex instead
+ *	of p->lock is held, we do not need to release the lock when
+ *	calling into kgd through functions such as alloc_memory_of_gpu()
+ *	etc.
  */
 static int kfd_process_alloc_gpuvm(struct kfd_process *p,
 		struct kfd_dev *kdev, uint64_t gpu_va, uint32_t size,
-		void *vm, void **kptr, struct kfd_process_device *pdd,
-		uint64_t *addr_to_assign)
+		void **kptr, struct kfd_process_device *pdd)
 {
 	int err;
 	void *mem = NULL;
 
-	/* can't hold the process lock while allocating from KGD */
-	up_write(&p->lock);
-
-	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, vm,
+	err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
+				pdd->vm,
 				(struct kgd_mem **)&mem, NULL, kptr, pdd,
 				ALLOC_MEM_FLAGS_GTT |
 				ALLOC_MEM_FLAGS_NONPAGED |
@@ -113,37 +113,28 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
 	if (err)
 		goto err_map_mem;
 
-	down_write(&p->lock);
-	/* Check if someone else allocated the memory while we weren't looking
+	/* Create an obj handle so kfd_process_device_remove_obj_handle
+	 * will take care of the bo removal when the process finishes.
+	 * We do not need to take p->lock, because the process is just
+	 * created and the ioctls have not had the chance to run.
 	 */
-	if (*addr_to_assign) {
-		err = -EINVAL;
+	if (kfd_process_device_create_obj_handle(
+			pdd, mem, gpu_va, size) < 0) {
+		err = -ENOMEM;
+		*kptr = NULL;
 		goto free_gpuvm;
-	} else {
-		/* Create an obj handle so kfd_process_device_remove_obj_handle
-		 * will take care of the bo removal when the process finishes
-		 */
-		if (kfd_process_device_create_obj_handle(
-				pdd, mem, gpu_va, size) < 0) {
-			err = -ENOMEM;
-			*kptr = NULL;
-			goto free_gpuvm;
-		}
 	}
 
 	return err;
 
 free_gpuvm:
-	up_write(&p->lock);
 	kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm);
-	down_write(&p->lock);
 	return err;
 
 err_map_mem:
 	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem);
 err_alloc_mem:
 	*kptr = NULL;
-	down_write(&p->lock);
 	return err;
 }
 
@@ -161,7 +152,6 @@ static int kfd_process_reserve_ib_mem(struct kfd_process *p)
 	struct qcm_process_device *qpd = NULL;
 	void *kaddr;
 
-	down_write(&p->lock);
 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
 				per_device_list) {
 		kdev = pdd->dev;
@@ -171,21 +161,23 @@ static int kfd_process_reserve_ib_mem(struct kfd_process *p)
 
 		if (qpd->ib_base) { /* is dGPU */
 			err = kfd_process_alloc_gpuvm(p, kdev,
-				qpd->ib_base, kdev->ib_size, pdd->vm,
-				&kaddr, pdd, (uint64_t *)&qpd->ib_kaddr);
+				qpd->ib_base, kdev->ib_size,
+				&kaddr, pdd);
 			if (!err)
 				qpd->ib_kaddr = kaddr;
-			else if (qpd->ib_kaddr)
-				err = 0;
 			else
-				err = -ENOMEM;
+				goto err_out;
 		} else {
 			/* FIXME: Support APU */
-			err = -ENOMEM;
+			continue;
 		}
 	}
 
-	up_write(&p->lock);
+err_out:
+	/* In case of error, the kfd_bos for some pdds which are already
+	 * allocated successfully will be freed in upper level function
+	 * i.e. create_process().
+	 */
 	return err;
 }
 
@@ -204,9 +196,6 @@ struct kfd_process *kfd_create_process(struct file *filep)
 	if (thread->group_leader->mm != thread->mm)
 		return ERR_PTR(-EINVAL);
 
-	/* Take mmap_sem because we call __mmu_notifier_register inside */
-	down_write(&thread->mm->mmap_sem);
-
 	/*
 	 * take kfd processes mutex before starting of process creation
 	 * so there won't be a case where two threads of the same process
@@ -218,17 +207,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
 	process = find_process(thread, false);
 	if (process)
 		pr_debug("kfd: process already found\n");
-
-	if (!process)
-		process = create_process(thread);
+	else
+		process = create_process(thread, filep);
 
 	mutex_unlock(&kfd_processes_mutex);
 
-	up_write(&thread->mm->mmap_sem);
-
-	kfd_process_init_cwsr(process, filep);
-	kfd_process_reserve_ib_mem(process);
-
 	return process;
 }
 
@@ -588,33 +571,30 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 
 static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 {
-	int err;
+	int err = 0;
 	unsigned long  offset;
 	struct kfd_process_device *temp, *pdd = NULL;
 	struct kfd_dev *dev = NULL;
 	struct qcm_process_device *qpd = NULL;
+	void *kaddr;
 
-	down_write(&p->lock);
 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
 				per_device_list) {
 		dev = pdd->dev;
 		qpd = &pdd->qpd;
-		if (!dev->cwsr_enabled || qpd->tba_addr)
+		if (!dev->cwsr_enabled || qpd->cwsr_kaddr)
 			continue;
 		if (qpd->cwsr_base) {
 			/* cwsr_base is only set for DGPU */
-
 			err = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base,
-					dev->cwsr_size, pdd->vm,
-					&qpd->cwsr_kaddr, pdd, &qpd->tba_addr);
+					dev->cwsr_size,	&kaddr, pdd);
 			if (!err) {
+				qpd->cwsr_kaddr = kaddr;
 				memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages),
 				       PAGE_SIZE);
 				kunmap(dev->cwsr_pages);
 				qpd->tba_addr = qpd->cwsr_base;
-			} else if (qpd->cwsr_kaddr)
-				err = 0;
-			else
+			} else
 				goto out;
 		} else {
 			offset = (kfd_get_gpu_id(dev) |
@@ -622,25 +602,33 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
 			qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
 				dev->cwsr_size,	PROT_READ | PROT_EXEC,
 				MAP_SHARED, offset);
-			qpd->cwsr_kaddr = (void *)qpd->tba_addr;
+
+			if (IS_ERR_VALUE(qpd->tba_addr)) {
+				pr_err("Failure to set tba address. error -%d.\n",
+					(int)qpd->tba_addr);
+				qpd->tba_addr = 0;
+				qpd->cwsr_kaddr = NULL;
+				err = -ENOMEM;
+				goto out;
+			} else
+				qpd->cwsr_kaddr = (void *)qpd->tba_addr;
 		}
-		if (IS_ERR_VALUE(qpd->tba_addr)) {
-			pr_err("Failure to set tba address. error -%d.\n",
-				(int)qpd->tba_addr);
-			qpd->tba_addr = 0;
-			qpd->cwsr_kaddr = NULL;
-		} else
-			qpd->tma_addr = qpd->tba_addr + dev->tma_offset;
-			pr_debug("set tba :0x%llx, tma:0x%llx for pqm.\n",
-				qpd->tba_addr, qpd->tma_addr);
+
+		qpd->tma_addr = qpd->tba_addr + dev->tma_offset;
+		pr_debug("set tba :0x%llx, tma:0x%llx for pqm.\n",
+			qpd->tba_addr, qpd->tma_addr);
 	}
 
 out:
-	up_write(&p->lock);
+	/* In case of error, the kfd_bos for some pdds which are already
+	 * allocated successfully will be freed in upper level function
+	 * i.e. create_process().
+	 */
 	return err;
 }
 
-static struct kfd_process *create_process(const struct task_struct *thread)
+static struct kfd_process *create_process(const struct task_struct *thread,
+					struct file *filep)
 {
 	struct kfd_process *process;
 	int err = -ENOMEM;
@@ -663,7 +651,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 
 	/* register notifier */
 	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = __mmu_notifier_register(&process->mmu_notifier, process->mm);
+	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
 	if (err)
 		goto err_mmu_notifier;
 
@@ -687,14 +675,26 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (err != 0)
 		goto err_init_apertures;
 
+	err = kfd_process_reserve_ib_mem(process);
+	if (err)
+		goto err_reserve_ib_mem;
+	err = kfd_process_init_cwsr(process, filep);
+	if (err)
+		goto err_init_cwsr;
+
 	return process;
 
+err_init_cwsr:
+err_reserve_ib_mem:
+	kfd_process_free_outstanding_kfd_bos(process);
+	kfd_process_destroy_pdds(process);
 err_init_apertures:
 	pqm_uninit(&process->pqm);
 err_process_pqm_init:
 	hash_del_rcu(&process->kfd_processes);
 	synchronize_rcu();
-	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
+	mmu_notifier_unregister_no_release(&process->mmu_notifier,
+					process->mm);
 err_mmu_notifier:
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
-- 
2.7.4