aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1203-drm-amdkfd-Update-page-directory-address-in-qpd-afte.patch
blob: d7d327501a979529675a1159cdc1639770695d11 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
From f3dbd0a8f0c6f818ae28cf52aade9254ab778ed0 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 5 Oct 2016 14:48:03 -0400
Subject: [PATCH 1203/4131] drm/amdkfd: Update page directory address in qpd
 after restoring BOs

When restoring BOs, page_table_base may be different from before, so we
should update it in qpd after restoring BOs.

Moreover, page_table_base won't change during mapping BOs, and it is
used for queue management, rather than buffer management. So we should
not change page_table_base in qpd when mapping BOs.

Lastly, flushing TLB should be done only when unmapping or evicting
BOs, and it is not needed when mapping BOs. However, as there are some
bugs in PTE handling for mapping and unmapping, we choose to keep the
flushing as a workaround.

Change-Id: I996b95b617b1542eab6e0b921a509a59ddfcc708
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 45 ++++++------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 79 +++++++++-------------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  2 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           | 10 +--
 5 files changed, 63 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d44ee59..f56d24a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1399,25 +1399,16 @@ int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd)
 	int err;
 	struct kfd_dev *dev = pdd->dev;
 
-	BUG_ON(!dev);
-	BUG_ON(!pdd);
-
 	err = dev->kfd2kgd->map_memory_to_gpu(
 		dev->kgd, (struct kgd_mem *) mem, pdd->vm);
 
-	if (err != 0)
-		return err;
-
+	/* Theoretically we don't need this flush. However, as there are
+	 * some bugs in our PTE handling for mapping and unmapping, we
+	 * need this flush to pass all the tests.
+	 */
 	kfd_flush_tlb(dev, pdd->process->pasid);
 
-	err = dev->dqm->ops.set_page_directory_base(dev->dqm, &pdd->qpd);
-	if (err != 0) {
-		dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd,
-				(struct kgd_mem *) mem, pdd->vm);
-		return err;
-	}
-
-	return 0;
+	return err;
 }
 
 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
@@ -1526,6 +1517,22 @@ static int kfd_ioctl_map_memory_to_gpu_wrapper(struct file *filep,
 	return kfd_ioctl_map_memory_to_gpu(filep, p, &new_args);
 }
 
+int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd)
+{
+	int err;
+	struct kfd_dev *dev = pdd->dev;
+
+	err = dev->kfd2kgd->unmap_memory_to_gpu(
+		dev->kgd, (struct kgd_mem *) mem, pdd->vm);
+
+	if (err != 0)
+		return err;
+
+	kfd_flush_tlb(dev, pdd->process->pasid);
+
+	return 0;
+}
+
 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 					struct kfd_process *p, void *data)
 {
@@ -1594,15 +1601,11 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 				err = -EFAULT;
 				goto get_mem_obj_from_handle_failed;
 			}
-			peer->kfd2kgd->unmap_memory_to_gpu(peer->kgd,
-					mem, peer_pdd->vm);
-			kfd_flush_tlb(peer, p->pasid);
+			kfd_unmap_memory_from_gpu(mem, peer_pdd);
 		}
 		kfree(devices_arr);
-	} else {
-		dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm);
-		kfd_flush_tlb(dev, p->pasid);
-	}
+	} else
+		kfd_unmap_memory_from_gpu(mem, pdd);
 
 	return 0;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0873a19..0cf4a62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -130,11 +130,15 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
 	program_sh_mem_settings(dqm, qpd);
 
+	/* qpd->page_table_base is set earlier when register_process()
+	 * is called, i.e. when the first queue is created.
+	 */
 	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
-			allocated_vmid,
+			qpd->vmid,
 			qpd->page_table_base);
 	/*invalidate the VM context after pasid and vmid mapping is set up*/
 	kfd_flush_tlb(dqm->dev, qpd->pqm->process->pasid);
+
 	return 0;
 }
 
@@ -541,9 +545,15 @@ int process_restore_queues(struct device_queue_manager *dqm,
 	struct queue *q, *next;
 	struct mqd_manager *mqd;
 	int retval = 0;
+	struct kfd_process_device *pdd;
+	uint32_t pd_base;
 
 	BUG_ON(!dqm || !qpd);
 
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
 	mutex_lock(&dqm->lock);
 	if (qpd->evicted == 0) /* already restored, do nothing */
 		goto out_unlock;
@@ -553,6 +563,19 @@ int process_restore_queues(struct device_queue_manager *dqm,
 		goto out_unlock;
 	}
 
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+	pr_debug("Updated PD address to 0x%08x in %s\n", pd_base, __func__);
+
+	if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
+				dqm->dev->kgd,
+				qpd->vmid,
+				qpd->page_table_base);
+
+		kfd_flush_tlb(dqm->dev, pdd->process->pasid);
+	}
+
 	/* activate all active queues on the qpd */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd = dqm->ops.get_mqd_manager(dqm,
@@ -590,9 +613,10 @@ int process_restore_queues(struct device_queue_manager *dqm,
 static int register_process(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
-	struct kfd_process_device *pdd;
 	struct device_process_node *n;
 	int retval;
+	struct kfd_process_device *pdd;
+	uint32_t pd_base;
 
 	BUG_ON(!dqm || !qpd);
 
@@ -604,13 +628,16 @@ static int register_process(struct device_queue_manager *dqm,
 
 	n->qpd = qpd;
 
+	pdd = qpd_to_pdd(qpd);
+	/* Retrieve PD base */
+	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
+
 	mutex_lock(&dqm->lock);
 	list_add(&n->list, &dqm->queues);
 
-	pdd = qpd_to_pdd(qpd);
-	qpd->page_table_base =
-		dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
-	pr_debug("Retrieved PD address == 0x%08u\n", qpd->page_table_base);
+	/* Update PD Base in QPD */
+	qpd->page_table_base = pd_base;
+	pr_debug("Updated PD address to 0x%08x in %s\n", pd_base, __func__);
 
 	retval = dqm->asic_ops.update_qpd(dqm, qpd);
 
@@ -1349,44 +1376,6 @@ static int set_trap_handler(struct device_queue_manager *dqm,
 	return 0;
 }
 
-
-static int set_page_directory_base(struct device_queue_manager *dqm,
-					struct qcm_process_device *qpd)
-{
-	struct kfd_process_device *pdd;
-	uint32_t pd_base;
-	int retval = 0;
-
-	BUG_ON(!dqm || !qpd);
-
-	mutex_lock(&dqm->lock);
-
-	pdd = qpd_to_pdd(qpd);
-
-	/* Retrieve PD base */
-	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
-
-	/* If it has not changed, just get out */
-	if (qpd->page_table_base == pd_base)
-		goto out;
-
-	/* Update PD Base in QPD */
-	qpd->page_table_base = pd_base;
-	pr_debug("Updated PD address == 0x%08u\n", pd_base);
-
-	/*
-	 * Preempt queues, destroy runlist and create new runlist. Queues
-	 * will have the update PD base address
-	 */
-	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = execute_queues_cpsch(dqm, false);
-
-out:
-	mutex_unlock(&dqm->lock);
-
-	return retval;
-}
-
 static int process_termination_nocpsch(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
@@ -1522,7 +1511,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
 		dqm->ops.set_trap_handler = set_trap_handler;
-		dqm->ops.set_page_directory_base = set_page_directory_base;
 		dqm->ops.process_termination = process_termination_cpsch;
 		break;
 	case KFD_SCHED_POLICY_NO_HWS:
@@ -1539,7 +1527,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.uninitialize = uninitialize_nocpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
 		dqm->ops.set_trap_handler = set_trap_handler;
-		dqm->ops.set_page_directory_base = set_page_directory_base;
 		dqm->ops.process_termination = process_termination_nocpsch;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 65e4c51c..05d0cc8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -134,8 +134,6 @@ struct device_queue_manager_ops {
 				    uint64_t tba_addr,
 				    uint64_t tma_addr);
 
-	int	(*set_page_directory_base)(struct device_queue_manager *dqm,
-					struct qcm_process_device *qpd);
 	int (*process_termination)(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd);
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2957184..107c573 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -749,6 +749,7 @@ struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
 
 /* kfd dgpu memory */
 int kfd_map_memory_to_gpu(void *mem, struct kfd_process_device *pdd);
+int kfd_unmap_memory_from_gpu(void *mem, struct kfd_process_device *pdd);
 
 /* Process device data iterator */
 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 5894142..b679ea7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -75,11 +75,11 @@ void kfd_process_destroy_wq(void)
 	}
 }
 
-static void kfd_process_free_gpuvm(struct kfd_dev *kdev, struct kgd_mem *mem,
-				void *vm)
+static void kfd_process_free_gpuvm(struct kgd_mem *mem,
+			struct kfd_process_device *pdd)
 {
-	kdev->kfd2kgd->unmap_memory_to_gpu(kdev->kgd, mem, vm);
-	kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem, vm);
+	kfd_unmap_memory_from_gpu(mem, pdd);
+	pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem, pdd->vm);
 }
 
 /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
@@ -127,7 +127,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
 	return err;
 
 free_gpuvm:
-	kfd_process_free_gpuvm(kdev, (struct kgd_mem *)mem, pdd->vm);
+	kfd_process_free_gpuvm(mem, pdd);
 	return err;
 
 err_map_mem:
-- 
2.7.4