aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.19.8/2762-drm-amdkfd-save-context-on-queue-suspend.patch
blob: 7639ea383f3550fcf9765652c08a56af436674ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
From 5d02c44069710220fd2315b85d6bfdcb8b85c03b Mon Sep 17 00:00:00 2001
From: Philip Cox <Philip.Cox@amd.com>
Date: Fri, 29 Mar 2019 16:53:04 -0400
Subject: [PATCH 2762/2940] drm/amdkfd: save context on queue suspend

This is a quick fix to save the queue context and control stack to
the queue info area when we suspend a queue.  The context size and
the control stack size are also saved, between the context and
control stack areas.

Change-Id: Ie5b8773d33ac06c3c8da942abece23f00c73834b
Signed-off-by: Philip Cox <Philip.Cox@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 64 +++++++----------
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 70 ++++++++++++++++++-
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 26 ++++++-
 3 files changed, 119 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 684f84f130a8..49d4f3cf5afd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2550,13 +2550,13 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	struct kfd_process_device *pdd;
 	int r = 0;
 	struct kfd_dev *dev;
-	struct kfd_process *process;
+	struct kfd_process *process = NULL;
+	struct pid *pid = NULL;
 	uint32_t gpu_id;
 	uint32_t debug_trap_action;
 	uint32_t data1;
 	uint32_t data2;
 	uint32_t data3;
-	struct pid *pid;
 
 	debug_trap_action = args->op;
 	gpu_id = args->gpu_id;
@@ -2583,7 +2583,27 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	}
 
 	mutex_lock(&p->mutex);
-	pdd = kfd_get_process_device_data(dev, p);
+
+	if (debug_trap_action == KFD_IOC_DBG_TRAP_NODE_SUSPEND ||
+		debug_trap_action == KFD_IOC_DBG_TRAP_NODE_RESUME) {
+
+		pid = find_get_pid(data1);
+		if (!pid) {
+			pr_err("Cannot find pid info for %i\n", data1);
+			r = -ESRCH;
+			goto unlock_out;
+		}
+
+		process = kfd_lookup_process_by_pid(pid);
+		if (!process) {
+			pr_err("Cannot find process info info for %i\n", data1);
+			r = -ESRCH;
+			goto unlock_out;
+		}
+		pdd = kfd_get_process_device_data(dev, process);
+	} else {
+		pdd = kfd_get_process_device_data(dev, p);
+	}
 	if (!pdd) {
 		r = -EINVAL;
 		goto unlock_out;
@@ -2654,21 +2674,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 				dev->vm_info.last_vmid_kfd);
 		break;
 	case KFD_IOC_DBG_TRAP_NODE_SUSPEND:
-		pid = find_get_pid(data1);
-		if (!pid) {
-			pr_err("Cannot find pid info for %i\n", data1);
-			r = -ESRCH;
-			goto unlock_out;
-		}
-
-		process = kfd_lookup_process_by_pid(pid);
-		if (!process) {
-			pr_err("Cannot find process info info for %i\n", data1);
-			r = -ESRCH;
-			put_pid(pid);
-			goto unlock_out;
-		}
-
 		/*
 		 * To suspend/resume queues, we need:
 		 *  ptrace to be enabled,
@@ -2687,25 +2692,8 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 			pr_err("Cannot debug process to suspend queues\n");
 			r = -ESRCH;
 		}
-		kfd_unref_process(process);
-		put_pid(pid);
 		break;
 	case KFD_IOC_DBG_TRAP_NODE_RESUME:
-		pid = find_get_pid(data1);
-		if (!pid) {
-			pr_err("Cannot find pid info for %i\n", data1);
-			r = -ESRCH;
-			goto unlock_out;
-		}
-
-		process = kfd_lookup_process_by_pid(pid);
-		if (!process) {
-			pr_err("Cannot find process info info for %i\n", data1);
-			r = -ESRCH;
-			put_pid(pid);
-			goto unlock_out;
-		}
-
 		/*
 		 * To suspend/resume queues, we need:
 		 *  ptrace to be enabled,
@@ -2724,8 +2712,6 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 			pr_err("Cannot debug process to resume queues\n");
 			r = -ESRCH;
 		}
-		kfd_unref_process(process);
-		put_pid(pid);
 		break;
 	default:
 		pr_err("Invalid option: %i\n", debug_trap_action);
@@ -2747,6 +2733,10 @@ static int kfd_ioctl_dbg_set_debug_trap(struct file *filep,
 	}
 
 unlock_out:
+	if (pid)
+		put_pid(pid);
+	if (process)
+		kfd_unref_process(process);
 	mutex_unlock(&p->mutex);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 5b0fbf7ba659..525dea08d208 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -25,6 +25,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/mmu_context.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/sched.h>
@@ -1976,6 +1977,52 @@ int release_debug_trap_vmid(struct device_queue_manager *dqm)
 	return r;
 }
 
+
+struct copy_context_work_handler_workarea {
+	struct work_struct copy_context_work;
+	struct device_queue_manager *dqm;
+	struct qcm_process_device *qpd;
+	struct mm_struct *mm;
+};
+
+void copy_context_work_handler (struct work_struct *work)
+{
+	struct copy_context_work_handler_workarea *workarea;
+	struct mqd_manager *mqd_mgr;
+	struct qcm_process_device *qpd;
+	struct device_queue_manager *dqm;
+	struct queue *q;
+	uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size;
+
+	workarea = container_of(work,
+			struct copy_context_work_handler_workarea,
+			copy_context_work);
+
+	qpd = workarea->qpd;
+	dqm = workarea->dqm;
+	use_mm(workarea->mm);
+
+
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+
+		/* We ignore the return value from get_wave_state because
+		 * i) right now, it always returns 0, and
+		 * ii) if we hit an error, we would continue to the next queue
+		 *     anyway.
+		 */
+		mqd_mgr->get_wave_state(mqd_mgr,
+				q->mqd,
+				(void __user *)	q->properties.ctx_save_restore_area_address,
+				&tmp_ctl_stack_used_size,
+				&tmp_save_area_used_size);
+	}
+
+	unuse_mm(workarea->mm);
+}
+
+
+
 int suspend_queues(struct device_queue_manager *dqm,
 			struct kfd_process *p,
 			uint32_t flags)
@@ -1984,6 +2031,9 @@ int suspend_queues(struct device_queue_manager *dqm,
 	struct kfd_dev *dev;
 	struct kfd_process_device *pdd;
 
+	bool queues_suspended = false;
+	struct copy_context_work_handler_workarea copy_context_worker;
+
 	dev = dqm->dev;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
@@ -1991,8 +2041,21 @@ int suspend_queues(struct device_queue_manager *dqm,
 			r = pdd->dev->dqm->ops.evict_process_queues(
 					pdd->dev->dqm,
 					&pdd->qpd);
-			if (r)
+			if (r) {
 				pr_err("Failed to suspend process queues\n");
+				break;
+			}
+
+			copy_context_worker.qpd = &pdd->qpd;
+			copy_context_worker.dqm = dqm;
+			copy_context_worker.mm = get_task_mm(p->lead_thread);
+			queues_suspended = true;
+
+			INIT_WORK_ONSTACK(
+					&copy_context_worker.copy_context_work,
+					copy_context_work_handler);
+
+			schedule_work(&copy_context_worker.copy_context_work);
 			break;
 		}
 	}
@@ -2001,6 +2064,11 @@ int suspend_queues(struct device_queue_manager *dqm,
 	if (!r && flags & KFD__DBG_NODE_SUSPEND_MEMORY_FENCE)
 		amdgpu_amdkfd_debug_mem_fence(dev->kgd);
 
+	if (queues_suspended) {
+		flush_work(&copy_context_worker.copy_context_work);
+		mmput(copy_context_worker.mm);
+		destroy_work_on_stack(&copy_context_worker.copy_context_work);
+	}
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 4ca984653ae6..b6c312f7ce7e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -337,13 +337,17 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
 		mm->dev->kgd, queue_address,
 		pipe_id, queue_id);
 }
-
 static int get_wave_state(struct mqd_manager *mm, void *mqd,
 			  void __user *ctl_stack,
 			  u32 *ctl_stack_used_size,
 			  u32 *save_area_used_size)
 {
+	void __user *user_data_ptr;
 	struct v9_mqd *m;
+	struct {
+		uint32_t ctl_stack_size;
+		uint32_t save_area_size;
+	} user_data;
 
 	/* Control stack is located one page after MQD. */
 	void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
@@ -351,11 +355,27 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 	m = get_mqd(mqd);
 
 	*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
-		m->cp_hqd_cntl_stack_offset;
+		m->cp_hqd_cntl_stack_offset + sizeof(user_data);
 	*save_area_used_size = m->cp_hqd_wg_state_offset -
 		m->cp_hqd_cntl_stack_size;
 
-	if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+	/* To avoid breaking existing tools reading the control stack,
+	 * set the IS_EVENT and IS_STATE bits to the sizes so that they
+	 * are ignored if read as COMPUTE_RELAUNCH register.
+	 */
+	user_data.ctl_stack_size = 0xC0000000 | *ctl_stack_used_size;
+	user_data.save_area_size = 0xC0000000 | *save_area_used_size;
+
+	/* The user ctl_stack_size and save_area size are located
+	 * right below the start of the context save area.
+	 */
+	user_data_ptr = (void __user *)((uintptr_t)ctl_stack
+		+ m->cp_hqd_cntl_stack_size - sizeof(user_data));
+
+	if (copy_to_user(ctl_stack,
+			 (void *)((uintptr_t) mqd_ctl_stack + sizeof(user_data)),
+			 m->cp_hqd_cntl_stack_size - sizeof(user_data))
+		|| copy_to_user(user_data_ptr, &user_data, sizeof(user_data)))
 		return -EFAULT;
 
 	return 0;
-- 
2.17.1