aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1502-drm-amdkfd-Fix-CU-masking-for-more-than-32-CUs.patch
blob: 1e4795d148e4ef9f6c9cdc8c981a3420d40e7634 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
From c95a7ced9bd72a1f224ed09dac2d5dbe2a75a1c6 Mon Sep 17 00:00:00 2001
From: Kent Russell <kent.russell@amd.com>
Date: Thu, 18 Aug 2016 13:15:39 -0400
Subject: [PATCH 1502/4131] drm/amdkfd: Fix CU masking for more than 32 CUs

The initial implementation of CU masking used a single uint32 instead of
an array, which limited the number of CUs that could be masked to 32.
Match the thunk/kernel spec and pass in the cu_mask_count (number of
bits total) and a uint32 array to mask properly.
BUG:KFD-277

Change-Id: I61d17685809d9beb62fdc9a47a1c19d8a2107a54
Signed-off-by: Kent Russell <kent.russell@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           | 36 +++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c      |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   | 59 +++++++++++-----------
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    | 59 +++++++++++-----------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  7 +++
 6 files changed, 101 insertions(+), 64 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 831f63f..c144752 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -391,14 +391,44 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
 					void *data)
 {
 	int retval;
+	const int max_num_cus = 1024;
 	struct kfd_ioctl_set_cu_mask_args *args = data;
 	struct queue_properties properties;
 	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
+	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
 
-	if (get_user(properties.cu_mask, cu_mask_ptr))
+	if ((args->num_cu_mask % 32) != 0) {
+		pr_debug("kfd: num_cu_mask (0x%x) must be a multiple of 32",
+				args->num_cu_mask);
+		return -EINVAL;
+	}
+
+	properties.cu_mask_count = args->num_cu_mask;
+	if (properties.cu_mask_count == 0) {
+		pr_debug("kfd: CU Mask cannot be 0");
+		return -EINVAL;
+	}
+
+	/* To prevent an unreasonably large CU mask size, set an arbitrary
+	 * limit of max_num_cus bits.  We can then just drop any CU mask bits
+	 * past max_num_cus bits and just use the first max_num_cus bits.
+	 */
+	if (properties.cu_mask_count > max_num_cus) {
+		pr_debug("kfd: CU mask cannot be greater than 1024 bits");
+		properties.cu_mask_count = max_num_cus;
+		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
+	}
+
+	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
+	if (!properties.cu_mask)
+		return -ENOMEM;
+
+	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
+	if (retval) {
+		pr_debug("kfd: Could not copy cu mask from userspace");
+		kfree(properties.cu_mask);
 		return -EFAULT;
-	if (properties.cu_mask == 0)
-		return 0;
+	}
 
 	down_write(&p->lock);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 162a83f..f19f2b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -123,6 +123,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
 	prop.eop_ring_buffer_address = kq->eop_gpu_addr;
 	prop.eop_ring_buffer_size = PAGE_SIZE;
+	prop.cu_mask = NULL;
 
 	if (init_queue(&kq->queue, &prop) != 0)
 		goto err_init_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 1badce1..959a7f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 {
 	struct cik_mqd *m;
 	struct kfd_cu_info cu_info;
-	uint32_t mgmt_se_mask;
-	uint32_t cu_sh_mask, cu_sh_shift;
-	uint32_t cu_mask;
-	int se, sh;
+	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+	uint32_t cu_mask_count = q->cu_mask_count;
+	const uint32_t *cu_mask = q->cu_mask;
+	int se, cu_per_sh, cu_index, i;
 
-	if (q->cu_mask == 0)
+	if (WARN_ON(cu_mask_count == 0))
 		return;
 
 	m = get_mqd(mqd);
@@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 	m->compute_static_thread_mgmt_se3 = 0;
 
 	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
-	cu_mask = q->cu_mask;
-	for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) {
-		mgmt_se_mask = 0;
-		for (sh = 0; sh < 2 && cu_mask; sh++) {
-			cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]);
-			cu_sh_mask = (1 << cu_sh_shift) - 1;
-			mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16);
-			cu_mask >>= cu_sh_shift;
-		}
-		switch (se) {
-		case 0:
-			m->compute_static_thread_mgmt_se0 = mgmt_se_mask;
-			break;
-		case 1:
-			m->compute_static_thread_mgmt_se1 = mgmt_se_mask;
-			break;
-		case 2:
-			m->compute_static_thread_mgmt_se2 = mgmt_se_mask;
-			break;
-		case 3:
-			m->compute_static_thread_mgmt_se3 = mgmt_se_mask;
-			break;
-		default:
-			break;
-		}
+
+	/* If # CU mask bits > # CUs, set it to the # of CUs */
+	if (cu_mask_count > cu_info.cu_active_number)
+		cu_mask_count = cu_info.cu_active_number;
+
+	cu_index = 0;
+	for (se = 0; se < cu_info.num_shader_engines; se++) {
+		cu_per_sh = 0;
+
+		/* Get the number of CUs on this Shader Engine */
+		for (i = 0; i < 4; i++)
+			cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]);
+
+		se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32);
+		if ((cu_per_sh + (cu_index % 32)) > 32)
+			se_mask[se] |= cu_mask[(cu_index / 32) + 1]
+					<< (32 - (cu_index % 32));
+		se_mask[se] &= (1 << cu_per_sh) - 1;
+		cu_index += cu_per_sh;
 	}
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+
 	pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n",
 		m->compute_static_thread_mgmt_se0,
 		m->compute_static_thread_mgmt_se1,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index d78964c..59bc27e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -48,12 +48,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 {
 	struct vi_mqd *m;
 	struct kfd_cu_info cu_info;
-	uint32_t mgmt_se_mask;
-	uint32_t cu_sh_mask, cu_sh_shift;
-	uint32_t cu_mask;
-	int se, sh;
+	uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
+	uint32_t cu_mask_count = q->cu_mask_count;
+	const uint32_t *cu_mask = q->cu_mask;
+	int se, cu_per_sh, cu_index, i;
 
-	if (q->cu_mask == 0)
+	if (WARN_ON(cu_mask_count == 0))
 		return;
 
 	m = get_mqd(mqd);
@@ -63,32 +63,31 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
 	m->compute_static_thread_mgmt_se3 = 0;
 
 	mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info);
-	cu_mask = q->cu_mask;
-	for (se = 0; se < cu_info.num_shader_engines && cu_mask; se++) {
-		mgmt_se_mask = 0;
-		for (sh = 0; sh < 2 && cu_mask; sh++) {
-			cu_sh_shift = hweight32(cu_info.cu_bitmap[se][sh]);
-			cu_sh_mask = (1 << cu_sh_shift) - 1;
-			mgmt_se_mask |= (cu_mask & cu_sh_mask) << (sh * 16);
-			cu_mask >>= cu_sh_shift;
-		}
-		switch (se) {
-		case 0:
-			m->compute_static_thread_mgmt_se0 = mgmt_se_mask;
-			break;
-		case 1:
-			m->compute_static_thread_mgmt_se1 = mgmt_se_mask;
-			break;
-		case 2:
-			m->compute_static_thread_mgmt_se2 = mgmt_se_mask;
-			break;
-		case 3:
-			m->compute_static_thread_mgmt_se3 = mgmt_se_mask;
-			break;
-		default:
-			break;
-		}
+
+	/* If # CU mask bits > # CUs, set it to the # of CUs */
+	if (cu_mask_count > cu_info.cu_active_number)
+		cu_mask_count = cu_info.cu_active_number;
+
+	cu_index = 0;
+	for (se = 0; se < cu_info.num_shader_engines; se++) {
+		cu_per_sh = 0;
+
+		/* Get the number of CUs on this Shader Engine */
+		for (i = 0; i < 4; i++)
+			cu_per_sh += hweight32(cu_info.cu_bitmap[se][i]);
+
+		se_mask[se] = cu_mask[cu_index / 32] >> (cu_index % 32);
+		if ((cu_per_sh + (cu_index % 32)) > 32)
+			se_mask[se] |= cu_mask[(cu_index / 32) + 1]
+					<< (32 - (cu_index % 32));
+		se_mask[se] &= (1 << cu_per_sh) - 1;
+		cu_index += cu_per_sh;
 	}
+	m->compute_static_thread_mgmt_se0 = se_mask[0];
+	m->compute_static_thread_mgmt_se1 = se_mask[1];
+	m->compute_static_thread_mgmt_se2 = se_mask[2];
+	m->compute_static_thread_mgmt_se3 = se_mask[3];
+
 	pr_debug("kfd: update cu mask to %#x %#x %#x %#x\n",
 		m->compute_static_thread_mgmt_se0,
 		m->compute_static_thread_mgmt_se1,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2bfe761..0a2afa7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -425,7 +425,8 @@ struct queue_properties {
 	uint64_t tba_addr;
 	uint64_t tma_addr;
 	/* Relevant for CU */
-	uint32_t cu_mask;
+	uint32_t cu_mask_count; /* Must be a multiple of 32 */
+	uint32_t *cu_mask;
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index cf08e824..b68776e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -337,6 +337,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
+		kfree(pqn->q->properties.cu_mask);
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
 		if (retval != 0) {
 			if (retval == -ETIME)
@@ -400,6 +401,12 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
 		return -EFAULT;
 	}
 
+	/* Free the old CU mask memory if it is already allocated, then
+	 * allocate memory for the new CU mask.
+	 */
+	kfree(pqn->q->properties.cu_mask);
+
+	pqn->q->properties.cu_mask_count = p->cu_mask_count;
 	pqn->q->properties.cu_mask = p->cu_mask;
 
 	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
-- 
2.7.4