aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/2855-drm-amdkfd-Hardware-DWORD-size-is-4-bytes.patch
blob: bb9b2f5982d297c786b1588ed573dcd67519b2bc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
From 7787d21b7ec2cc40a0849a6b8d58ec3fe845e2b1 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 2 Nov 2017 16:43:36 -0400
Subject: [PATCH 2855/4131] drm/amdkfd: Hardware DWORD size is 4 bytes

Don't use sizeof(uint32_t), or sizeof of similar types, for the hardware
or firmware DWORD size; use the literal 4 (bytes) instead. The hardware
and firmware don't care about Linux types.

Change-Id: Idd093ba19276c9207dc9e9083d8d0dcd93f4a2c3
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c          | 14 +++++---------
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c    |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 10 ++++------
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  |  7 +++----
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  9 ++++-----
 6 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index af6d736..df9b346 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -97,7 +97,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
 
 	ib_packet->control = (1 << 23) | (1 << 31) |
-			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+			((size_in_bytes / 4) & 0xfffff);
 
 	ib_packet->bitfields5.pasid = pasid;
 
@@ -133,8 +133,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
 	rm_packet->header.opcode = IT_RELEASE_MEM;
 	rm_packet->header.type = PM4_TYPE_3;
-	rm_packet->header.count = sizeof(struct pm4_mec_release_mem) /
-					sizeof(unsigned int) - 2;
+	rm_packet->header.count = sizeof(struct pm4_mec_release_mem) / 4 - 2;
 
 	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
 	rm_packet->bitfields2.event_index =
@@ -667,8 +666,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
 	packets_vec[0].header.type = PM4_TYPE_3;
 	packets_vec[0].bitfields2.reg_offset =
-			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
-				USERCONFIG_REG_BASE;
+			GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 
 	packets_vec[0].bitfields2.insert_vmid = 0;
 	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
@@ -676,8 +674,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	packets_vec[1].header.count = 1;
 	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
 	packets_vec[1].header.type = PM4_TYPE_3;
-	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
-						AMD_CONFIG_REG_BASE;
+	packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
 
 	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
 	packets_vec[1].bitfields2.insert_vmid = 1;
@@ -693,8 +690,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 
 	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
 	packets_vec[2].bitfields2.reg_offset =
-				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
-					USERCONFIG_REG_BASE;
+				GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
 
 	packets_vec[2].bitfields2.insert_vmid = 0;
 	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 047b048..8cf9d44 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -221,7 +221,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	wptr = kq->pending_wptr;
 	wptr64 = kq->pending_wptr64;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
-	queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
+	queue_size_dwords = kq->queue->properties.queue_size / 4;
 
 	pr_debug("rptr: %d\n", rptr);
 	pr_debug("wptr: %d\n", wptr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index ecf4a33..9022ecb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -106,7 +106,7 @@ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
 
 	header.u32All = 0;
 	header.opcode = opcode;
-	header.count = packet_size/sizeof(uint32_t) - 2;
+	header.count = packet_size / 4 - 2;
 	header.type = PM4_TYPE_3;
 
 	return header.u32All;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 5398705..a628e28 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -217,7 +217,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,
@@ -251,8 +251,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue size -1
 	 * dwords and another -1 for ffs
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-								- 1 - 1;
+	m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
@@ -293,7 +292,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	struct cik_sdma_rlc_registers *m;
 
 	m = get_sdma_mqd(mqd);
-	m->sdma_rlc_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+	m->sdma_rlc_rb_cntl = (ffs(q->queue_size / 4) - 1)
 			<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
 			q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
 			1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
@@ -434,8 +433,7 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 	 * Calculating queue size which is log base 2 of actual queue
 	 * size -1 dwords
 	 */
-	m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int))
-								- 1 - 1;
+	m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1;
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 30edcda..e65725b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -203,8 +203,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	m = get_mqd(mqd);
 
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
-	m->cp_hqd_pq_control |=
-			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |= ffs(q->queue_size / 4) - 1 - 1;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -231,7 +230,7 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	 * is safe, giving a maximum field value of 0xA.
 	 */
 	m->cp_hqd_eop_control = min(0xA,
-		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+		ffs(q->eop_ring_buffer_size / 4) - 1 - 1);
 	m->cp_hqd_eop_base_addr_lo =
 			lower_32_bits(q->eop_ring_buffer_address >> 8);
 	m->cp_hqd_eop_base_addr_hi =
@@ -402,7 +401,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	struct v9_sdma_mqd *m;
 
 	m = get_sdma_mqd(mqd);
-	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / 4) - 1)
 		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
 		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
 		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 973ea94..2e6fc62 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -191,7 +191,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
 {
 	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
 	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
-	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1);
 
 	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
 					  (uint32_t __user *)p->write_ptr,
@@ -209,8 +209,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
 			atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
 			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
-	m->cp_hqd_pq_control |=
-			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+	m->cp_hqd_pq_control |=	ffs(q->queue_size / 4) - 1 - 1;
 	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
 
 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
@@ -242,7 +241,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	 * is safe, giving a maximum field value of 0xA.
 	 */
 	m->cp_hqd_eop_control |= min(0xA,
-		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
+		ffs(q->eop_ring_buffer_size / 4) - 1 - 1);
 	m->cp_hqd_eop_base_addr_lo =
 			lower_32_bits(q->eop_ring_buffer_address >> 8);
 	m->cp_hqd_eop_base_addr_hi =
@@ -414,7 +413,7 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 	struct vi_sdma_mqd *m;
 
 	m = get_sdma_mqd(mqd);
-	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / sizeof(unsigned int)) - 1)
+	m->sdmax_rlcx_rb_cntl = (ffs(q->queue_size / 4) - 1)
 		<< SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
 		q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
 		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
-- 
2.7.4