aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1500-drm-amdkfd-Fix-IB-freeing-without-DIQ-synchronizatio.patch
blob: b597818ae267ce04d9547fbabf665baa8b6271e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
From 1ed8a837fbd6c172428217916db653e5f3499586 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 7 Sep 2016 18:06:52 -0400
Subject: [PATCH 1500/4131] drm/amdkfd: Fix IB freeing without DIQ
 synchronization

When DIQ IBs are submitted without synchronization, it's not safe
to release the IB memory. Avoid the need to explicitly free the IB
by allocating it inline in the ring buffer, packaged inside a NOP
packet.

Change-Id: Ife4d527fbcca369bdb45d5a09b1ae72da3231045
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       | 25 ++++++++-----------
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 36 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 10 ++++++++
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 74109d0..9de73ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -373,8 +373,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 	/* we do not control the vmid in DIQ mode, just a place holder */
 	unsigned int vmid = 0;
 
-	struct kfd_mem_obj *mem_obj;
 	uint32_t *packet_buff_uint = NULL;
+	uint64_t packet_buff_gpu_addr = 0;
 
 	struct pm4__set_config_reg *packets_vec = NULL;
 
@@ -398,13 +398,13 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 			break;
 		}
 
-		status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+		status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
+				ib_size/sizeof(uint32_t),
+				&packet_buff_uint, &packet_buff_gpu_addr);
 
 		if (status != 0)
 			break;
 
-		packet_buff_uint = mem_obj->cpu_ptr;
-
 		memset(packet_buff_uint, 0, ib_size);
 
 		packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
@@ -499,7 +499,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 			status = dbgdev_diq_submit_ib(
 						dbgdev,
 						adw_info->process->pasid,
-						mem_obj->gpu_addr,
+						packet_buff_gpu_addr,
 						packet_buff_uint,
 						ib_size, true);
 
@@ -511,8 +511,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 		}
 
 	} while (false);
-	if (packet_buff_uint != NULL)
-		kfd_gtt_sa_free(dbgdev->dev, mem_obj);
 
 	return status;
 
@@ -632,8 +630,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	int status = 0;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
-	struct kfd_mem_obj *mem_obj;
 	uint32_t *packet_buff_uint = NULL;
+	uint64_t packet_buff_gpu_addr = 0;
 	struct pm4__set_config_reg *packets_vec = NULL;
 	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
 
@@ -674,13 +672,13 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 
 		pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
 
-		status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+		status = dbgdev->kq->ops.acquire_inline_ib(dbgdev->kq,
+				ib_size / sizeof(uint32_t),
+				&packet_buff_uint, &packet_buff_gpu_addr);
 
 		if (status != 0)
 			break;
 
-		packet_buff_uint = mem_obj->cpu_ptr;
-
 		memset(packet_buff_uint, 0, ib_size);
 
 		packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
@@ -715,7 +713,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 		status = dbgdev_diq_submit_ib(
 				dbgdev,
 				wac_info->process->pasid,
-				mem_obj->gpu_addr,
+				packet_buff_gpu_addr,
 				packet_buff_uint,
 				ib_size, false);
 
@@ -724,9 +722,6 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 
 	} while (false);
 
-	if (packet_buff_uint != NULL)
-		kfd_gtt_sa_free(dbgdev->dev, mem_obj);
-
 	return status;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 9eaa040..162a83f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -219,7 +219,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	 * the opposite. So we can only use up to queue_size_dwords - 1 dwords.
 	 */
 	rptr = *kq->rptr_kernel;
-	wptr = *kq->wptr_kernel;
+	wptr = kq->pending_wptr;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
 	queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
 
@@ -258,6 +258,39 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	return 0;
 }
 
+/* Reserve ring space for an inline IB wrapped in a NOP packet. Returns 0 and
+ * the IB's CPU pointer and GPU address, or a negative error code on failure.
+ */
+static int acquire_inline_ib(struct kernel_queue *kq,
+			     size_t size_in_dwords,
+			     unsigned int **buffer_ptr,
+			     uint64_t *gpu_addr)
+{
+	int ret;
+	unsigned int *buf;
+	union PM4_MES_TYPE_3_HEADER nop;
+
+	/* nop.count stores size - 1, so a zero size would wrap around. */
+	if (size_in_dwords == 0 || size_in_dwords >= (1 << 14))
+		return -EINVAL;
+
+	/* One extra dword on the ring holds the NOP packet header. */
+	ret = acquire_packet_buffer(kq, size_in_dwords + 1, &buf);
+	if (ret)
+		return ret;
+
+	/* Build a NOP packet that contains the IB as "payload". */
+	nop.u32all = 0;
+	nop.opcode = IT_NOP;
+	nop.count = size_in_dwords - 1;
+	nop.type = PM4_TYPE_3;
+	*buf = nop.u32all;
+
+	*buffer_ptr = buf + 1;
+	*gpu_addr = kq->pq_gpu_addr + ((unsigned long)*buffer_ptr -
+				       (unsigned long)kq->pq_kernel_addr);
+	return 0;
+}
+
 static void submit_packet(struct kernel_queue *kq)
 {
 #ifdef DEBUG
@@ -300,6 +333,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	kq->ops.initialize = initialize;
 	kq->ops.uninitialize = uninitialize;
 	kq->ops.acquire_packet_buffer = acquire_packet_buffer;
+	kq->ops.acquire_inline_ib = acquire_inline_ib;
 	kq->ops.submit_packet = submit_packet;
 	kq->ops.rollback_packet = rollback_packet;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 5940531..a217f42 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -42,6 +42,12 @@
  * pending write pointer to that location so subsequent calls to
  * acquire_packet_buffer will get a correct write pointer
  *
+ * @acquire_inline_ib: Returns a pointer to the location in the kernel
+ * queue ring buffer where the calling function can write an inline IB,
+ * together with the GPU address of that location. It is guaranteed that
+ * there is enough space for that IB. It also updates the pending write
+ * pointer so later acquire_packet_buffer calls get a correct write pointer.
+ *
  * @submit_packet: Update the write pointer and doorbell of a kernel queue.
  *
  * @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
@@ -59,6 +65,10 @@ struct kernel_queue_ops {
 	int	(*acquire_packet_buffer)(struct kernel_queue *kq,
 					size_t packet_size_in_dwords,
 					unsigned int **buffer_ptr);
+	int	(*acquire_inline_ib)(struct kernel_queue *kq,
+				     size_t packet_size_in_dwords,
+				     unsigned int **buffer_ptr,
+				     uint64_t *gpu_addr);
 
 	void	(*submit_packet)(struct kernel_queue *kq);
 	void	(*rollback_packet)(struct kernel_queue *kq);
-- 
2.7.4