aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1200-drm-amdkfd-Enable-user-trap-handler-on-non-CWSR-ASIC.patch
blob: 389c2c9a411900e64c6c90c76bd006d0a047b8d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
From 17dd5f8421374886d216a736f68211f68db08a02 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 6 Oct 2016 20:01:20 -0500
Subject: [PATCH 1200/4131] drm/amdkfd: Enable user trap handler on non-CWSR
 ASICs

AMDKFD_IOC_SET_TRAP_HANDLER requires a context-save trap handler
to be installed, from which a jump to the user-provided trap handler
can be initiated. We would like to have this feature on ASICs which
do not support context-save/restore.

When a context-save handler is not installed configure the trap handler
registers to point directly to the user trap handler.

Also remove the legacy microcode version check since the KFD relies on
functionality present only in newer versions.

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c           |  5 -----
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            | 25 +++++++++++++---------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 16 +++++++++++---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    | 17 +++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  1 +
 5 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a828572..61729eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -519,11 +519,6 @@ static int kfd_ioctl_set_trap_handler(struct file *filep,
 		err = -ESRCH;
 		goto out;
 	}
-	if (!dev->cwsr_enabled || !pdd->qpd.cwsr_kaddr) {
-		pr_err("kfd: CWSR is not enabled, can't set trap handler.\n");
-		err = -EINVAL;
-		goto out;
-	}
 
 	if (dev->dqm->ops.set_trap_handler(dev->dqm,
 					&pdd->qpd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index ba48aca..6bab9db 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -43,7 +43,8 @@ static const struct kfd_device_info kaveri_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = true
+	.is_need_iommu_device = true,
+	.supports_cwsr = false,
 };
 
 static const struct kfd_device_info hawaii_device_info = {
@@ -55,7 +56,8 @@ static const struct kfd_device_info hawaii_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = false
+	.is_need_iommu_device = false,
+	.supports_cwsr = false,
 };
 
 static const struct kfd_device_info carrizo_device_info = {
@@ -67,7 +69,8 @@ static const struct kfd_device_info carrizo_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = true
+	.is_need_iommu_device = true,
+	.supports_cwsr = true,
 };
 
 static const struct kfd_device_info tonga_device_info = {
@@ -78,7 +81,8 @@ static const struct kfd_device_info tonga_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = false
+	.is_need_iommu_device = false,
+	.supports_cwsr = false,
 };
 
 static const struct kfd_device_info fiji_device_info = {
@@ -89,7 +93,8 @@ static const struct kfd_device_info fiji_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = false
+	.is_need_iommu_device = false,
+	.supports_cwsr = true,
 };
 
 static const struct kfd_device_info polaris10_device_info = {
@@ -100,7 +105,8 @@ static const struct kfd_device_info polaris10_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = false
+	.is_need_iommu_device = false,
+	.supports_cwsr = true,
 };
 
 static const struct kfd_device_info polaris11_device_info = {
@@ -111,7 +117,8 @@ static const struct kfd_device_info polaris11_device_info = {
 	.event_interrupt_class = &event_interrupt_class_cik,
 	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
-	.is_need_iommu_device = false
+	.is_need_iommu_device = false,
+	.supports_cwsr = true,
 };
 
 struct kfd_deviceid {
@@ -321,10 +328,8 @@ static int kfd_cwsr_init(struct kfd_dev *kfd)
 {
 	/*
 	 * Initialize the CWSR required memory for TBA and TMA
-	 * only support CWSR on VI and up with FW version >=625.
 	 */
-	if (cwsr_enable &&
-		(kfd->mec_fw_version >= KFD_CWSR_CZ_FW_VER)) {
+	if (cwsr_enable && kfd->device_info->supports_cwsr) {
 		void *cwsr_addr = NULL;
 		unsigned int size = sizeof(cwsr_trap_carrizo_hex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 3bc831f..349b8c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -210,6 +210,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 					    q->properties.queue_percent > 0 &&
 					    q->properties.queue_address != 0);
 
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 		retval = create_compute_queue_nocpsch(dqm, q, qpd);
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -1333,9 +1336,16 @@ static int set_trap_handler(struct device_queue_manager *dqm,
 {
 	uint64_t *tma;
 
-	tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset);
-	tma[0] = tba_addr;
-	tma[1] = tma_addr;
+	if (dqm->dev->cwsr_enabled) {
+		/* Jump from CWSR trap handler to user trap */
+		tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset);
+		tma[0] = tba_addr;
+		tma[1] = tma_addr;
+	} else {
+		qpd->tba_addr = tba_addr;
+		qpd->tma_addr = tma_addr;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 579cc68..b0ea0d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -142,10 +142,17 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 		m->cp_hqd_iq_rptr = 1;
 
 	if (q->tba_addr) {
-		m->cp_hqd_persistent_state |=
-			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
+		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
+		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
+		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
 		m->compute_pgm_rsrc2 |=
 			(1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+	}
+
+	if (mm->dev->cwsr_enabled) {
+		m->cp_hqd_persistent_state |=
+			(1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
 		m->cp_hqd_ctx_save_base_addr_lo =
 			lower_32_bits(q->ctx_save_restore_area_address);
 		m->cp_hqd_ctx_save_base_addr_hi =
@@ -154,10 +161,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 		m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
 		m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
 		m->cp_hqd_wg_state_offset = q->ctl_stack_size;
-		m->compute_tba_lo = lower_32_bits(q->tba_addr >> 8);
-		m->compute_tba_hi = upper_32_bits(q->tba_addr >> 8);
-		m->compute_tma_lo = lower_32_bits(q->tma_addr >> 8);
-		m->compute_tma_hi = upper_32_bits(q->tma_addr >> 8);
 	}
 
 	*mqd = m;
@@ -238,7 +241,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}
-	if (q->tba_addr)
+	if (mm->dev->cwsr_enabled)
 		m->cp_hqd_ctx_save_control =
 			atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
 			mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ed7f9bc..c02bfa0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -188,6 +188,7 @@ struct kfd_device_info {
 	uint8_t num_of_watch_points;
 	uint16_t mqd_size_aligned;
 	bool is_need_iommu_device;
+	bool supports_cwsr;
 };
 
 struct kfd_mem_obj {
-- 
2.7.4