aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1532-drm-amdgpu-Synchronize-KFD-HQD-load-protocol-with-CP.patch
blob: faa9348c31ae28b0ea71eebf25ab11419984bfce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
From f038f14a4d4a6d2c719de9600ba9371f19a51412 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Thu, 6 Oct 2016 19:48:22 -0500
Subject: [PATCH 1532/4131] drm/amdgpu: Synchronize KFD HQD load protocol with
 CP scheduler

The non-CP scheduling path should write to the entire HQD without
assuming that some registers do not need to be initialized. These
assumptions are otherwise challenging to verify.

On the Gfx8 path this fixes AQL support, which was broken by a missing
write to CP_HQD_IQ_RPTR. Additionally, implement a workaround for an
erratum concerning the EOP queue on Tonga ASICs to fix hangs when
submitting commands to KFD queues.

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>

 Conflicts:
	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

Change-Id: Ia3dae4001fde5d8d093ad460ebbd31899a7329c8
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 54 ++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 68 +++++++++--------------
 2 files changed, 37 insertions(+), 85 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 62270ca..3b812a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -382,26 +382,18 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_mqd *m;
+	uint32_t *mqd_hqd;
+        uint32_t reg;
 
 	m = get_mqd(mqd);
 
-
 	acquire_queue(kgd, pipe_id, queue_id);
 
-        WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-        WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-        WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-        WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-        WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-        WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-        WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
+        /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+        mqd_hqd = &m->cp_mqd_base_addr_lo;
 
-        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-        WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, m->cp_hqd_pq_rptr_report_addr_hi);
-        WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-        WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
+       for (reg = mmCP_HQD_VMID; reg <= mmCP_MQD_CONTROL; reg++)
+               WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
 
         if (wptr) {
                 /* Don't read wptr with get_user because the user
@@ -419,34 +411,12 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
                        (uint32_t)((uint64_t)wptr >> 32));
                 WREG32(mmCP_PQ_WPTR_POLL_CNTL1,
                       get_queue_mask(pipe_id, queue_id));
-        } else
-                WREG32(mmCP_HQD_PQ_WPTR, 0);
-
-
-        WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-        WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-        WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-        WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-        WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-        WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-        WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-        WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-        WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-        WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-        WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-        WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-        WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-        WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-        WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
-        WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
-	release_queue(kgd);
+	}
+	
+       /* Write CP_HQD_ACTIVE last. */
+        for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++)
+                WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+ 	release_queue(kgd);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 4db9637..aed08fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -376,24 +376,32 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct vi_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg;
 
 	m = get_mqd(mqd);
 
 	acquire_queue(kgd, pipe_id, queue_id);
 
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_HQD_VMID; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
 
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+	}
+
+	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
 
 	if (wptr) {
 		/* Don't read wptr with get_user because the user
@@ -411,37 +419,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		       (uint32_t)((uint64_t)wptr >> 32));
 		WREG32(mmCP_PQ_WPTR_POLL_CNTL1,
 		       get_queue_mask(pipe_id, queue_id));
-	} else
-		WREG32(mmCP_HQD_PQ_WPTR, 0);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	}
+
+	/* Write CP_HQD_ACTIVE last. */
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_ACTIVE; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
 
 	release_queue(kgd);
 
-- 
2.7.4