aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch
blob: f48e2adcd730a87335e7d30d1542a8bab4437cc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
From 1892e4b7e885e2db1cdf38156bda0402e2a6a166 Mon Sep 17 00:00:00 2001
From: Jay Cornwall <Jay.Cornwall@amd.com>
Date: Wed, 31 Jan 2018 09:24:37 -0600
Subject: [PATCH 3311/4131] drm/amdkfd: Workaround SQC store failure in gfx9
 trap handler

SQC stores may intermittently write incorrect data under concurrency
when module parameter noretry=1. This can cause failed context
save/restore cycles as the wavefront state is saved incorrectly.

Within each wavefront, wait for each SQC store to be acknowledged
before issuing the next one.

Change-Id: Ie2ba2bff1c9b0257632c617145b133fe3006e301
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
---
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm  | 73 ++++++++++++++--------
 1 file changed, 48 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
index 81d7069..f9e819b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -90,6 +90,7 @@ var SIM_RUN_HACK		    =	0		    //any hack that needs to be made to run this code
 var SGPR_SAVE_USE_SQC		    =	1		    //use SQC D$ to do the write
 var USE_MTBUF_INSTEAD_OF_MUBUF	    =	0		    //becasue TC EMU curently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
 var SWIZZLE_EN			    =	0		    //whether we use swizzled buffer addressing
+var ACK_SQC_STORE		    =	1		    //workaround for suspected SQC store bug causing incorrect stores under concurrency
 
 /**************************************************************************/
 /*			variables					  */
@@ -1089,6 +1090,9 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
 	s_mov_b32 exec_lo, m0			//assuming exec_lo is not needed anymore from this point on
 	s_mov_b32 m0, s_mem_offset
 	s_buffer_store_dword s, s_rsrc, m0	glc:1
+if ACK_SQC_STORE
+	s_waitcnt lgkmcnt(0)
+end
 	s_add_u32	s_mem_offset, s_mem_offset, 4
 	s_mov_b32   m0, exec_lo
 end
@@ -1098,9 +1102,21 @@ end
 function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
 
 	s_buffer_store_dwordx4 s[0], s_rsrc, 0	glc:1
+if ACK_SQC_STORE
+	s_waitcnt lgkmcnt(0)	// workaround: wait for the store of s[0..3] to be acknowledged before issuing the next store
+end
 	s_buffer_store_dwordx4 s[4], s_rsrc, 16	 glc:1
+if ACK_SQC_STORE
+	s_waitcnt lgkmcnt(0)	// workaround: wait for the store of s[4..7] to be acknowledged before issuing the next store
+end
 	s_buffer_store_dwordx4 s[8], s_rsrc, 32	 glc:1
+if ACK_SQC_STORE
+	s_waitcnt lgkmcnt(0)	// workaround: wait for the store of s[8..11] to be acknowledged before issuing the next store
+end
 	s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+if ACK_SQC_STORE
+	s_waitcnt lgkmcnt(0)	// workaround: wait for the final store (s[12..15]) to be acknowledged before s_rsrc is advanced
+end
 	s_add_u32	s_rsrc[0], s_rsrc[0], 4*16
 	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0	      // +scc
 end
@@ -1145,7 +1161,7 @@ end
 #endif
 
 static const uint32_t cwsr_trap_gfx9_hex[] = {
-	0xbf820001, 0xbf820128,
+	0xbf820001, 0xbf820136,
 	0xb8f0f802, 0x89708670,
 	0xb8f1f803, 0x8674ff71,
 	0x00000400, 0xbf850021,
@@ -1196,35 +1212,40 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0xbef60084, 0xbef600ff,
 	0x01000000, 0xbefe007c,
 	0xbefc007a, 0xc0611efa,
-	0x0000007c, 0x807a847a,
-	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611b3a,
-	0x0000007c, 0x807a847a,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611b3a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
 	0xbefc007e, 0xbefe007c,
 	0xbefc007a, 0xc0611b7a,
-	0x0000007c, 0x807a847a,
-	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611bba,
-	0x0000007c, 0x807a847a,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611bba, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
 	0xbefc007e, 0xbefe007c,
 	0xbefc007a, 0xc0611bfa,
-	0x0000007c, 0x807a847a,
-	0xbefc007e, 0xbefe007c,
-	0xbefc007a, 0xc0611c3a,
-	0x0000007c, 0x807a847a,
+	0x0000007c, 0xbf8cc07f,
+	0x807a847a, 0xbefc007e,
+	0xbefe007c, 0xbefc007a,
+	0xc0611c3a, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
 	0xbefc007e, 0xb8f1f803,
 	0xbefe007c, 0xbefc007a,
 	0xc0611c7a, 0x0000007c,
-	0x807a847a, 0xbefc007e,
-	0xbefe007c, 0xbefc007a,
-	0xc0611cba, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xbefe007c,
+	0xbefc007a, 0xc0611cba,
+	0x0000007c, 0xbf8cc07f,
 	0x807a847a, 0xbefc007e,
 	0xbefe007c, 0xbefc007a,
 	0xc0611cfa, 0x0000007c,
-	0x807a847a, 0xbefc007e,
-	0xb8fbf801, 0xbefe007c,
-	0xbefc007a, 0xc0611efa,
-	0x0000007c, 0x807a847a,
+	0xbf8cc07f, 0x807a847a,
+	0xbefc007e, 0xb8fbf801,
+	0xbefe007c, 0xbefc007a,
+	0xc0611efa, 0x0000007c,
+	0xbf8cc07f, 0x807a847a,
 	0xbefc007e, 0x8676ff7f,
 	0x04000000, 0xbeef0080,
 	0x876f6f76, 0xb8fa2a05,
@@ -1239,12 +1260,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
 	0xbe862b06, 0xbe882b08,
 	0xbe8a2b0a, 0xbe8c2b0c,
 	0xbe8e2b0e, 0xc06b003a,
-	0x00000000, 0xc06b013a,
-	0x00000010, 0xc06b023a,
-	0x00000020, 0xc06b033a,
-	0x00000030, 0x8074c074,
+	0x00000000, 0xbf8cc07f,
+	0xc06b013a, 0x00000010,
+	0xbf8cc07f, 0xc06b023a,
+	0x00000020, 0xbf8cc07f,
+	0xc06b033a, 0x00000030,
+	0xbf8cc07f, 0x8074c074,
 	0x82758075, 0x807c907c,
-	0xbf0a717c, 0xbf85ffeb,
+	0xbf0a717c, 0xbf85ffe7,
 	0xbef40172, 0xbefa0080,
 	0xbefe00c1, 0xbeff00c1,
 	0xbef600ff, 0x01000000,
-- 
2.7.4