aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch155
1 files changed, 155 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch
new file mode 100644
index 00000000..f48e2adc
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch
@@ -0,0 +1,155 @@
+From 1892e4b7e885e2db1cdf38156bda0402e2a6a166 Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <Jay.Cornwall@amd.com>
+Date: Wed, 31 Jan 2018 09:24:37 -0600
+Subject: [PATCH 3311/4131] drm/amdkfd: Workaround SQC store failure in gfx9
+ trap handler
+
+SQC stores may intermittently write incorrect data under concurrency
+when module parameter noretry=1. This can cause failed context
+save/restore cycles as the wavefront state is saved incorrectly.
+
+Within each wavefront wait for SQC store acknowledgment before
+issuing another.
+
+Change-Id: Ie2ba2bff1c9b0257632c617145b133fe3006e301
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+---
+ .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 73 ++++++++++++++--------
+ 1 file changed, 48 insertions(+), 25 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+index 81d7069..f9e819b 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+@@ -90,6 +90,7 @@ var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code
+ var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+ var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //becasue TC EMU curently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
+ var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
++var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+ /**************************************************************************/
+ /* variables */
+@@ -1089,6 +1090,9 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+ s_buffer_store_dword s, s_rsrc, m0 glc:1
++if ACK_SQC_STORE
++ s_waitcnt lgkmcnt(0)
++end
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+ end
+@@ -1098,9 +1102,21 @@ end
+ function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
++if ACK_SQC_STORE
++ s_waitcnt lgkmcnt(0)
++end
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
++if ACK_SQC_STORE
++ s_waitcnt lgkmcnt(0)
++end
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
++if ACK_SQC_STORE
++ s_waitcnt lgkmcnt(0)
++end
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
++if ACK_SQC_STORE
++ s_waitcnt lgkmcnt(0)
++end
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+ end
+@@ -1145,7 +1161,7 @@ end
+ #endif
+
+ static const uint32_t cwsr_trap_gfx9_hex[] = {
+- 0xbf820001, 0xbf820128,
++ 0xbf820001, 0xbf820136,
+ 0xb8f0f802, 0x89708670,
+ 0xb8f1f803, 0x8674ff71,
+ 0x00000400, 0xbf850021,
+@@ -1196,35 +1212,40 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc007a, 0xc0611efa,
+- 0x0000007c, 0x807a847a,
+- 0xbefc007e, 0xbefe007c,
+- 0xbefc007a, 0xc0611b3a,
+- 0x0000007c, 0x807a847a,
++ 0x0000007c, 0xbf8cc07f,
++ 0x807a847a, 0xbefc007e,
++ 0xbefe007c, 0xbefc007a,
++ 0xc0611b3a, 0x0000007c,
++ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611b7a,
+- 0x0000007c, 0x807a847a,
+- 0xbefc007e, 0xbefe007c,
+- 0xbefc007a, 0xc0611bba,
+- 0x0000007c, 0x807a847a,
++ 0x0000007c, 0xbf8cc07f,
++ 0x807a847a, 0xbefc007e,
++ 0xbefe007c, 0xbefc007a,
++ 0xc0611bba, 0x0000007c,
++ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611bfa,
+- 0x0000007c, 0x807a847a,
+- 0xbefc007e, 0xbefe007c,
+- 0xbefc007a, 0xc0611c3a,
+- 0x0000007c, 0x807a847a,
++ 0x0000007c, 0xbf8cc07f,
++ 0x807a847a, 0xbefc007e,
++ 0xbefe007c, 0xbefc007a,
++ 0xc0611c3a, 0x0000007c,
++ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8f1f803,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611c7a, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611cba, 0x0000007c,
++ 0xbf8cc07f, 0x807a847a,
++ 0xbefc007e, 0xbefe007c,
++ 0xbefc007a, 0xc0611cba,
++ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611cfa, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xb8fbf801, 0xbefe007c,
+- 0xbefc007a, 0xc0611efa,
+- 0x0000007c, 0x807a847a,
++ 0xbf8cc07f, 0x807a847a,
++ 0xbefc007e, 0xb8fbf801,
++ 0xbefe007c, 0xbefc007a,
++ 0xc0611efa, 0x0000007c,
++ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0x8676ff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f76, 0xb8fa2a05,
+@@ -1239,12 +1260,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+- 0x00000000, 0xc06b013a,
+- 0x00000010, 0xc06b023a,
+- 0x00000020, 0xc06b033a,
+- 0x00000030, 0x8074c074,
++ 0x00000000, 0xbf8cc07f,
++ 0xc06b013a, 0x00000010,
++ 0xbf8cc07f, 0xc06b023a,
++ 0x00000020, 0xbf8cc07f,
++ 0xc06b033a, 0x00000030,
++ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+- 0xbf0a717c, 0xbf85ffeb,
++ 0xbf0a717c, 0xbf85ffe7,
+ 0xbef40172, 0xbefa0080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbef600ff, 0x01000000,
+--
+2.7.4
+