diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch new file mode 100644 index 00000000..f48e2adc --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3311-drm-amdkfd-Workaround-SQC-store-failure-in-gfx9-trap.patch @@ -0,0 +1,155 @@ +From 1892e4b7e885e2db1cdf38156bda0402e2a6a166 Mon Sep 17 00:00:00 2001 +From: Jay Cornwall <Jay.Cornwall@amd.com> +Date: Wed, 31 Jan 2018 09:24:37 -0600 +Subject: [PATCH 3311/4131] drm/amdkfd: Workaround SQC store failure in gfx9 + trap handler + +SQC stores may intermittently write incorrect data under concurrency +when module parameter noretry=1. This can cause failed context +save/restore cycles as the wavefront state is saved incorrectly. + +Within each wavefront wait for SQC store acknowledgment before +issuing another. + +Change-Id: Ie2ba2bff1c9b0257632c617145b133fe3006e301 +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> +--- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 73 ++++++++++++++-------- + 1 file changed, 48 insertions(+), 25 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +index 81d7069..f9e819b 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +@@ -90,6 +90,7 @@ var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code + var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write + var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //becasue TC EMU curently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes + var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing ++var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency + + /**************************************************************************/ + /* variables */ +@@ -1089,6 +1090,9 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo + end +@@ -1098,9 +1102,21 @@ end + function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc + end +@@ -1145,7 +1161,7 @@ end + #endif + + static const uint32_t cwsr_trap_gfx9_hex[] = { +- 0xbf820001, 0xbf820128, ++ 0xbf820001, 0xbf820136, + 0xb8f0f802, 0x89708670, + 0xb8f1f803, 0x8674ff71, + 0x00000400, 0xbf850021, +@@ -1196,35 +1212,40 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, +- 0x0000007c, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611b3a, +- 0x0000007c, 0x807a847a, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611b3a, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b7a, +- 0x0000007c, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611bba, +- 0x0000007c, 0x807a847a, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611bba, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bfa, +- 0x0000007c, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611c3a, +- 0x0000007c, 0x807a847a, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611c3a, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611cba, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611cba, ++ 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611cfa, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xb8fbf801, 0xbefe007c, +- 0xbefc007a, 0xc0611efa, +- 0x0000007c, 0x807a847a, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xb8fbf801, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611efa, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0x8676ff7f, + 0x04000000, 0xbeef0080, + 0x876f6f76, 0xb8fa2a05, +@@ -1239,12 +1260,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, +- 0x00000000, 0xc06b013a, +- 0x00000010, 0xc06b023a, +- 0x00000020, 0xc06b033a, +- 0x00000030, 0x8074c074, ++ 0x00000000, 0xbf8cc07f, ++ 0xc06b013a, 0x00000010, ++ 0xbf8cc07f, 0xc06b023a, ++ 0x00000020, 0xbf8cc07f, ++ 0xc06b033a, 0x00000030, ++ 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, +- 0xbf0a717c, 0xbf85ffeb, ++ 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, + 0xbef600ff, 0x01000000, +-- +2.7.4 + |