aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch262
1 files changed, 262 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch
new file mode 100644
index 00000000..6c1c6f18
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1149-drm-amdkfd-Update-CWSR-trap-handler-for-stability.patch
@@ -0,0 +1,262 @@
+From ebf92b2a969cc6c77c94245f511c6defd38f7507 Mon Sep 17 00:00:00 2001
+From: shaoyunl <Shaoyun.Liu@amd.com>
+Date: Thu, 9 Jun 2016 17:07:49 -0400
+Subject: [PATCH 1149/4131] drm/amdkfd: Update CWSR trap handler for stability
+
+The context save handler should be updated with this perforce commit:
+//gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler_perf.sp3
+2386036 - DVFIX: pick up fixes from gfx9.
+ 1. mask off priority been saved.
+ 2. WT_EVICT for SQ L1 cache.
+
+This addresses two issues:
+
+ Context save data may be lost in SQC due to missing writeback
+ Wavefront priority after context restore is incorrect
+
+Change-Id: I0db837ecc86c2ee485f7f646df22726fe6af8fd3
+Signed-off-by: shaoyunl <Shaoyun.Liu@amd.com>
+---
+ .../gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h | 115 +++++++++------------
+ 1 file changed, 48 insertions(+), 67 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
+index 1880dc0..e8a9534 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
+@@ -21,7 +21,8 @@
+ */
+
+ #if 0
+- HW (CARRIZO) source code for CWSR trap handler
++HW (VI) source code for CWSR trap handler
++#Version 8 + multiple trap handler
+
+ var G8SR_WDMEM_HWREG_OFFSET = 0
+ var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+@@ -69,6 +70,7 @@ var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+ /**************************************************************************/
+ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+ var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
++var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00800006
+
+ var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+ var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+@@ -205,6 +207,7 @@ shader main
+ L_SKIP_RESTORE:
+
+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
++ s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_cbranch_scc1 L_SAVE //this is the operation for save
+@@ -375,9 +378,6 @@ end
+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+- // Save the tma_lo and tma_hi content from exec_lo and ttmp5
+- s_mov_b32 s_save_exec_lo, exec_lo
+- s_mov_b32 s_save_exec_hi, ttmp5
+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+ //s_save_trapsts conflicts with s_save_alloc_size
+@@ -392,8 +392,8 @@ end
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO
+ write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI
+- write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //TMA_LO
+- write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) //TMA_HI
++
++
+
+ /* the first wave in the threadgroup */
+ // save fist_wave bits in tba_hi unused bit.26
+@@ -996,9 +996,6 @@ end
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+- read_hwreg_from_mem(tma_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //tma_lo
+- read_hwreg_from_mem(tma_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //tma_hi
+- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+@@ -1052,26 +1049,19 @@ end
+ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+- s_buffer_store_dword s, s_rsrc, m0 glc:0
++ s_buffer_store_dword s, s_rsrc, m0 glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+ end
+
+-//Only for save hwreg to mem
+-function write_tma_to_mem(s, s_rsrc, offset_imm)
+- s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+- s_mov_b32 m0, offset_imm
+- s_buffer_store_dword s, s_rsrc, m0 glc:0
+- s_mov_b32 m0, exec_lo
+-end
+
+ // HWREG are saved before SGPRs, so all HWREG could be use.
+ function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+- s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:0
+- s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:0
+- s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:0
+- s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:0
++ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
++ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
++ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
++ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+ end
+@@ -1109,13 +1099,13 @@ end
+
+ function get_hwreg_size_bytes
+ return 128 //HWREG size 128 bytes
+-end
+
+ #endif
+
+ static const uint32_t cwsr_trap_carrizo_hex[] = {
+- 0xbf820001, 0xbf820131,
+- 0xb8f4f802, 0xb8f5f803,
++ 0xbf820001, 0xbf820125,
++ 0xb8f4f802, 0x8974ff74,
++ 0x00800006, 0xb8f5f803,
+ 0x8675ff75, 0x00000400,
+ 0xbf850013, 0xc00a1e37,
+ 0x00000000, 0xbf8c007f,
+@@ -1157,48 +1147,41 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
+ 0x806e7a6e, 0xbefa0084,
+ 0xbefa00ff, 0x01000000,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601bfc, 0x0000007c,
++ 0xc0611bfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601c3c, 0x0000007c,
++ 0xc0611c3c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601c7c, 0x0000007c,
++ 0xc0611c7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601cbc, 0x0000007c,
++ 0xc0611cbc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601cfc, 0x0000007c,
++ 0xc0611cfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+- 0xbef2007e, 0xbef30075,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601d3c, 0x0000007c,
++ 0xc0611d3c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xb8f5f803, 0xbefe007c,
+- 0xbefc006e, 0xc0601d7c,
++ 0xbefc006e, 0xc0611d7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+- 0xbefc006e, 0xc0601dbc,
++ 0xbefc006e, 0xc0611dbc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+- 0xbefc006e, 0xc0601dfc,
++ 0xbefc006e, 0xc0611dfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xb8eff801,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601bfc, 0x0000007c,
+- 0x806e846e, 0xbefc007e,
+- 0xbefe007c, 0xbefc006e,
+- 0xc0601b3c, 0x0000007c,
++ 0xc0611bfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601b7c, 0x0000007c,
++ 0xc0611b3c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+- 0xc0601cbc, 0x0000007c,
+- 0x806e846e, 0xbefc007e,
+- 0xbefe007c, 0xbefc006e,
+- 0xc0601cfc, 0x0000007c,
++ 0xc0611b7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0x867aff7f, 0x04000000,
+ 0xbef30080, 0x8773737a,
+@@ -1212,10 +1195,10 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+- 0xc06a003c, 0x00000000,
+- 0xc06a013c, 0x00000010,
+- 0xc06a023c, 0x00000020,
+- 0xc06a033c, 0x00000030,
++ 0xc06b003c, 0x00000000,
++ 0xc06b013c, 0x00000010,
++ 0xc06b023c, 0x00000020,
++ 0xc06b033c, 0x00000030,
+ 0x8078c078, 0x82798079,
+ 0x807c907c, 0xbf0a757c,
+ 0xbf85ffeb, 0xbef80176,
+@@ -1267,7 +1250,7 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
+ 0x807c847c, 0x806eff6e,
+ 0x00000400, 0xbf0a757c,
+ 0xbf85ffef, 0xbf9c0000,
+- 0xbf8200d1, 0xbef8007e,
++ 0xbf8200ca, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+@@ -1354,24 +1337,22 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
+ 0x80728472, 0xbf8c007f,
+ 0x8671ff71, 0x0000ffff,
+ 0xbefc0073, 0xbefe006e,
+- 0xbeff006f, 0xc0211bbc,
+- 0x00000072, 0x80728472,
+- 0xc0211bfc, 0x00000072,
+- 0x80728472, 0xbf8c007f,
+- 0x867375ff, 0x000003ff,
+- 0xb9734803, 0x867375ff,
+- 0xfffff800, 0x8f738b73,
+- 0xb973a2c3, 0xb977f801,
+- 0x8673ff71, 0xf0000000,
+- 0x8f739c73, 0x8e739073,
+- 0xbef60080, 0x87767376,
+- 0x8673ff71, 0x08000000,
+- 0x8f739b73, 0x8e738f73,
+- 0x87767376, 0x8673ff74,
+- 0x00800000, 0x8f739773,
+- 0xb976f807, 0x86fe7e7e,
+- 0x86ea6a6a, 0xb974f802,
+- 0xbf8a0000, 0x95807370,
+- 0xbf810000, 0x00000000,
++ 0xbeff006f, 0x867375ff,
++ 0x000003ff, 0xb9734803,
++ 0x867375ff, 0xfffff800,
++ 0x8f738b73, 0xb973a2c3,
++ 0xb977f801, 0x8673ff71,
++ 0xf0000000, 0x8f739c73,
++ 0x8e739073, 0xbef60080,
++ 0x87767376, 0x8673ff71,
++ 0x08000000, 0x8f739b73,
++ 0x8e738f73, 0x87767376,
++ 0x8673ff74, 0x00800000,
++ 0x8f739773, 0xb976f807,
++ 0x86fe7e7e, 0x86ea6a6a,
++ 0xb974f802, 0xbf8a0000,
++ 0x95807370, 0xbf810000,
++ 0x00000000, 0x00000000,
++
+ };
+
+--
+2.7.4
+