diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1244-drm-amdkfd-Use-ttmp10-and-ttmp11-to-store-TMA-info-f.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1244-drm-amdkfd-Use-ttmp10-and-ttmp11-to-store-TMA-info-f.patch | 156 |
1 files changed, 0 insertions, 156 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1244-drm-amdkfd-Use-ttmp10-and-ttmp11-to-store-TMA-info-f.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1244-drm-amdkfd-Use-ttmp10-and-ttmp11-to-store-TMA-info-f.patch deleted file mode 100644 index 4d9c8f40..00000000 --- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1244-drm-amdkfd-Use-ttmp10-and-ttmp11-to-store-TMA-info-f.patch +++ /dev/null @@ -1,156 +0,0 @@ -From c9a5100c168cf7701615d72fd35c6c16c5fe7423 Mon Sep 17 00:00:00 2001 -From: Shaoyun Liu <Shaoyun.Liu@amd.com> -Date: Tue, 21 Mar 2017 17:39:08 -0400 -Subject: [PATCH 1244/4131] drm/amdkfd: Use ttmp10 and ttmp11 to store TMA info - for second level trap handler - -Second level trap handler will return to ISA directly, so first level trap -handler will not have chance to change back the correct TMA setting. -This will cause problem when the same trap happens again. -Change to use ttmp10 and ttmp11 for the TMA info which will keep the same -interface for asics GFX8, GFX9 and up. - -Change-Id: I975baa25297355da6a02eb430ffaca954eb74b4b -Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com> ---- - .../gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h | 43 ++++++++++++++++++---- - .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 15 ++++---- - 2 files changed, 43 insertions(+), 15 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h -index 4e34083..48fcec5 100644 ---- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h -+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h -@@ -22,7 +22,35 @@ - - #if 0 - HW (VI) source code for CWSR trap handler --#Version 9 + multiple trap handler -+#Version 18 + multiple trap handler -+ -+// this performance-optimal version was originally from Seven Xu at SRDC -+ -+// Revison #18 --... -+/* Rev History -+** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) -+** #4. SR Memory Layout: -+** 1. VGPR-SGPR-HWREG-{LDS} -+** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. -+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp -+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) -+** #7. Update: 1. don't barrier if noLDS -+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version -+** 2. Fix SQ issue by s_sleep 2 -+** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last -+** 2. optimize s_buffer save by burst 16sgprs... -+** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. -+** #11. Update 1. Add 2 more timestamp for debug version -+** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance -+** #13. Integ 1. Always use MUBUF for PV trap shader... -+** #14. Update 1. s_buffer_store soft clause... -+** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. -+** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree -+** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] -+** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... -+** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 -+** 2. FUNC - Handle non-CWSR traps -+*/ - - var G8SR_WDMEM_HWREG_OFFSET = 0 - var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes -@@ -186,7 +214,7 @@ var s_restore_buf_rsrc3 = ttmp11 - /* Shader Main*/ - - shader main -- asic(CARRIZO) -+ asic(VI) - type(CS) - - -@@ -219,8 +247,6 @@ if (!EMU_RUN_HACK) - s_waitcnt lgkmcnt(0) - s_or_b32 ttmp7, ttmp8, ttmp9 - s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set -- s_mov_b32 tma_lo, ttmp10 //set tma_lo/hi for next level trap handler -- s_mov_b32 tma_hi, ttmp11 - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) - s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler - -@@ -1099,18 +1125,19 @@ end - - function get_hwreg_size_bytes - return 128 //HWREG size 128 bytes -+end -+ - - #endif - - static const uint32_t cwsr_trap_carrizo_hex[] = { -- 0xbf820001, 0xbf820124, -+ 0xbf820001, 0xbf820122, - 0xb8f4f802, 0x89748674, - 0xb8f5f803, 0x8675ff75, -- 0x00000400, 0xbf850013, -+ 0x00000400, 0xbf850011, - 0xc00a1e37, 0x00000000, - 0xbf8c007f, 0x87777978, -- 0xbf840004, 0xbeee007a, -- 0xbeef007b, 0xb974f802, -+ 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, -diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm -index 0106e77..661bd0a 100644 ---- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm -+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm -@@ -28,7 +28,7 @@ HW (GFX9) source code for CWSR trap handler - - // Revison #18 --... - /* Rev History --** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) -+** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) - ** #4. SR Memory Layout: - ** 1. VGPR-SGPR-HWREG-{LDS} - ** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. -@@ -248,12 +248,12 @@ if (!EMU_RUN_HACK) - /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ - s_getreg_b32 tma_lo,hwreg(HW_REG_SQ_SHADER_TMA_LO) - s_getreg_b32 tma_hi,hwreg(HW_REG_SQ_SHADER_TMA_HI) -- s_load_dwordx4 [tba_lo,tba_hi,tma_lo, tma_hi], [tma_lo,tma_hi], 0 -+ s_load_dwordx4 [ttmp8,ttmp9, ttmp10, ttmp11], [tma_lo,tma_hi], 0 - s_waitcnt lgkmcnt(0) -- s_or_b32 ttmp11, tba_lo, tba_hi -+ s_or_b32 ttmp7, ttmp8, ttmp9 - s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set - s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) -- s_setpc_b64 [tba_lo,tba_hi] //jump to next level trap handler -+ s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler - - L_NO_NEXT_TRAP: - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) -@@ -1135,6 +1135,7 @@ function get_hwreg_size_bytes - end - - -+ - #endif - - static const uint32_t cwsr_trap_gfx9_hex[] = { -@@ -1143,10 +1144,10 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xb8f1f803, 0x8671ff71, - 0x00000400, 0xbf850013, - 0xb8faf812, 0xb8fbf813, -- 0xc00a1e3d, 0x00000000, -- 0xbf8cc07f, 0x87777978, -+ 0xc00a1d3d, 0x00000000, -+ 0xbf8cc07f, 0x87737574, - 0xbf840002, 0xb970f802, -- 0xbe801d78, 0xb8f1f803, -+ 0xbe801d74, 0xb8f1f803, - 0x8671ff71, 0x000001ff, - 0xbf850002, 0x806c846c, - 0x826d806d, 0x866dff6d, --- -2.7.4 - |