aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch')
-rw-r--r--meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch857
1 files changed, 857 insertions, 0 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
new file mode 100644
index 00000000..ebec5a50
--- /dev/null
+++ b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
@@ -0,0 +1,857 @@
+From 34e333be4e27b795ee1c022fc411fe82ff0cfe03 Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <Jay.Cornwall@amd.com>
+Date: Tue, 15 Aug 2017 14:34:10 -0500
+Subject: [PATCH 1331/4131] drm/amdkfd: Handle ILLEGAL_INST in trap handler
+
+Illegal instruction is a non-maskable exception. It must be handled by
+the trap handler or it will block context save. Pending a contract with
+the second-level trap handler, halt the wavefront and exit the trap.
+
+Change-Id: I65a67ffb60ea848dfa3753e333e270adf712dc08
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+---
+ .../gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h | 289 +++++++++---------
+ .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 330 ++++++++++-----------
+ 2 files changed, 301 insertions(+), 318 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
+index 48fcec5..d5d1331 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
+@@ -458,6 +458,7 @@ end
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
++ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ L_SAVE_SGPR_LOOP:
+@@ -1131,7 +1132,7 @@ end
+ #endif
+
+ static const uint32_t cwsr_trap_carrizo_hex[] = {
+- 0xbf820001, 0xbf820122,
++ 0xbf820001, 0xbf820123,
+ 0xb8f4f802, 0x89748674,
+ 0xb8f5f803, 0x8675ff75,
+ 0x00000400, 0xbf850011,
+@@ -1217,167 +1218,167 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
+ 0x8e758475, 0x8e7a8275,
+ 0xbefa00ff, 0x01000000,
+ 0xbef60178, 0x80786e78,
+- 0xbefc0080, 0xbe802b00,
+- 0xbe822b02, 0xbe842b04,
+- 0xbe862b06, 0xbe882b08,
+- 0xbe8a2b0a, 0xbe8c2b0c,
+- 0xbe8e2b0e, 0xc06b003c,
+- 0x00000000, 0xc06b013c,
+- 0x00000010, 0xc06b023c,
+- 0x00000020, 0xc06b033c,
+- 0x00000030, 0x8078c078,
+- 0x82798079, 0x807c907c,
+- 0xbf0a757c, 0xbf85ffeb,
+- 0xbef80176, 0xbeee0080,
++ 0x82798079, 0xbefc0080,
++ 0xbe802b00, 0xbe822b02,
++ 0xbe842b04, 0xbe862b06,
++ 0xbe882b08, 0xbe8a2b0a,
++ 0xbe8c2b0c, 0xbe8e2b0e,
++ 0xc06b003c, 0x00000000,
++ 0xc06b013c, 0x00000010,
++ 0xc06b023c, 0x00000020,
++ 0xc06b033c, 0x00000030,
++ 0x8078c078, 0x82798079,
++ 0x807c907c, 0xbf0a757c,
++ 0xbf85ffeb, 0xbef80176,
++ 0xbeee0080, 0xbefe00c1,
++ 0xbeff00c1, 0xbefa00ff,
++ 0x01000000, 0xe0724000,
++ 0x6e1e0000, 0xe0724100,
++ 0x6e1e0100, 0xe0724200,
++ 0x6e1e0200, 0xe0724300,
++ 0x6e1e0300, 0xbefe00c1,
++ 0xbeff00c1, 0xb8f54306,
++ 0x8675c175, 0xbf84002c,
++ 0xbf8a0000, 0x867aff73,
++ 0x04000000, 0xbf840028,
++ 0x8e758675, 0x8e758275,
++ 0xbefa0075, 0xb8ee2a05,
++ 0x806e816e, 0x8e6e8a6e,
++ 0xb8fa1605, 0x807a817a,
++ 0x8e7a867a, 0x806e7a6e,
++ 0x806eff6e, 0x00000080,
++ 0xbefa00ff, 0x01000000,
++ 0xbefc0080, 0xd28c0002,
++ 0x000100c1, 0xd28d0003,
++ 0x000204c1, 0xd1060002,
++ 0x00011103, 0x7e0602ff,
++ 0x00000200, 0xbefc00ff,
++ 0x00010000, 0xbe80007b,
++ 0x867bff7b, 0xff7fffff,
++ 0x877bff7b, 0x00058000,
++ 0xd8ec0000, 0x00000002,
++ 0xbf8c007f, 0xe0765000,
++ 0x6e1e0002, 0x32040702,
++ 0xd0c9006a, 0x0000eb02,
++ 0xbf87fff7, 0xbefb0000,
++ 0xbeee00ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
++ 0xb8f52a05, 0x80758175,
++ 0x8e758275, 0x8e7a8875,
+ 0xbefa00ff, 0x01000000,
++ 0xbefc0084, 0xbf0a757c,
++ 0xbf840015, 0xbf11017c,
++ 0x8075ff75, 0x00001000,
++ 0x7e000300, 0x7e020301,
++ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x6e1e0000,
+ 0xe0724100, 0x6e1e0100,
+ 0xe0724200, 0x6e1e0200,
+ 0xe0724300, 0x6e1e0300,
++ 0x807c847c, 0x806eff6e,
++ 0x00000400, 0xbf0a757c,
++ 0xbf85ffef, 0xbf9c0000,
++ 0xbf8200ca, 0xbef8007e,
++ 0x8679ff7f, 0x0000ffff,
++ 0x8779ff79, 0x00040000,
++ 0xbefa0080, 0xbefb00ff,
++ 0x00807fac, 0x8676ff7f,
++ 0x08000000, 0x8f768376,
++ 0x877b767b, 0x8676ff7f,
++ 0x70000000, 0x8f768176,
++ 0x877b767b, 0x8676ff7f,
++ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+- 0xb8f54306, 0x8675c175,
+- 0xbf84002c, 0xbf8a0000,
+- 0x867aff73, 0x04000000,
+- 0xbf840028, 0x8e758675,
+- 0x8e758275, 0xbefa0075,
+- 0xb8ee2a05, 0x806e816e,
+- 0x8e6e8a6e, 0xb8fa1605,
+- 0x807a817a, 0x8e7a867a,
+- 0x806e7a6e, 0x806eff6e,
++ 0xb8f34306, 0x8673c173,
++ 0xbf840019, 0x8e738673,
++ 0x8e738273, 0xbefa0073,
++ 0xb8f22a05, 0x80728172,
++ 0x8e728a72, 0xb8f61605,
++ 0x80768176, 0x8e768676,
++ 0x80727672, 0x8072ff72,
+ 0x00000080, 0xbefa00ff,
+ 0x01000000, 0xbefc0080,
+- 0xd28c0002, 0x000100c1,
+- 0xd28d0003, 0x000204c1,
+- 0xd1060002, 0x00011103,
+- 0x7e0602ff, 0x00000200,
+- 0xbefc00ff, 0x00010000,
+- 0xbe80007b, 0x867bff7b,
+- 0xff7fffff, 0x877bff7b,
+- 0x00058000, 0xd8ec0000,
+- 0x00000002, 0xbf8c007f,
+- 0xe0765000, 0x6e1e0002,
+- 0x32040702, 0xd0c9006a,
+- 0x0000eb02, 0xbf87fff7,
+- 0xbefb0000, 0xbeee00ff,
+- 0x00000400, 0xbefe00c1,
+- 0xbeff00c1, 0xb8f52a05,
+- 0x80758175, 0x8e758275,
+- 0x8e7a8875, 0xbefa00ff,
+- 0x01000000, 0xbefc0084,
+- 0xbf0a757c, 0xbf840015,
+- 0xbf11017c, 0x8075ff75,
+- 0x00001000, 0x7e000300,
++ 0xe0510000, 0x721e0000,
++ 0xe0510100, 0x721e0000,
++ 0x807cff7c, 0x00000200,
++ 0x8072ff72, 0x00000200,
++ 0xbf0a737c, 0xbf85fff6,
++ 0xbef20080, 0xbefe00c1,
++ 0xbeff00c1, 0xb8f32a05,
++ 0x80738173, 0x8e738273,
++ 0x8e7a8873, 0xbefa00ff,
++ 0x01000000, 0xbef60072,
++ 0x8072ff72, 0x00000400,
++ 0xbefc0084, 0xbf11087c,
++ 0x8073ff73, 0x00008000,
++ 0xe0524000, 0x721e0000,
++ 0xe0524100, 0x721e0100,
++ 0xe0524200, 0x721e0200,
++ 0xe0524300, 0x721e0300,
++ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+- 0x7e060303, 0xe0724000,
+- 0x6e1e0000, 0xe0724100,
+- 0x6e1e0100, 0xe0724200,
+- 0x6e1e0200, 0xe0724300,
+- 0x6e1e0300, 0x807c847c,
+- 0x806eff6e, 0x00000400,
+- 0xbf0a757c, 0xbf85ffef,
+- 0xbf9c0000, 0xbf8200ca,
+- 0xbef8007e, 0x8679ff7f,
+- 0x0000ffff, 0x8779ff79,
+- 0x00040000, 0xbefa0080,
+- 0xbefb00ff, 0x00807fac,
+- 0x8676ff7f, 0x08000000,
+- 0x8f768376, 0x877b767b,
+- 0x8676ff7f, 0x70000000,
+- 0x8f768176, 0x877b767b,
+- 0x8676ff7f, 0x04000000,
+- 0xbf84001e, 0xbefe00c1,
+- 0xbeff00c1, 0xb8f34306,
+- 0x8673c173, 0xbf840019,
+- 0x8e738673, 0x8e738273,
+- 0xbefa0073, 0xb8f22a05,
++ 0x7e060303, 0x807c847c,
++ 0x8072ff72, 0x00000400,
++ 0xbf0a737c, 0xbf85ffee,
++ 0xbf9c0000, 0xe0524000,
++ 0x761e0000, 0xe0524100,
++ 0x761e0100, 0xe0524200,
++ 0x761e0200, 0xe0524300,
++ 0x761e0300, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+- 0x8072ff72, 0x00000080,
+- 0xbefa00ff, 0x01000000,
+- 0xbefc0080, 0xe0510000,
+- 0x721e0000, 0xe0510100,
+- 0x721e0000, 0x807cff7c,
+- 0x00000200, 0x8072ff72,
+- 0x00000200, 0xbf0a737c,
+- 0xbf85fff6, 0xbef20080,
+- 0xbefe00c1, 0xbeff00c1,
+- 0xb8f32a05, 0x80738173,
+- 0x8e738273, 0x8e7a8873,
+- 0xbefa00ff, 0x01000000,
+- 0xbef60072, 0x8072ff72,
+- 0x00000400, 0xbefc0084,
+- 0xbf11087c, 0x8073ff73,
+- 0x00008000, 0xe0524000,
+- 0x721e0000, 0xe0524100,
+- 0x721e0100, 0xe0524200,
+- 0x721e0200, 0xe0524300,
+- 0x721e0300, 0xbf8c0f70,
+- 0x7e000300, 0x7e020301,
+- 0x7e040302, 0x7e060303,
+- 0x807c847c, 0x8072ff72,
+- 0x00000400, 0xbf0a737c,
+- 0xbf85ffee, 0xbf9c0000,
+- 0xe0524000, 0x761e0000,
+- 0xe0524100, 0x761e0100,
+- 0xe0524200, 0x761e0200,
+- 0xe0524300, 0x761e0300,
+- 0xb8f22a05, 0x80728172,
+- 0x8e728a72, 0xb8f61605,
+- 0x80768176, 0x8e768676,
+- 0x80727672, 0x80f2c072,
+- 0xb8f31605, 0x80738173,
+- 0x8e738473, 0x8e7a8273,
+- 0xbefa00ff, 0x01000000,
+- 0xbefc0073, 0xc031003c,
+- 0x00000072, 0x80f2c072,
+- 0xbf8c007f, 0x80fc907c,
+- 0xbe802d00, 0xbe822d02,
+- 0xbe842d04, 0xbe862d06,
+- 0xbe882d08, 0xbe8a2d0a,
+- 0xbe8c2d0c, 0xbe8e2d0e,
+- 0xbf06807c, 0xbf84fff1,
+- 0xb8f22a05, 0x80728172,
+- 0x8e728a72, 0xb8f61605,
+- 0x80768176, 0x8e768676,
+- 0x80727672, 0xbefa0084,
+- 0xbefa00ff, 0x01000000,
+- 0xc0211cfc, 0x00000072,
+- 0x80728472, 0xc0211c3c,
++ 0x80f2c072, 0xb8f31605,
++ 0x80738173, 0x8e738473,
++ 0x8e7a8273, 0xbefa00ff,
++ 0x01000000, 0xbefc0073,
++ 0xc031003c, 0x00000072,
++ 0x80f2c072, 0xbf8c007f,
++ 0x80fc907c, 0xbe802d00,
++ 0xbe822d02, 0xbe842d04,
++ 0xbe862d06, 0xbe882d08,
++ 0xbe8a2d0a, 0xbe8c2d0c,
++ 0xbe8e2d0e, 0xbf06807c,
++ 0xbf84fff1, 0xb8f22a05,
++ 0x80728172, 0x8e728a72,
++ 0xb8f61605, 0x80768176,
++ 0x8e768676, 0x80727672,
++ 0xbefa0084, 0xbefa00ff,
++ 0x01000000, 0xc0211cfc,
+ 0x00000072, 0x80728472,
+- 0xc0211c7c, 0x00000072,
+- 0x80728472, 0xc0211bbc,
++ 0xc0211c3c, 0x00000072,
++ 0x80728472, 0xc0211c7c,
+ 0x00000072, 0x80728472,
+- 0xc0211bfc, 0x00000072,
+- 0x80728472, 0xc0211d3c,
++ 0xc0211bbc, 0x00000072,
++ 0x80728472, 0xc0211bfc,
+ 0x00000072, 0x80728472,
+- 0xc0211d7c, 0x00000072,
+- 0x80728472, 0xc0211a3c,
++ 0xc0211d3c, 0x00000072,
++ 0x80728472, 0xc0211d7c,
+ 0x00000072, 0x80728472,
+- 0xc0211a7c, 0x00000072,
+- 0x80728472, 0xc0211dfc,
++ 0xc0211a3c, 0x00000072,
++ 0x80728472, 0xc0211a7c,
+ 0x00000072, 0x80728472,
+- 0xc0211b3c, 0x00000072,
+- 0x80728472, 0xc0211b7c,
++ 0xc0211dfc, 0x00000072,
++ 0x80728472, 0xc0211b3c,
+ 0x00000072, 0x80728472,
+- 0xbf8c007f, 0x8671ff71,
+- 0x0000ffff, 0xbefc0073,
+- 0xbefe006e, 0xbeff006f,
+- 0x867375ff, 0x000003ff,
+- 0xb9734803, 0x867375ff,
+- 0xfffff800, 0x8f738b73,
+- 0xb973a2c3, 0xb977f801,
+- 0x8673ff71, 0xf0000000,
+- 0x8f739c73, 0x8e739073,
+- 0xbef60080, 0x87767376,
+- 0x8673ff71, 0x08000000,
+- 0x8f739b73, 0x8e738f73,
+- 0x87767376, 0x8673ff74,
+- 0x00800000, 0x8f739773,
+- 0xb976f807, 0x86fe7e7e,
+- 0x86ea6a6a, 0xb974f802,
+- 0xbf8a0000, 0x95807370,
+- 0xbf810000, 0x00000000,
++ 0xc0211b7c, 0x00000072,
++ 0x80728472, 0xbf8c007f,
++ 0x8671ff71, 0x0000ffff,
++ 0xbefc0073, 0xbefe006e,
++ 0xbeff006f, 0x867375ff,
++ 0x000003ff, 0xb9734803,
++ 0x867375ff, 0xfffff800,
++ 0x8f738b73, 0xb973a2c3,
++ 0xb977f801, 0x8673ff71,
++ 0xf0000000, 0x8f739c73,
++ 0x8e739073, 0xbef60080,
++ 0x87767376, 0x8673ff71,
++ 0x08000000, 0x8f739b73,
++ 0x8e738f73, 0x87767376,
++ 0x8673ff74, 0x00800000,
++ 0x8f739773, 0xb976f807,
++ 0x86fe7e7e, 0x86ea6a6a,
++ 0xb974f802, 0xbf8a0000,
++ 0x95807370, 0xbf810000,
+ };
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+index f11de028..ae2af3d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+@@ -80,8 +80,6 @@ var EMU_RUN_HACK_RESTORE_NORMAL = 0
+ var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+ var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+ var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+ var SAVE_LDS = 1
+ var WG_BASE_ADDR_LO = 0x9000a000
+ var WG_BASE_ADDR_HI = 0x0
+@@ -119,7 +117,7 @@ var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
+ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+-var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
++var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+
+ var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+ var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+@@ -148,12 +146,6 @@ var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+ var s_save_spi_init_lo = exec_lo
+ var s_save_spi_init_hi = exec_hi
+
+- //tba_lo and tba_hi need to be saved/restored
+-var tba_lo = ttmp12
+-var tba_hi = ttmp13
+-var tma_lo = ttmp14
+-var tma_hi = ttmp15
+-
+ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+ var s_save_pc_hi = ttmp1
+ var s_save_exec_lo = ttmp2
+@@ -167,10 +159,10 @@ var s_save_buf_rsrc1 = ttmp9
+ var s_save_buf_rsrc2 = ttmp10
+ var s_save_buf_rsrc3 = ttmp11
+
+-var s_save_mem_offset = tma_lo
++var s_save_mem_offset = ttmp14
+ var s_save_alloc_size = s_save_trapsts //conflict
+ var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time)
+-var s_save_m0 = tma_hi
++var s_save_m0 = ttmp15
+
+ /* Restore */
+ var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+@@ -191,9 +183,9 @@ var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ var s_restore_spi_init_lo = exec_lo
+ var s_restore_spi_init_hi = exec_hi
+
+-var s_restore_mem_offset = ttmp2
++var s_restore_mem_offset = ttmp12
+ var s_restore_alloc_size = ttmp3
+-var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored
++var s_restore_tmp = ttmp6
+ var s_restore_mem_offset_save = s_restore_tmp //no conflict
+
+ var s_restore_m0 = s_restore_alloc_size //no conflict
+@@ -202,8 +194,8 @@ var s_restore_mode = ttmp7
+
+ var s_restore_pc_lo = ttmp0
+ var s_restore_pc_hi = ttmp1
+-var s_restore_exec_lo = tma_lo //no conflict
+-var s_restore_exec_hi = tma_hi //no conflict
++var s_restore_exec_lo = ttmp14
++var s_restore_exec_hi = ttmp15
+ var s_restore_status = ttmp4
+ var s_restore_trapsts = ttmp5
+ var s_restore_xnack_mask_lo = xnack_mask_lo
+@@ -247,19 +239,26 @@ L_SKIP_RESTORE:
+
+ // ********* Handle non-CWSR traps *******************
+ if (!EMU_RUN_HACK)
++ // Illegal instruction is a non-maskable exception which blocks context save.
++ // Halt the wavefront and return from the trap.
++ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
++ s_cbranch_scc1 L_HALT_WAVE
++
+ // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
+ // Instead, halt the wavefront and return from the trap.
+ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+ s_cbranch_scc0 L_NO_MEM_VIOL
++
++L_HALT_WAVE:
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_branch L_EXCP_CASE
+
+ L_NO_MEM_VIOL:
+ /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
+- s_getreg_b32 tma_lo,hwreg(HW_REG_SQ_SHADER_TMA_LO)
+- s_getreg_b32 tma_hi,hwreg(HW_REG_SQ_SHADER_TMA_HI)
+- s_lshl_b64 [tma_lo, tma_hi], [tma_lo, tma_hi], 0x8
+- s_load_dwordx4 [ttmp8,ttmp9, ttmp10, ttmp11], [tma_lo,tma_hi], 0
++ s_getreg_b32 ttmp14,hwreg(HW_REG_SQ_SHADER_TMA_LO)
++ s_getreg_b32 ttmp15,hwreg(HW_REG_SQ_SHADER_TMA_HI)
++ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [ttmp14, ttmp15], 0
+ s_waitcnt lgkmcnt(0)
+ s_or_b32 ttmp7, ttmp8, ttmp9
+ s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set
+@@ -412,8 +411,6 @@ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+- s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO
+- s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI
+ end
+
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+@@ -432,15 +429,11 @@ end
+ //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+- write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO
+- write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI
+
+
+
+ /* the first wave in the threadgroup */
+- // save fist_wave bits in tba_hi unused bit.26
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
+- //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26]
+ s_mov_b32 s_save_exec_hi, 0x0
+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+@@ -474,6 +467,7 @@ end
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
++ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+@@ -548,7 +542,6 @@ end
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+ s_barrier //LDS is used? wait for other waves in the same TG
+- //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+@@ -963,9 +956,6 @@ end
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+- /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111),
+- However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
+- */
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+@@ -973,6 +963,7 @@ end
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
++ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+@@ -1019,8 +1010,6 @@ end
+ read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
+ read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
+- read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO
+- read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI
+
+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+
+@@ -1150,97 +1139,93 @@ end
+ #endif
+
+ static const uint32_t cwsr_trap_gfx9_hex[] = {
+- 0xbf820001, 0xbf82012c,
++ 0xbf820001, 0xbf820124,
+ 0xb8f0f802, 0x89708670,
+ 0xb8f1f803, 0x8674ff71,
+- 0x00000400, 0xbf85001a,
+- 0x8674ff71, 0x00000100,
+- 0xbf840003, 0x8770ff70,
+- 0x00002000, 0xbf820010,
+- 0xb8faf812, 0xb8fbf813,
+- 0x8efa887a, 0xc00a1d3d,
+- 0x00000000, 0xbf8cc07f,
+- 0x87737574, 0xbf840002,
+- 0xb970f802, 0xbe801d74,
+- 0xb8f1f803, 0x8671ff71,
+- 0x000001ff, 0xbf850002,
+- 0x806c846c, 0x826d806d,
+- 0x866dff6d, 0x0000ffff,
+- 0xb970f802, 0xbe801f6c,
+- 0xb8f1f803, 0x8671ff71,
+- 0x00000100, 0xbf840006,
+- 0xbef60080, 0xb9760203,
+- 0x866dff6d, 0x0000ffff,
+- 0x80ec886c, 0x82ed806d,
+- 0xbef60080, 0xb9760283,
+- 0xbef20068, 0xbef30069,
+- 0xb8f62407, 0x8e769c76,
+- 0x876d766d, 0xb8f603c7,
+- 0x8e769b76, 0x876d766d,
+- 0xb8f6f807, 0x8676ff76,
+- 0x00007fff, 0xb976f807,
+- 0xbeee007e, 0xbeef007f,
+- 0xbefe0180, 0xbf900004,
+- 0xbf8e0002, 0xbf88fffe,
+- 0xbef4007e, 0x8675ff7f,
+- 0x0000ffff, 0x8775ff75,
+- 0x00040000, 0xbef60080,
+- 0xbef700ff, 0x00807fac,
+- 0x8676ff7f, 0x08000000,
+- 0x8f768376, 0x87777677,
+- 0x8676ff7f, 0x70000000,
+- 0x8f768176, 0x87777677,
+- 0xbefb007c, 0xbefa0080,
+- 0xb8fa2a05, 0x807a817a,
+- 0x8e7a8a7a, 0xb8f61605,
+- 0x80768176, 0x8e768676,
+- 0x807a767a, 0xbef60084,
+- 0xbef600ff, 0x01000000,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611efa, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611b3a, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611b7a, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611bba, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611bfa, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xbefe007c, 0xbefc007a,
+- 0xc0611c3a, 0x0000007c,
+- 0x807a847a, 0xbefc007e,
+- 0xb8f1f803, 0xbefe007c,
+- 0xbefc007a, 0xc0611c7a,
++ 0x00000400, 0xbf85001d,
++ 0x8674ff71, 0x00000800,
++ 0xbf850003, 0x8674ff71,
++ 0x00000100, 0xbf840003,
++ 0x8770ff70, 0x00002000,
++ 0xbf820010, 0xb8faf812,
++ 0xb8fbf813, 0x8efa887a,
++ 0xc00a1d3d, 0x00000000,
++ 0xbf8cc07f, 0x87737574,
++ 0xbf840002, 0xb970f802,
++ 0xbe801d74, 0xb8f1f803,
++ 0x8671ff71, 0x000001ff,
++ 0xbf850002, 0x806c846c,
++ 0x826d806d, 0x866dff6d,
++ 0x0000ffff, 0xb970f802,
++ 0xbe801f6c, 0xb8f1f803,
++ 0x8671ff71, 0x00000100,
++ 0xbf840006, 0xbef60080,
++ 0xb9760203, 0x866dff6d,
++ 0x0000ffff, 0x80ec886c,
++ 0x82ed806d, 0xbef60080,
++ 0xb9760283, 0xbef20068,
++ 0xbef30069, 0xb8f62407,
++ 0x8e769c76, 0x876d766d,
++ 0xb8f603c7, 0x8e769b76,
++ 0x876d766d, 0xb8f6f807,
++ 0x8676ff76, 0x00007fff,
++ 0xb976f807, 0xbeee007e,
++ 0xbeef007f, 0xbefe0180,
++ 0xbf900004, 0xbf8e0002,
++ 0xbf88fffe, 0xbef4007e,
++ 0x8675ff7f, 0x0000ffff,
++ 0x8775ff75, 0x00040000,
++ 0xbef60080, 0xbef700ff,
++ 0x00807fac, 0x8676ff7f,
++ 0x08000000, 0x8f768376,
++ 0x87777677, 0x8676ff7f,
++ 0x70000000, 0x8f768176,
++ 0x87777677, 0xbefb007c,
++ 0xbefa0080, 0xb8fa2a05,
++ 0x807a817a, 0x8e7a8a7a,
++ 0xb8f61605, 0x80768176,
++ 0x8e768676, 0x807a767a,
++ 0xbef60084, 0xbef600ff,
++ 0x01000000, 0xbefe007c,
++ 0xbefc007a, 0xc0611efa,
++ 0x0000007c, 0x807a847a,
++ 0xbefc007e, 0xbefe007c,
++ 0xbefc007a, 0xc0611b3a,
++ 0x0000007c, 0x807a847a,
++ 0xbefc007e, 0xbefe007c,
++ 0xbefc007a, 0xc0611b7a,
+ 0x0000007c, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+- 0xbefc007a, 0xc0611cba,
++ 0xbefc007a, 0xc0611bba,
+ 0x0000007c, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+- 0xbefc007a, 0xc0611cfa,
++ 0xbefc007a, 0xc0611bfa,
+ 0x0000007c, 0x807a847a,
+- 0xbefc007e, 0xb8fbf801,
++ 0xbefc007e, 0xbefe007c,
++ 0xbefc007a, 0xc0611c3a,
++ 0x0000007c, 0x807a847a,
++ 0xbefc007e, 0xb8f1f803,
+ 0xbefe007c, 0xbefc007a,
+- 0xc0611efa, 0x0000007c,
++ 0xc0611c7a, 0x0000007c,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+- 0xc0611e3a, 0x0000007c,
++ 0xc0611cba, 0x0000007c,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+- 0xc0611e7a, 0x0000007c,
++ 0xc0611cfa, 0x0000007c,
+ 0x807a847a, 0xbefc007e,
+- 0x8676ff7f, 0x04000000,
+- 0xbeef0080, 0x876f6f76,
+- 0xb8fa2a05, 0x807a817a,
+- 0x8e7a8a7a, 0xb8f11605,
+- 0x80718171, 0x8e718471,
+- 0x8e768271, 0xbef600ff,
+- 0x01000000, 0xbef20174,
+- 0x80747a74, 0xbefc0080,
++ 0xb8fbf801, 0xbefe007c,
++ 0xbefc007a, 0xc0611efa,
++ 0x0000007c, 0x807a847a,
++ 0xbefc007e, 0x8676ff7f,
++ 0x04000000, 0xbeef0080,
++ 0x876f6f76, 0xb8fa2a05,
++ 0x807a817a, 0x8e7a8a7a,
++ 0xb8f11605, 0x80718171,
++ 0x8e718471, 0x8e768271,
++ 0xbef600ff, 0x01000000,
++ 0xbef20174, 0x80747a74,
++ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+@@ -1300,7 +1285,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0x7a1d0300, 0x807c847c,
+ 0x807aff7a, 0x00000400,
+ 0xbf0a717c, 0xbf85ffef,
+- 0xbf9c0000, 0xbf8200ca,
++ 0xbf9c0000, 0xbf8200c5,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+@@ -1314,93 +1299,90 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf840019,
+ 0x8e6f866f, 0x8e6f826f,
+- 0xbef6006f, 0xb8ee2a05,
+- 0x806e816e, 0x8e6e8a6e,
++ 0xbef6006f, 0xb8f82a05,
++ 0x80788178, 0x8e788a78,
+ 0xb8f21605, 0x80728172,
+- 0x8e728672, 0x806e726e,
+- 0x806eff6e, 0x00000080,
++ 0x8e728672, 0x80787278,
++ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xe0510000,
+- 0x6e1d0000, 0xe0510100,
+- 0x6e1d0000, 0x807cff7c,
+- 0x00000200, 0x806eff6e,
++ 0x781d0000, 0xe0510100,
++ 0x781d0000, 0x807cff7c,
++ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+- 0xbf85fff6, 0xbeee0080,
++ 0xbf85fff6, 0xbef80080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x8e76886f,
+ 0xbef600ff, 0x01000000,
+- 0xbef2006e, 0x806eff6e,
++ 0xbef20078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0x806fff6f,
+ 0x00008000, 0xe0524000,
+- 0x6e1d0000, 0xe0524100,
+- 0x6e1d0100, 0xe0524200,
+- 0x6e1d0200, 0xe0524300,
+- 0x6e1d0300, 0xbf8c0f70,
++ 0x781d0000, 0xe0524100,
++ 0x781d0100, 0xe0524200,
++ 0x781d0200, 0xe0524300,
++ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+- 0x807c847c, 0x806eff6e,
++ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x721d0000,
+ 0xe0524100, 0x721d0100,
+ 0xe0524200, 0x721d0200,
+ 0xe0524300, 0x721d0300,
+- 0xb8ee2a05, 0x806e816e,
+- 0x8e6e8a6e, 0xb8f21605,
++ 0xb8f82a05, 0x80788178,
++ 0x8e788a78, 0xb8f21605,
+ 0x80728172, 0x8e728672,
+- 0x806e726e, 0x80eec06e,
++ 0x80787278, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+- 0x0000006e, 0x80eec06e,
++ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+- 0xbe802d00, 0xbe822d02,
+- 0xbe842d04, 0xbe862d06,
+- 0xbe882d08, 0xbe8a2d0a,
+- 0xbe8c2d0c, 0xbe8e2d0e,
+- 0xbf06807c, 0xbf84fff1,
+- 0xb8ee2a05, 0x806e816e,
+- 0x8e6e8a6e, 0xb8f21605,
+- 0x80728172, 0x8e728672,
+- 0x806e726e, 0xbef60084,
+- 0xbef600ff, 0x01000000,
+- 0xc0211bfa, 0x0000006e,
+- 0x806e846e, 0xc0211b3a,
+- 0x0000006e, 0x806e846e,
+- 0xc0211b7a, 0x0000006e,
+- 0x806e846e, 0xc0211eba,
+- 0x0000006e, 0x806e846e,
+- 0xc0211efa, 0x0000006e,
+- 0x806e846e, 0xc0211c3a,
+- 0x0000006e, 0x806e846e,
+- 0xc0211c7a, 0x0000006e,
+- 0x806e846e, 0xc0211a3a,
+- 0x0000006e, 0x806e846e,
+- 0xc0211a7a, 0x0000006e,
+- 0x806e846e, 0xc0211cfa,
+- 0x0000006e, 0x806e846e,
+- 0xc0211e3a, 0x0000006e,
+- 0x806e846e, 0xc0211e7a,
+- 0x0000006e, 0x806e846e,
+- 0xbf8cc07f, 0x866dff6d,
+- 0x0000ffff, 0xbefc006f,
+- 0xbefe007a, 0xbeff007b,
+- 0x866f71ff, 0x000003ff,
+- 0xb96f4803, 0x866f71ff,
+- 0xfffff800, 0x8f6f8b6f,
+- 0xb96fa2c3, 0xb973f801,
+- 0x866fff6d, 0xf0000000,
+- 0x8f6f9c6f, 0x8e6f906f,
+- 0xbef20080, 0x87726f72,
+- 0x866fff6d, 0x08000000,
+- 0x8f6f9b6f, 0x8e6f8f6f,
+- 0x87726f72, 0x866fff70,
+- 0x00800000, 0x8f6f976f,
+- 0xb972f807, 0x86fe7e7e,
+- 0x86ea6a6a, 0xb970f802,
+- 0xbf8a0000, 0x95806f6c,
+- 0xbf810000, 0x00000000,
++ 0xbf800000, 0xbe802d00,
++ 0xbe822d02, 0xbe842d04,
++ 0xbe862d06, 0xbe882d08,
++ 0xbe8a2d0a, 0xbe8c2d0c,
++ 0xbe8e2d0e, 0xbf06807c,
++ 0xbf84fff0, 0xb8f82a05,
++ 0x80788178, 0x8e788a78,
++ 0xb8f21605, 0x80728172,
++ 0x8e728672, 0x80787278,
++ 0xbef60084, 0xbef600ff,
++ 0x01000000, 0xc0211bfa,
++ 0x00000078, 0x80788478,
++ 0xc0211b3a, 0x00000078,
++ 0x80788478, 0xc0211b7a,
++ 0x00000078, 0x80788478,
++ 0xc0211eba, 0x00000078,
++ 0x80788478, 0xc0211efa,
++ 0x00000078, 0x80788478,
++ 0xc0211c3a, 0x00000078,
++ 0x80788478, 0xc0211c7a,
++ 0x00000078, 0x80788478,
++ 0xc0211a3a, 0x00000078,
++ 0x80788478, 0xc0211a7a,
++ 0x00000078, 0x80788478,
++ 0xc0211cfa, 0x00000078,
++ 0x80788478, 0xbf8cc07f,
++ 0x866dff6d, 0x0000ffff,
++ 0xbefc006f, 0xbefe007a,
++ 0xbeff007b, 0x866f71ff,
++ 0x000003ff, 0xb96f4803,
++ 0x866f71ff, 0xfffff800,
++ 0x8f6f8b6f, 0xb96fa2c3,
++ 0xb973f801, 0x866fff6d,
++ 0xf0000000, 0x8f6f9c6f,
++ 0x8e6f906f, 0xbef20080,
++ 0x87726f72, 0x866fff6d,
++ 0x08000000, 0x8f6f9b6f,
++ 0x8e6f8f6f, 0x87726f72,
++ 0x866fff70, 0x00800000,
++ 0x8f6f976f, 0xb972f807,
++ 0x86fe7e7e, 0x86ea6a6a,
++ 0xb970f802, 0xbf8a0000,
++ 0x95806f6c, 0xbf810000,
+ };
+--
+2.7.4
+