aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch')
-rw-r--r--meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch857
1 files changed, 0 insertions, 857 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
deleted file mode 100644
index ebec5a50..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch
+++ /dev/null
@@ -1,857 +0,0 @@
-From 34e333be4e27b795ee1c022fc411fe82ff0cfe03 Mon Sep 17 00:00:00 2001
-From: Jay Cornwall <Jay.Cornwall@amd.com>
-Date: Tue, 15 Aug 2017 14:34:10 -0500
-Subject: [PATCH 1331/4131] drm/amdkfd: Handle ILLEGAL_INST in trap handler
-
-Illegal instruction is a non-maskable exception. It must be handled
-by the trap or it will block context save. Pending a contract with
-the second-level trap handler halt the wavefront and exit the trap.
-
-Change-Id: I65a67ffb60ea848dfa3753e333e270adf712dc08
-Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
----
- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h | 289 +++++++++---------
- .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 330 ++++++++++-----------
- 2 files changed, 301 insertions(+), 318 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
-index 48fcec5..d5d1331 100644
---- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
-+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h
-@@ -458,6 +458,7 @@ end
- //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
- s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
- s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
-+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
-
- s_mov_b32 m0, 0x0 //SGPR initial index value =0
- L_SAVE_SGPR_LOOP:
-@@ -1131,7 +1132,7 @@ end
- #endif
-
- static const uint32_t cwsr_trap_carrizo_hex[] = {
-- 0xbf820001, 0xbf820122,
-+ 0xbf820001, 0xbf820123,
- 0xb8f4f802, 0x89748674,
- 0xb8f5f803, 0x8675ff75,
- 0x00000400, 0xbf850011,
-@@ -1217,167 +1218,167 @@ static const uint32_t cwsr_trap_carrizo_hex[] = {
- 0x8e758475, 0x8e7a8275,
- 0xbefa00ff, 0x01000000,
- 0xbef60178, 0x80786e78,
-- 0xbefc0080, 0xbe802b00,
-- 0xbe822b02, 0xbe842b04,
-- 0xbe862b06, 0xbe882b08,
-- 0xbe8a2b0a, 0xbe8c2b0c,
-- 0xbe8e2b0e, 0xc06b003c,
-- 0x00000000, 0xc06b013c,
-- 0x00000010, 0xc06b023c,
-- 0x00000020, 0xc06b033c,
-- 0x00000030, 0x8078c078,
-- 0x82798079, 0x807c907c,
-- 0xbf0a757c, 0xbf85ffeb,
-- 0xbef80176, 0xbeee0080,
-+ 0x82798079, 0xbefc0080,
-+ 0xbe802b00, 0xbe822b02,
-+ 0xbe842b04, 0xbe862b06,
-+ 0xbe882b08, 0xbe8a2b0a,
-+ 0xbe8c2b0c, 0xbe8e2b0e,
-+ 0xc06b003c, 0x00000000,
-+ 0xc06b013c, 0x00000010,
-+ 0xc06b023c, 0x00000020,
-+ 0xc06b033c, 0x00000030,
-+ 0x8078c078, 0x82798079,
-+ 0x807c907c, 0xbf0a757c,
-+ 0xbf85ffeb, 0xbef80176,
-+ 0xbeee0080, 0xbefe00c1,
-+ 0xbeff00c1, 0xbefa00ff,
-+ 0x01000000, 0xe0724000,
-+ 0x6e1e0000, 0xe0724100,
-+ 0x6e1e0100, 0xe0724200,
-+ 0x6e1e0200, 0xe0724300,
-+ 0x6e1e0300, 0xbefe00c1,
-+ 0xbeff00c1, 0xb8f54306,
-+ 0x8675c175, 0xbf84002c,
-+ 0xbf8a0000, 0x867aff73,
-+ 0x04000000, 0xbf840028,
-+ 0x8e758675, 0x8e758275,
-+ 0xbefa0075, 0xb8ee2a05,
-+ 0x806e816e, 0x8e6e8a6e,
-+ 0xb8fa1605, 0x807a817a,
-+ 0x8e7a867a, 0x806e7a6e,
-+ 0x806eff6e, 0x00000080,
-+ 0xbefa00ff, 0x01000000,
-+ 0xbefc0080, 0xd28c0002,
-+ 0x000100c1, 0xd28d0003,
-+ 0x000204c1, 0xd1060002,
-+ 0x00011103, 0x7e0602ff,
-+ 0x00000200, 0xbefc00ff,
-+ 0x00010000, 0xbe80007b,
-+ 0x867bff7b, 0xff7fffff,
-+ 0x877bff7b, 0x00058000,
-+ 0xd8ec0000, 0x00000002,
-+ 0xbf8c007f, 0xe0765000,
-+ 0x6e1e0002, 0x32040702,
-+ 0xd0c9006a, 0x0000eb02,
-+ 0xbf87fff7, 0xbefb0000,
-+ 0xbeee00ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
-+ 0xb8f52a05, 0x80758175,
-+ 0x8e758275, 0x8e7a8875,
- 0xbefa00ff, 0x01000000,
-+ 0xbefc0084, 0xbf0a757c,
-+ 0xbf840015, 0xbf11017c,
-+ 0x8075ff75, 0x00001000,
-+ 0x7e000300, 0x7e020301,
-+ 0x7e040302, 0x7e060303,
- 0xe0724000, 0x6e1e0000,
- 0xe0724100, 0x6e1e0100,
- 0xe0724200, 0x6e1e0200,
- 0xe0724300, 0x6e1e0300,
-+ 0x807c847c, 0x806eff6e,
-+ 0x00000400, 0xbf0a757c,
-+ 0xbf85ffef, 0xbf9c0000,
-+ 0xbf8200ca, 0xbef8007e,
-+ 0x8679ff7f, 0x0000ffff,
-+ 0x8779ff79, 0x00040000,
-+ 0xbefa0080, 0xbefb00ff,
-+ 0x00807fac, 0x8676ff7f,
-+ 0x08000000, 0x8f768376,
-+ 0x877b767b, 0x8676ff7f,
-+ 0x70000000, 0x8f768176,
-+ 0x877b767b, 0x8676ff7f,
-+ 0x04000000, 0xbf84001e,
- 0xbefe00c1, 0xbeff00c1,
-- 0xb8f54306, 0x8675c175,
-- 0xbf84002c, 0xbf8a0000,
-- 0x867aff73, 0x04000000,
-- 0xbf840028, 0x8e758675,
-- 0x8e758275, 0xbefa0075,
-- 0xb8ee2a05, 0x806e816e,
-- 0x8e6e8a6e, 0xb8fa1605,
-- 0x807a817a, 0x8e7a867a,
-- 0x806e7a6e, 0x806eff6e,
-+ 0xb8f34306, 0x8673c173,
-+ 0xbf840019, 0x8e738673,
-+ 0x8e738273, 0xbefa0073,
-+ 0xb8f22a05, 0x80728172,
-+ 0x8e728a72, 0xb8f61605,
-+ 0x80768176, 0x8e768676,
-+ 0x80727672, 0x8072ff72,
- 0x00000080, 0xbefa00ff,
- 0x01000000, 0xbefc0080,
-- 0xd28c0002, 0x000100c1,
-- 0xd28d0003, 0x000204c1,
-- 0xd1060002, 0x00011103,
-- 0x7e0602ff, 0x00000200,
-- 0xbefc00ff, 0x00010000,
-- 0xbe80007b, 0x867bff7b,
-- 0xff7fffff, 0x877bff7b,
-- 0x00058000, 0xd8ec0000,
-- 0x00000002, 0xbf8c007f,
-- 0xe0765000, 0x6e1e0002,
-- 0x32040702, 0xd0c9006a,
-- 0x0000eb02, 0xbf87fff7,
-- 0xbefb0000, 0xbeee00ff,
-- 0x00000400, 0xbefe00c1,
-- 0xbeff00c1, 0xb8f52a05,
-- 0x80758175, 0x8e758275,
-- 0x8e7a8875, 0xbefa00ff,
-- 0x01000000, 0xbefc0084,
-- 0xbf0a757c, 0xbf840015,
-- 0xbf11017c, 0x8075ff75,
-- 0x00001000, 0x7e000300,
-+ 0xe0510000, 0x721e0000,
-+ 0xe0510100, 0x721e0000,
-+ 0x807cff7c, 0x00000200,
-+ 0x8072ff72, 0x00000200,
-+ 0xbf0a737c, 0xbf85fff6,
-+ 0xbef20080, 0xbefe00c1,
-+ 0xbeff00c1, 0xb8f32a05,
-+ 0x80738173, 0x8e738273,
-+ 0x8e7a8873, 0xbefa00ff,
-+ 0x01000000, 0xbef60072,
-+ 0x8072ff72, 0x00000400,
-+ 0xbefc0084, 0xbf11087c,
-+ 0x8073ff73, 0x00008000,
-+ 0xe0524000, 0x721e0000,
-+ 0xe0524100, 0x721e0100,
-+ 0xe0524200, 0x721e0200,
-+ 0xe0524300, 0x721e0300,
-+ 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
-- 0x7e060303, 0xe0724000,
-- 0x6e1e0000, 0xe0724100,
-- 0x6e1e0100, 0xe0724200,
-- 0x6e1e0200, 0xe0724300,
-- 0x6e1e0300, 0x807c847c,
-- 0x806eff6e, 0x00000400,
-- 0xbf0a757c, 0xbf85ffef,
-- 0xbf9c0000, 0xbf8200ca,
-- 0xbef8007e, 0x8679ff7f,
-- 0x0000ffff, 0x8779ff79,
-- 0x00040000, 0xbefa0080,
-- 0xbefb00ff, 0x00807fac,
-- 0x8676ff7f, 0x08000000,
-- 0x8f768376, 0x877b767b,
-- 0x8676ff7f, 0x70000000,
-- 0x8f768176, 0x877b767b,
-- 0x8676ff7f, 0x04000000,
-- 0xbf84001e, 0xbefe00c1,
-- 0xbeff00c1, 0xb8f34306,
-- 0x8673c173, 0xbf840019,
-- 0x8e738673, 0x8e738273,
-- 0xbefa0073, 0xb8f22a05,
-+ 0x7e060303, 0x807c847c,
-+ 0x8072ff72, 0x00000400,
-+ 0xbf0a737c, 0xbf85ffee,
-+ 0xbf9c0000, 0xe0524000,
-+ 0x761e0000, 0xe0524100,
-+ 0x761e0100, 0xe0524200,
-+ 0x761e0200, 0xe0524300,
-+ 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
-- 0x8072ff72, 0x00000080,
-- 0xbefa00ff, 0x01000000,
-- 0xbefc0080, 0xe0510000,
-- 0x721e0000, 0xe0510100,
-- 0x721e0000, 0x807cff7c,
-- 0x00000200, 0x8072ff72,
-- 0x00000200, 0xbf0a737c,
-- 0xbf85fff6, 0xbef20080,
-- 0xbefe00c1, 0xbeff00c1,
-- 0xb8f32a05, 0x80738173,
-- 0x8e738273, 0x8e7a8873,
-- 0xbefa00ff, 0x01000000,
-- 0xbef60072, 0x8072ff72,
-- 0x00000400, 0xbefc0084,
-- 0xbf11087c, 0x8073ff73,
-- 0x00008000, 0xe0524000,
-- 0x721e0000, 0xe0524100,
-- 0x721e0100, 0xe0524200,
-- 0x721e0200, 0xe0524300,
-- 0x721e0300, 0xbf8c0f70,
-- 0x7e000300, 0x7e020301,
-- 0x7e040302, 0x7e060303,
-- 0x807c847c, 0x8072ff72,
-- 0x00000400, 0xbf0a737c,
-- 0xbf85ffee, 0xbf9c0000,
-- 0xe0524000, 0x761e0000,
-- 0xe0524100, 0x761e0100,
-- 0xe0524200, 0x761e0200,
-- 0xe0524300, 0x761e0300,
-- 0xb8f22a05, 0x80728172,
-- 0x8e728a72, 0xb8f61605,
-- 0x80768176, 0x8e768676,
-- 0x80727672, 0x80f2c072,
-- 0xb8f31605, 0x80738173,
-- 0x8e738473, 0x8e7a8273,
-- 0xbefa00ff, 0x01000000,
-- 0xbefc0073, 0xc031003c,
-- 0x00000072, 0x80f2c072,
-- 0xbf8c007f, 0x80fc907c,
-- 0xbe802d00, 0xbe822d02,
-- 0xbe842d04, 0xbe862d06,
-- 0xbe882d08, 0xbe8a2d0a,
-- 0xbe8c2d0c, 0xbe8e2d0e,
-- 0xbf06807c, 0xbf84fff1,
-- 0xb8f22a05, 0x80728172,
-- 0x8e728a72, 0xb8f61605,
-- 0x80768176, 0x8e768676,
-- 0x80727672, 0xbefa0084,
-- 0xbefa00ff, 0x01000000,
-- 0xc0211cfc, 0x00000072,
-- 0x80728472, 0xc0211c3c,
-+ 0x80f2c072, 0xb8f31605,
-+ 0x80738173, 0x8e738473,
-+ 0x8e7a8273, 0xbefa00ff,
-+ 0x01000000, 0xbefc0073,
-+ 0xc031003c, 0x00000072,
-+ 0x80f2c072, 0xbf8c007f,
-+ 0x80fc907c, 0xbe802d00,
-+ 0xbe822d02, 0xbe842d04,
-+ 0xbe862d06, 0xbe882d08,
-+ 0xbe8a2d0a, 0xbe8c2d0c,
-+ 0xbe8e2d0e, 0xbf06807c,
-+ 0xbf84fff1, 0xb8f22a05,
-+ 0x80728172, 0x8e728a72,
-+ 0xb8f61605, 0x80768176,
-+ 0x8e768676, 0x80727672,
-+ 0xbefa0084, 0xbefa00ff,
-+ 0x01000000, 0xc0211cfc,
- 0x00000072, 0x80728472,
-- 0xc0211c7c, 0x00000072,
-- 0x80728472, 0xc0211bbc,
-+ 0xc0211c3c, 0x00000072,
-+ 0x80728472, 0xc0211c7c,
- 0x00000072, 0x80728472,
-- 0xc0211bfc, 0x00000072,
-- 0x80728472, 0xc0211d3c,
-+ 0xc0211bbc, 0x00000072,
-+ 0x80728472, 0xc0211bfc,
- 0x00000072, 0x80728472,
-- 0xc0211d7c, 0x00000072,
-- 0x80728472, 0xc0211a3c,
-+ 0xc0211d3c, 0x00000072,
-+ 0x80728472, 0xc0211d7c,
- 0x00000072, 0x80728472,
-- 0xc0211a7c, 0x00000072,
-- 0x80728472, 0xc0211dfc,
-+ 0xc0211a3c, 0x00000072,
-+ 0x80728472, 0xc0211a7c,
- 0x00000072, 0x80728472,
-- 0xc0211b3c, 0x00000072,
-- 0x80728472, 0xc0211b7c,
-+ 0xc0211dfc, 0x00000072,
-+ 0x80728472, 0xc0211b3c,
- 0x00000072, 0x80728472,
-- 0xbf8c007f, 0x8671ff71,
-- 0x0000ffff, 0xbefc0073,
-- 0xbefe006e, 0xbeff006f,
-- 0x867375ff, 0x000003ff,
-- 0xb9734803, 0x867375ff,
-- 0xfffff800, 0x8f738b73,
-- 0xb973a2c3, 0xb977f801,
-- 0x8673ff71, 0xf0000000,
-- 0x8f739c73, 0x8e739073,
-- 0xbef60080, 0x87767376,
-- 0x8673ff71, 0x08000000,
-- 0x8f739b73, 0x8e738f73,
-- 0x87767376, 0x8673ff74,
-- 0x00800000, 0x8f739773,
-- 0xb976f807, 0x86fe7e7e,
-- 0x86ea6a6a, 0xb974f802,
-- 0xbf8a0000, 0x95807370,
-- 0xbf810000, 0x00000000,
-+ 0xc0211b7c, 0x00000072,
-+ 0x80728472, 0xbf8c007f,
-+ 0x8671ff71, 0x0000ffff,
-+ 0xbefc0073, 0xbefe006e,
-+ 0xbeff006f, 0x867375ff,
-+ 0x000003ff, 0xb9734803,
-+ 0x867375ff, 0xfffff800,
-+ 0x8f738b73, 0xb973a2c3,
-+ 0xb977f801, 0x8673ff71,
-+ 0xf0000000, 0x8f739c73,
-+ 0x8e739073, 0xbef60080,
-+ 0x87767376, 0x8673ff71,
-+ 0x08000000, 0x8f739b73,
-+ 0x8e738f73, 0x87767376,
-+ 0x8673ff74, 0x00800000,
-+ 0x8f739773, 0xb976f807,
-+ 0x86fe7e7e, 0x86ea6a6a,
-+ 0xb974f802, 0xbf8a0000,
-+ 0x95807370, 0xbf810000,
- };
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
-index f11de028..ae2af3d 100644
---- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
-+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
-@@ -80,8 +80,6 @@ var EMU_RUN_HACK_RESTORE_NORMAL = 0
- var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
- var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
- var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
--var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
--var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
- var SAVE_LDS = 1
- var WG_BASE_ADDR_LO = 0x9000a000
- var WG_BASE_ADDR_HI = 0x0
-@@ -119,7 +117,7 @@ var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
- var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
- var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
- var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
--var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
-+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
-
- var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
- var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
-@@ -148,12 +146,6 @@ var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
- var s_save_spi_init_lo = exec_lo
- var s_save_spi_init_hi = exec_hi
-
-- //tba_lo and tba_hi need to be saved/restored
--var tba_lo = ttmp12
--var tba_hi = ttmp13
--var tma_lo = ttmp14
--var tma_hi = ttmp15
--
- var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
- var s_save_pc_hi = ttmp1
- var s_save_exec_lo = ttmp2
-@@ -167,10 +159,10 @@ var s_save_buf_rsrc1 = ttmp9
- var s_save_buf_rsrc2 = ttmp10
- var s_save_buf_rsrc3 = ttmp11
-
--var s_save_mem_offset = tma_lo
-+var s_save_mem_offset = ttmp14
- var s_save_alloc_size = s_save_trapsts //conflict
- var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time)
--var s_save_m0 = tma_hi
-+var s_save_m0 = ttmp15
-
- /* Restore */
- var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
-@@ -191,9 +183,9 @@ var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
- var s_restore_spi_init_lo = exec_lo
- var s_restore_spi_init_hi = exec_hi
-
--var s_restore_mem_offset = ttmp2
-+var s_restore_mem_offset = ttmp12
- var s_restore_alloc_size = ttmp3
--var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored
-+var s_restore_tmp = ttmp6
- var s_restore_mem_offset_save = s_restore_tmp //no conflict
-
- var s_restore_m0 = s_restore_alloc_size //no conflict
-@@ -202,8 +194,8 @@ var s_restore_mode = ttmp7
-
- var s_restore_pc_lo = ttmp0
- var s_restore_pc_hi = ttmp1
--var s_restore_exec_lo = tma_lo //no conflict
--var s_restore_exec_hi = tma_hi //no conflict
-+var s_restore_exec_lo = ttmp14
-+var s_restore_exec_hi = ttmp15
- var s_restore_status = ttmp4
- var s_restore_trapsts = ttmp5
- var s_restore_xnack_mask_lo = xnack_mask_lo
-@@ -247,19 +239,26 @@ L_SKIP_RESTORE:
-
- // ********* Handle non-CWSR traps *******************
- if (!EMU_RUN_HACK)
-+ // Illegal instruction is a non-maskable exception which blocks context save.
-+ // Halt the wavefront and return from the trap.
-+ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
-+ s_cbranch_scc1 L_HALT_WAVE
-+
- // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
- // Instead, halt the wavefront and return from the trap.
- s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
- s_cbranch_scc0 L_NO_MEM_VIOL
-+
-+L_HALT_WAVE:
- s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
- s_branch L_EXCP_CASE
-
- L_NO_MEM_VIOL:
- /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */
-- s_getreg_b32 tma_lo,hwreg(HW_REG_SQ_SHADER_TMA_LO)
-- s_getreg_b32 tma_hi,hwreg(HW_REG_SQ_SHADER_TMA_HI)
-- s_lshl_b64 [tma_lo, tma_hi], [tma_lo, tma_hi], 0x8
-- s_load_dwordx4 [ttmp8,ttmp9, ttmp10, ttmp11], [tma_lo,tma_hi], 0
-+ s_getreg_b32 ttmp14,hwreg(HW_REG_SQ_SHADER_TMA_LO)
-+ s_getreg_b32 ttmp15,hwreg(HW_REG_SQ_SHADER_TMA_HI)
-+ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
-+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [ttmp14, ttmp15], 0
- s_waitcnt lgkmcnt(0)
- s_or_b32 ttmp7, ttmp8, ttmp9
- s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set
-@@ -412,8 +411,6 @@ end
- if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
- s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
- s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
-- s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO
-- s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI
- end
-
- write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
-@@ -432,15 +429,11 @@ end
- //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2
- s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
-- write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO
-- write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI
-
-
-
- /* the first wave in the threadgroup */
-- // save fist_wave bits in tba_hi unused bit.26
- s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit
-- //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26]
- s_mov_b32 s_save_exec_hi, 0x0
- s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
-
-@@ -474,6 +467,7 @@ end
- //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
- s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
- s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
-+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
-
- s_mov_b32 m0, 0x0 //SGPR initial index value =0
- s_nop 0x0 //Manually inserted wait states
-@@ -548,7 +542,6 @@ end
- s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
-
- s_barrier //LDS is used? wait for other waves in the same TG
-- //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
- s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
- s_cbranch_scc0 L_SAVE_LDS_DONE
-
-@@ -963,9 +956,6 @@ end
- s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
- end
-
-- /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111),
-- However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG
-- */
- s_mov_b32 m0, s_restore_alloc_size
-
- L_RESTORE_SGPR_LOOP:
-@@ -973,6 +963,7 @@ end
- s_waitcnt lgkmcnt(0) //ensure data ready
-
- s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
-+ s_nop 0 // hazard SALU M0=> S_MOVREL
-
- s_movreld_b64 s0, s0 //s[0+m0] = s0
- s_movreld_b64 s2, s2
-@@ -1019,8 +1010,6 @@ end
- read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
- read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
- read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
-- read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO
-- read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI
-
- s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
-
-@@ -1150,97 +1139,93 @@ end
- #endif
-
- static const uint32_t cwsr_trap_gfx9_hex[] = {
-- 0xbf820001, 0xbf82012c,
-+ 0xbf820001, 0xbf820124,
- 0xb8f0f802, 0x89708670,
- 0xb8f1f803, 0x8674ff71,
-- 0x00000400, 0xbf85001a,
-- 0x8674ff71, 0x00000100,
-- 0xbf840003, 0x8770ff70,
-- 0x00002000, 0xbf820010,
-- 0xb8faf812, 0xb8fbf813,
-- 0x8efa887a, 0xc00a1d3d,
-- 0x00000000, 0xbf8cc07f,
-- 0x87737574, 0xbf840002,
-- 0xb970f802, 0xbe801d74,
-- 0xb8f1f803, 0x8671ff71,
-- 0x000001ff, 0xbf850002,
-- 0x806c846c, 0x826d806d,
-- 0x866dff6d, 0x0000ffff,
-- 0xb970f802, 0xbe801f6c,
-- 0xb8f1f803, 0x8671ff71,
-- 0x00000100, 0xbf840006,
-- 0xbef60080, 0xb9760203,
-- 0x866dff6d, 0x0000ffff,
-- 0x80ec886c, 0x82ed806d,
-- 0xbef60080, 0xb9760283,
-- 0xbef20068, 0xbef30069,
-- 0xb8f62407, 0x8e769c76,
-- 0x876d766d, 0xb8f603c7,
-- 0x8e769b76, 0x876d766d,
-- 0xb8f6f807, 0x8676ff76,
-- 0x00007fff, 0xb976f807,
-- 0xbeee007e, 0xbeef007f,
-- 0xbefe0180, 0xbf900004,
-- 0xbf8e0002, 0xbf88fffe,
-- 0xbef4007e, 0x8675ff7f,
-- 0x0000ffff, 0x8775ff75,
-- 0x00040000, 0xbef60080,
-- 0xbef700ff, 0x00807fac,
-- 0x8676ff7f, 0x08000000,
-- 0x8f768376, 0x87777677,
-- 0x8676ff7f, 0x70000000,
-- 0x8f768176, 0x87777677,
-- 0xbefb007c, 0xbefa0080,
-- 0xb8fa2a05, 0x807a817a,
-- 0x8e7a8a7a, 0xb8f61605,
-- 0x80768176, 0x8e768676,
-- 0x807a767a, 0xbef60084,
-- 0xbef600ff, 0x01000000,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611efa, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611b3a, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611b7a, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611bba, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611bfa, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xbefe007c, 0xbefc007a,
-- 0xc0611c3a, 0x0000007c,
-- 0x807a847a, 0xbefc007e,
-- 0xb8f1f803, 0xbefe007c,
-- 0xbefc007a, 0xc0611c7a,
-+ 0x00000400, 0xbf85001d,
-+ 0x8674ff71, 0x00000800,
-+ 0xbf850003, 0x8674ff71,
-+ 0x00000100, 0xbf840003,
-+ 0x8770ff70, 0x00002000,
-+ 0xbf820010, 0xb8faf812,
-+ 0xb8fbf813, 0x8efa887a,
-+ 0xc00a1d3d, 0x00000000,
-+ 0xbf8cc07f, 0x87737574,
-+ 0xbf840002, 0xb970f802,
-+ 0xbe801d74, 0xb8f1f803,
-+ 0x8671ff71, 0x000001ff,
-+ 0xbf850002, 0x806c846c,
-+ 0x826d806d, 0x866dff6d,
-+ 0x0000ffff, 0xb970f802,
-+ 0xbe801f6c, 0xb8f1f803,
-+ 0x8671ff71, 0x00000100,
-+ 0xbf840006, 0xbef60080,
-+ 0xb9760203, 0x866dff6d,
-+ 0x0000ffff, 0x80ec886c,
-+ 0x82ed806d, 0xbef60080,
-+ 0xb9760283, 0xbef20068,
-+ 0xbef30069, 0xb8f62407,
-+ 0x8e769c76, 0x876d766d,
-+ 0xb8f603c7, 0x8e769b76,
-+ 0x876d766d, 0xb8f6f807,
-+ 0x8676ff76, 0x00007fff,
-+ 0xb976f807, 0xbeee007e,
-+ 0xbeef007f, 0xbefe0180,
-+ 0xbf900004, 0xbf8e0002,
-+ 0xbf88fffe, 0xbef4007e,
-+ 0x8675ff7f, 0x0000ffff,
-+ 0x8775ff75, 0x00040000,
-+ 0xbef60080, 0xbef700ff,
-+ 0x00807fac, 0x8676ff7f,
-+ 0x08000000, 0x8f768376,
-+ 0x87777677, 0x8676ff7f,
-+ 0x70000000, 0x8f768176,
-+ 0x87777677, 0xbefb007c,
-+ 0xbefa0080, 0xb8fa2a05,
-+ 0x807a817a, 0x8e7a8a7a,
-+ 0xb8f61605, 0x80768176,
-+ 0x8e768676, 0x807a767a,
-+ 0xbef60084, 0xbef600ff,
-+ 0x01000000, 0xbefe007c,
-+ 0xbefc007a, 0xc0611efa,
-+ 0x0000007c, 0x807a847a,
-+ 0xbefc007e, 0xbefe007c,
-+ 0xbefc007a, 0xc0611b3a,
-+ 0x0000007c, 0x807a847a,
-+ 0xbefc007e, 0xbefe007c,
-+ 0xbefc007a, 0xc0611b7a,
- 0x0000007c, 0x807a847a,
- 0xbefc007e, 0xbefe007c,
-- 0xbefc007a, 0xc0611cba,
-+ 0xbefc007a, 0xc0611bba,
- 0x0000007c, 0x807a847a,
- 0xbefc007e, 0xbefe007c,
-- 0xbefc007a, 0xc0611cfa,
-+ 0xbefc007a, 0xc0611bfa,
- 0x0000007c, 0x807a847a,
-- 0xbefc007e, 0xb8fbf801,
-+ 0xbefc007e, 0xbefe007c,
-+ 0xbefc007a, 0xc0611c3a,
-+ 0x0000007c, 0x807a847a,
-+ 0xbefc007e, 0xb8f1f803,
- 0xbefe007c, 0xbefc007a,
-- 0xc0611efa, 0x0000007c,
-+ 0xc0611c7a, 0x0000007c,
- 0x807a847a, 0xbefc007e,
- 0xbefe007c, 0xbefc007a,
-- 0xc0611e3a, 0x0000007c,
-+ 0xc0611cba, 0x0000007c,
- 0x807a847a, 0xbefc007e,
- 0xbefe007c, 0xbefc007a,
-- 0xc0611e7a, 0x0000007c,
-+ 0xc0611cfa, 0x0000007c,
- 0x807a847a, 0xbefc007e,
-- 0x8676ff7f, 0x04000000,
-- 0xbeef0080, 0x876f6f76,
-- 0xb8fa2a05, 0x807a817a,
-- 0x8e7a8a7a, 0xb8f11605,
-- 0x80718171, 0x8e718471,
-- 0x8e768271, 0xbef600ff,
-- 0x01000000, 0xbef20174,
-- 0x80747a74, 0xbefc0080,
-+ 0xb8fbf801, 0xbefe007c,
-+ 0xbefc007a, 0xc0611efa,
-+ 0x0000007c, 0x807a847a,
-+ 0xbefc007e, 0x8676ff7f,
-+ 0x04000000, 0xbeef0080,
-+ 0x876f6f76, 0xb8fa2a05,
-+ 0x807a817a, 0x8e7a8a7a,
-+ 0xb8f11605, 0x80718171,
-+ 0x8e718471, 0x8e768271,
-+ 0xbef600ff, 0x01000000,
-+ 0xbef20174, 0x80747a74,
-+ 0x82758075, 0xbefc0080,
- 0xbf800000, 0xbe802b00,
- 0xbe822b02, 0xbe842b04,
- 0xbe862b06, 0xbe882b08,
-@@ -1300,7 +1285,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0x7a1d0300, 0x807c847c,
- 0x807aff7a, 0x00000400,
- 0xbf0a717c, 0xbf85ffef,
-- 0xbf9c0000, 0xbf8200ca,
-+ 0xbf9c0000, 0xbf8200c5,
- 0xbef4007e, 0x8675ff7f,
- 0x0000ffff, 0x8775ff75,
- 0x00040000, 0xbef60080,
-@@ -1314,93 +1299,90 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
- 0xbeff00c1, 0xb8ef4306,
- 0x866fc16f, 0xbf840019,
- 0x8e6f866f, 0x8e6f826f,
-- 0xbef6006f, 0xb8ee2a05,
-- 0x806e816e, 0x8e6e8a6e,
-+ 0xbef6006f, 0xb8f82a05,
-+ 0x80788178, 0x8e788a78,
- 0xb8f21605, 0x80728172,
-- 0x8e728672, 0x806e726e,
-- 0x806eff6e, 0x00000080,
-+ 0x8e728672, 0x80787278,
-+ 0x8078ff78, 0x00000080,
- 0xbef600ff, 0x01000000,
- 0xbefc0080, 0xe0510000,
-- 0x6e1d0000, 0xe0510100,
-- 0x6e1d0000, 0x807cff7c,
-- 0x00000200, 0x806eff6e,
-+ 0x781d0000, 0xe0510100,
-+ 0x781d0000, 0x807cff7c,
-+ 0x00000200, 0x8078ff78,
- 0x00000200, 0xbf0a6f7c,
-- 0xbf85fff6, 0xbeee0080,
-+ 0xbf85fff6, 0xbef80080,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8ef2a05, 0x806f816f,
- 0x8e6f826f, 0x8e76886f,
- 0xbef600ff, 0x01000000,
-- 0xbef2006e, 0x806eff6e,
-+ 0xbef20078, 0x8078ff78,
- 0x00000400, 0xbefc0084,
- 0xbf11087c, 0x806fff6f,
- 0x00008000, 0xe0524000,
-- 0x6e1d0000, 0xe0524100,
-- 0x6e1d0100, 0xe0524200,
-- 0x6e1d0200, 0xe0524300,
-- 0x6e1d0300, 0xbf8c0f70,
-+ 0x781d0000, 0xe0524100,
-+ 0x781d0100, 0xe0524200,
-+ 0x781d0200, 0xe0524300,
-+ 0x781d0300, 0xbf8c0f70,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
-- 0x807c847c, 0x806eff6e,
-+ 0x807c847c, 0x8078ff78,
- 0x00000400, 0xbf0a6f7c,
- 0xbf85ffee, 0xbf9c0000,
- 0xe0524000, 0x721d0000,
- 0xe0524100, 0x721d0100,
- 0xe0524200, 0x721d0200,
- 0xe0524300, 0x721d0300,
-- 0xb8ee2a05, 0x806e816e,
-- 0x8e6e8a6e, 0xb8f21605,
-+ 0xb8f82a05, 0x80788178,
-+ 0x8e788a78, 0xb8f21605,
- 0x80728172, 0x8e728672,
-- 0x806e726e, 0x80eec06e,
-+ 0x80787278, 0x80f8c078,
- 0xb8ef1605, 0x806f816f,
- 0x8e6f846f, 0x8e76826f,
- 0xbef600ff, 0x01000000,
- 0xbefc006f, 0xc031003a,
-- 0x0000006e, 0x80eec06e,
-+ 0x00000078, 0x80f8c078,
- 0xbf8cc07f, 0x80fc907c,
-- 0xbe802d00, 0xbe822d02,
-- 0xbe842d04, 0xbe862d06,
-- 0xbe882d08, 0xbe8a2d0a,
-- 0xbe8c2d0c, 0xbe8e2d0e,
-- 0xbf06807c, 0xbf84fff1,
-- 0xb8ee2a05, 0x806e816e,
-- 0x8e6e8a6e, 0xb8f21605,
-- 0x80728172, 0x8e728672,
-- 0x806e726e, 0xbef60084,
-- 0xbef600ff, 0x01000000,
-- 0xc0211bfa, 0x0000006e,
-- 0x806e846e, 0xc0211b3a,
-- 0x0000006e, 0x806e846e,
-- 0xc0211b7a, 0x0000006e,
-- 0x806e846e, 0xc0211eba,
-- 0x0000006e, 0x806e846e,
-- 0xc0211efa, 0x0000006e,
-- 0x806e846e, 0xc0211c3a,
-- 0x0000006e, 0x806e846e,
-- 0xc0211c7a, 0x0000006e,
-- 0x806e846e, 0xc0211a3a,
-- 0x0000006e, 0x806e846e,
-- 0xc0211a7a, 0x0000006e,
-- 0x806e846e, 0xc0211cfa,
-- 0x0000006e, 0x806e846e,
-- 0xc0211e3a, 0x0000006e,
-- 0x806e846e, 0xc0211e7a,
-- 0x0000006e, 0x806e846e,
-- 0xbf8cc07f, 0x866dff6d,
-- 0x0000ffff, 0xbefc006f,
-- 0xbefe007a, 0xbeff007b,
-- 0x866f71ff, 0x000003ff,
-- 0xb96f4803, 0x866f71ff,
-- 0xfffff800, 0x8f6f8b6f,
-- 0xb96fa2c3, 0xb973f801,
-- 0x866fff6d, 0xf0000000,
-- 0x8f6f9c6f, 0x8e6f906f,
-- 0xbef20080, 0x87726f72,
-- 0x866fff6d, 0x08000000,
-- 0x8f6f9b6f, 0x8e6f8f6f,
-- 0x87726f72, 0x866fff70,
-- 0x00800000, 0x8f6f976f,
-- 0xb972f807, 0x86fe7e7e,
-- 0x86ea6a6a, 0xb970f802,
-- 0xbf8a0000, 0x95806f6c,
-- 0xbf810000, 0x00000000,
-+ 0xbf800000, 0xbe802d00,
-+ 0xbe822d02, 0xbe842d04,
-+ 0xbe862d06, 0xbe882d08,
-+ 0xbe8a2d0a, 0xbe8c2d0c,
-+ 0xbe8e2d0e, 0xbf06807c,
-+ 0xbf84fff0, 0xb8f82a05,
-+ 0x80788178, 0x8e788a78,
-+ 0xb8f21605, 0x80728172,
-+ 0x8e728672, 0x80787278,
-+ 0xbef60084, 0xbef600ff,
-+ 0x01000000, 0xc0211bfa,
-+ 0x00000078, 0x80788478,
-+ 0xc0211b3a, 0x00000078,
-+ 0x80788478, 0xc0211b7a,
-+ 0x00000078, 0x80788478,
-+ 0xc0211eba, 0x00000078,
-+ 0x80788478, 0xc0211efa,
-+ 0x00000078, 0x80788478,
-+ 0xc0211c3a, 0x00000078,
-+ 0x80788478, 0xc0211c7a,
-+ 0x00000078, 0x80788478,
-+ 0xc0211a3a, 0x00000078,
-+ 0x80788478, 0xc0211a7a,
-+ 0x00000078, 0x80788478,
-+ 0xc0211cfa, 0x00000078,
-+ 0x80788478, 0xbf8cc07f,
-+ 0x866dff6d, 0x0000ffff,
-+ 0xbefc006f, 0xbefe007a,
-+ 0xbeff007b, 0x866f71ff,
-+ 0x000003ff, 0xb96f4803,
-+ 0x866f71ff, 0xfffff800,
-+ 0x8f6f8b6f, 0xb96fa2c3,
-+ 0xb973f801, 0x866fff6d,
-+ 0xf0000000, 0x8f6f9c6f,
-+ 0x8e6f906f, 0xbef20080,
-+ 0x87726f72, 0x866fff6d,
-+ 0x08000000, 0x8f6f9b6f,
-+ 0x8e6f8f6f, 0x87726f72,
-+ 0x866fff70, 0x00800000,
-+ 0x8f6f976f, 0xb972f807,
-+ 0x86fe7e7e, 0x86ea6a6a,
-+ 0xb970f802, 0xbf8a0000,
-+ 0x95806f6c, 0xbf810000,
- };
---
-2.7.4
-