diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch | 857 |
1 files changed, 857 insertions, 0 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch new file mode 100644 index 00000000..ebec5a50 --- /dev/null +++ b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1331-drm-amdkfd-Handle-ILLEGAL_INST-in-trap-handler.patch @@ -0,0 +1,857 @@ +From 34e333be4e27b795ee1c022fc411fe82ff0cfe03 Mon Sep 17 00:00:00 2001 +From: Jay Cornwall <Jay.Cornwall@amd.com> +Date: Tue, 15 Aug 2017 14:34:10 -0500 +Subject: [PATCH 1331/4131] drm/amdkfd: Handle ILLEGAL_INST in trap handler + +Illegal instruction is a non-maskable exception. It must be handled +by the trap or it will block context save. Pending a contract with +the second-level trap handler halt the wavefront and exit the trap. + +Change-Id: I65a67ffb60ea848dfa3753e333e270adf712dc08 +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> +--- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h | 289 +++++++++--------- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 330 ++++++++++----------- + 2 files changed, 301 insertions(+), 318 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h +index 48fcec5..d5d1331 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_carrizo.h +@@ -458,6 +458,7 @@ end + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset ++ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + L_SAVE_SGPR_LOOP: +@@ -1131,7 +1132,7 @@ end + #endif + + static const uint32_t cwsr_trap_carrizo_hex[] = { +- 0xbf820001, 0xbf820122, ++ 0xbf820001, 0xbf820123, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, +@@ -1217,167 +1218,167 @@ static const uint32_t cwsr_trap_carrizo_hex[] = { + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, +- 0xbefc0080, 0xbe802b00, +- 0xbe822b02, 0xbe842b04, +- 0xbe862b06, 0xbe882b08, +- 0xbe8a2b0a, 0xbe8c2b0c, +- 0xbe8e2b0e, 0xc06b003c, +- 0x00000000, 0xc06b013c, +- 0x00000010, 0xc06b023c, +- 0x00000020, 0xc06b033c, +- 0x00000030, 0x8078c078, +- 0x82798079, 0x807c907c, +- 0xbf0a757c, 0xbf85ffeb, +- 0xbef80176, 0xbeee0080, ++ 0x82798079, 0xbefc0080, ++ 0xbe802b00, 0xbe822b02, ++ 0xbe842b04, 0xbe862b06, ++ 0xbe882b08, 0xbe8a2b0a, ++ 0xbe8c2b0c, 0xbe8e2b0e, ++ 0xc06b003c, 0x00000000, ++ 0xc06b013c, 0x00000010, ++ 0xc06b023c, 0x00000020, ++ 0xc06b033c, 0x00000030, ++ 0x8078c078, 0x82798079, ++ 0x807c907c, 0xbf0a757c, ++ 0xbf85ffeb, 0xbef80176, ++ 0xbeee0080, 0xbefe00c1, ++ 0xbeff00c1, 0xbefa00ff, ++ 0x01000000, 0xe0724000, ++ 0x6e1e0000, 0xe0724100, ++ 0x6e1e0100, 0xe0724200, ++ 0x6e1e0200, 0xe0724300, ++ 0x6e1e0300, 0xbefe00c1, ++ 0xbeff00c1, 0xb8f54306, ++ 0x8675c175, 0xbf84002c, ++ 0xbf8a0000, 0x867aff73, ++ 0x04000000, 0xbf840028, ++ 0x8e758675, 0x8e758275, ++ 0xbefa0075, 0xb8ee2a05, ++ 0x806e816e, 0x8e6e8a6e, ++ 0xb8fa1605, 0x807a817a, ++ 0x8e7a867a, 0x806e7a6e, ++ 0x806eff6e, 0x00000080, ++ 0xbefa00ff, 0x01000000, ++ 0xbefc0080, 0xd28c0002, ++ 0x000100c1, 0xd28d0003, ++ 0x000204c1, 0xd1060002, ++ 0x00011103, 0x7e0602ff, ++ 0x00000200, 0xbefc00ff, ++ 0x00010000, 0xbe80007b, ++ 0x867bff7b, 0xff7fffff, ++ 0x877bff7b, 0x00058000, ++ 0xd8ec0000, 0x00000002, ++ 0xbf8c007f, 0xe0765000, ++ 0x6e1e0002, 0x32040702, ++ 0xd0c9006a, 0x0000eb02, ++ 0xbf87fff7, 0xbefb0000, ++ 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, ++ 0xb8f52a05, 0x80758175, ++ 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, ++ 0xbefc0084, 0xbf0a757c, ++ 0xbf840015, 0xbf11017c, ++ 0x8075ff75, 0x00001000, ++ 0x7e000300, 0x7e020301, ++ 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, ++ 0x807c847c, 0x806eff6e, ++ 0x00000400, 0xbf0a757c, ++ 0xbf85ffef, 0xbf9c0000, ++ 0xbf8200ca, 0xbef8007e, ++ 0x8679ff7f, 0x0000ffff, ++ 0x8779ff79, 0x00040000, ++ 0xbefa0080, 0xbefb00ff, ++ 0x00807fac, 0x8676ff7f, ++ 0x08000000, 0x8f768376, ++ 0x877b767b, 0x8676ff7f, ++ 0x70000000, 0x8f768176, ++ 0x877b767b, 0x8676ff7f, ++ 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, +- 0xb8f54306, 0x8675c175, +- 0xbf84002c, 0xbf8a0000, +- 0x867aff73, 0x04000000, +- 0xbf840028, 0x8e758675, +- 0x8e758275, 0xbefa0075, +- 0xb8ee2a05, 0x806e816e, +- 0x8e6e8a6e, 0xb8fa1605, +- 0x807a817a, 0x8e7a867a, +- 0x806e7a6e, 0x806eff6e, ++ 0xb8f34306, 0x8673c173, ++ 0xbf840019, 0x8e738673, ++ 0x8e738273, 0xbefa0073, ++ 0xb8f22a05, 0x80728172, ++ 0x8e728a72, 0xb8f61605, ++ 0x80768176, 0x8e768676, ++ 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, +- 0xd28c0002, 0x000100c1, +- 0xd28d0003, 0x000204c1, +- 0xd1060002, 0x00011103, +- 0x7e0602ff, 0x00000200, +- 0xbefc00ff, 0x00010000, +- 0xbe80007b, 0x867bff7b, +- 0xff7fffff, 0x877bff7b, +- 0x00058000, 0xd8ec0000, +- 0x00000002, 0xbf8c007f, +- 0xe0765000, 0x6e1e0002, +- 0x32040702, 0xd0c9006a, +- 0x0000eb02, 0xbf87fff7, +- 0xbefb0000, 0xbeee00ff, +- 0x00000400, 0xbefe00c1, +- 0xbeff00c1, 0xb8f52a05, +- 0x80758175, 0x8e758275, +- 0x8e7a8875, 0xbefa00ff, +- 0x01000000, 0xbefc0084, +- 0xbf0a757c, 0xbf840015, +- 0xbf11017c, 0x8075ff75, +- 0x00001000, 0x7e000300, ++ 0xe0510000, 0x721e0000, ++ 0xe0510100, 0x721e0000, ++ 0x807cff7c, 0x00000200, ++ 0x8072ff72, 0x00000200, ++ 0xbf0a737c, 0xbf85fff6, ++ 0xbef20080, 0xbefe00c1, ++ 0xbeff00c1, 0xb8f32a05, ++ 0x80738173, 0x8e738273, ++ 0x8e7a8873, 0xbefa00ff, ++ 0x01000000, 0xbef60072, ++ 0x8072ff72, 0x00000400, ++ 0xbefc0084, 0xbf11087c, ++ 0x8073ff73, 0x00008000, ++ 0xe0524000, 0x721e0000, ++ 0xe0524100, 0x721e0100, ++ 0xe0524200, 0x721e0200, ++ 0xe0524300, 0x721e0300, ++ 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, +- 0x7e060303, 0xe0724000, +- 0x6e1e0000, 0xe0724100, +- 0x6e1e0100, 0xe0724200, +- 0x6e1e0200, 0xe0724300, +- 0x6e1e0300, 0x807c847c, +- 0x806eff6e, 0x00000400, +- 0xbf0a757c, 0xbf85ffef, +- 0xbf9c0000, 0xbf8200ca, +- 0xbef8007e, 0x8679ff7f, +- 0x0000ffff, 0x8779ff79, +- 0x00040000, 0xbefa0080, +- 0xbefb00ff, 0x00807fac, +- 0x8676ff7f, 0x08000000, +- 0x8f768376, 0x877b767b, +- 0x8676ff7f, 0x70000000, +- 0x8f768176, 0x877b767b, +- 0x8676ff7f, 0x04000000, +- 0xbf84001e, 0xbefe00c1, +- 0xbeff00c1, 0xb8f34306, +- 0x8673c173, 0xbf840019, +- 0x8e738673, 0x8e738273, +- 0xbefa0073, 0xb8f22a05, ++ 0x7e060303, 0x807c847c, ++ 0x8072ff72, 0x00000400, ++ 0xbf0a737c, 0xbf85ffee, ++ 0xbf9c0000, 0xe0524000, ++ 0x761e0000, 0xe0524100, ++ 0x761e0100, 0xe0524200, ++ 0x761e0200, 0xe0524300, ++ 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, +- 0x8072ff72, 0x00000080, +- 0xbefa00ff, 0x01000000, +- 0xbefc0080, 0xe0510000, +- 0x721e0000, 0xe0510100, +- 0x721e0000, 0x807cff7c, +- 0x00000200, 0x8072ff72, +- 0x00000200, 0xbf0a737c, +- 0xbf85fff6, 0xbef20080, +- 0xbefe00c1, 0xbeff00c1, +- 0xb8f32a05, 0x80738173, +- 0x8e738273, 0x8e7a8873, +- 0xbefa00ff, 0x01000000, +- 0xbef60072, 0x8072ff72, +- 0x00000400, 0xbefc0084, +- 0xbf11087c, 0x8073ff73, +- 0x00008000, 0xe0524000, +- 0x721e0000, 0xe0524100, +- 0x721e0100, 0xe0524200, +- 0x721e0200, 0xe0524300, +- 0x721e0300, 0xbf8c0f70, +- 0x7e000300, 0x7e020301, +- 0x7e040302, 0x7e060303, +- 0x807c847c, 0x8072ff72, +- 0x00000400, 0xbf0a737c, +- 0xbf85ffee, 0xbf9c0000, +- 0xe0524000, 0x761e0000, +- 0xe0524100, 0x761e0100, +- 0xe0524200, 0x761e0200, +- 0xe0524300, 0x761e0300, +- 0xb8f22a05, 0x80728172, +- 0x8e728a72, 0xb8f61605, +- 0x80768176, 0x8e768676, +- 0x80727672, 0x80f2c072, +- 0xb8f31605, 0x80738173, +- 0x8e738473, 0x8e7a8273, +- 0xbefa00ff, 0x01000000, +- 0xbefc0073, 0xc031003c, +- 0x00000072, 0x80f2c072, +- 0xbf8c007f, 0x80fc907c, +- 0xbe802d00, 0xbe822d02, +- 0xbe842d04, 0xbe862d06, +- 0xbe882d08, 0xbe8a2d0a, +- 0xbe8c2d0c, 0xbe8e2d0e, +- 0xbf06807c, 0xbf84fff1, +- 0xb8f22a05, 0x80728172, +- 0x8e728a72, 0xb8f61605, +- 0x80768176, 0x8e768676, +- 0x80727672, 0xbefa0084, +- 0xbefa00ff, 0x01000000, +- 0xc0211cfc, 0x00000072, +- 0x80728472, 0xc0211c3c, ++ 0x80f2c072, 0xb8f31605, ++ 0x80738173, 0x8e738473, ++ 0x8e7a8273, 0xbefa00ff, ++ 0x01000000, 0xbefc0073, ++ 0xc031003c, 0x00000072, ++ 0x80f2c072, 0xbf8c007f, ++ 0x80fc907c, 0xbe802d00, ++ 0xbe822d02, 0xbe842d04, ++ 0xbe862d06, 0xbe882d08, ++ 0xbe8a2d0a, 0xbe8c2d0c, ++ 0xbe8e2d0e, 0xbf06807c, ++ 0xbf84fff1, 0xb8f22a05, ++ 0x80728172, 0x8e728a72, ++ 0xb8f61605, 0x80768176, ++ 0x8e768676, 0x80727672, ++ 0xbefa0084, 0xbefa00ff, ++ 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, +- 0xc0211c7c, 0x00000072, +- 0x80728472, 0xc0211bbc, ++ 0xc0211c3c, 0x00000072, ++ 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, +- 0xc0211bfc, 0x00000072, +- 0x80728472, 0xc0211d3c, ++ 0xc0211bbc, 0x00000072, ++ 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, +- 0xc0211d7c, 0x00000072, +- 0x80728472, 0xc0211a3c, ++ 0xc0211d3c, 0x00000072, ++ 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, +- 0xc0211a7c, 0x00000072, +- 0x80728472, 0xc0211dfc, ++ 0xc0211a3c, 0x00000072, ++ 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, +- 0xc0211b3c, 0x00000072, +- 0x80728472, 0xc0211b7c, ++ 0xc0211dfc, 0x00000072, ++ 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, +- 0xbf8c007f, 0x8671ff71, +- 0x0000ffff, 0xbefc0073, +- 0xbefe006e, 0xbeff006f, +- 0x867375ff, 0x000003ff, +- 0xb9734803, 0x867375ff, +- 0xfffff800, 0x8f738b73, +- 0xb973a2c3, 0xb977f801, +- 0x8673ff71, 0xf0000000, +- 0x8f739c73, 0x8e739073, +- 0xbef60080, 0x87767376, +- 0x8673ff71, 0x08000000, +- 0x8f739b73, 0x8e738f73, +- 0x87767376, 0x8673ff74, +- 0x00800000, 0x8f739773, +- 0xb976f807, 0x86fe7e7e, +- 0x86ea6a6a, 0xb974f802, +- 0xbf8a0000, 0x95807370, +- 0xbf810000, 0x00000000, ++ 0xc0211b7c, 0x00000072, ++ 0x80728472, 0xbf8c007f, ++ 0x8671ff71, 0x0000ffff, ++ 0xbefc0073, 0xbefe006e, ++ 0xbeff006f, 0x867375ff, ++ 0x000003ff, 0xb9734803, ++ 0x867375ff, 0xfffff800, ++ 0x8f738b73, 0xb973a2c3, ++ 0xb977f801, 0x8673ff71, ++ 0xf0000000, 0x8f739c73, ++ 0x8e739073, 0xbef60080, ++ 0x87767376, 0x8673ff71, ++ 0x08000000, 0x8f739b73, ++ 0x8e738f73, 0x87767376, ++ 0x8673ff74, 0x00800000, ++ 0x8f739773, 0xb976f807, ++ 0x86fe7e7e, 0x86ea6a6a, ++ 0xb974f802, 0xbf8a0000, ++ 0x95807370, 0xbf810000, + }; + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +index f11de028..ae2af3d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +@@ -80,8 +80,6 @@ var EMU_RUN_HACK_RESTORE_NORMAL = 0 + var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 + var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 + var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +-var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK + var SAVE_LDS = 1 + var WG_BASE_ADDR_LO = 0x9000a000 + var WG_BASE_ADDR_HI = 0x0 +@@ -119,7 +117,7 @@ var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 + var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 + var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 + var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +-var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 ++var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + + var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME + var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +@@ -148,12 +146,6 @@ var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + var s_save_spi_init_lo = exec_lo + var s_save_spi_init_hi = exec_hi + +- //tba_lo and tba_hi need to be saved/restored +-var tba_lo = ttmp12 +-var tba_hi = ttmp13 +-var tma_lo = ttmp14 +-var tma_hi = ttmp15 +- + var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} + var s_save_pc_hi = ttmp1 + var s_save_exec_lo = ttmp2 +@@ -167,10 +159,10 @@ var s_save_buf_rsrc1 = ttmp9 + var s_save_buf_rsrc2 = ttmp10 + var s_save_buf_rsrc3 = ttmp11 + +-var s_save_mem_offset = tma_lo ++var s_save_mem_offset = ttmp14 + var s_save_alloc_size = s_save_trapsts //conflict + var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) +-var s_save_m0 = tma_hi ++var s_save_m0 = ttmp15 + + /* Restore */ + var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +@@ -191,9 +183,9 @@ var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + var s_restore_spi_init_lo = exec_lo + var s_restore_spi_init_hi = exec_hi + +-var s_restore_mem_offset = ttmp2 ++var s_restore_mem_offset = ttmp12 + var s_restore_alloc_size = ttmp3 +-var s_restore_tmp = ttmp6 //tba_lo/hi need to be restored ++var s_restore_tmp = ttmp6 + var s_restore_mem_offset_save = s_restore_tmp //no conflict + + var s_restore_m0 = s_restore_alloc_size //no conflict +@@ -202,8 +194,8 @@ var s_restore_mode = ttmp7 + + var s_restore_pc_lo = ttmp0 + var s_restore_pc_hi = ttmp1 +-var s_restore_exec_lo = tma_lo //no conflict +-var s_restore_exec_hi = tma_hi //no conflict ++var s_restore_exec_lo = ttmp14 ++var s_restore_exec_hi = ttmp15 + var s_restore_status = ttmp4 + var s_restore_trapsts = ttmp5 + var s_restore_xnack_mask_lo = xnack_mask_lo +@@ -247,19 +239,26 @@ L_SKIP_RESTORE: + + // ********* Handle non-CWSR traps ******************* + if (!EMU_RUN_HACK) ++ // Illegal instruction is a non-maskable exception which blocks context save. ++ // Halt the wavefront and return from the trap. ++ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK ++ s_cbranch_scc1 L_HALT_WAVE ++ + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. + s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_NO_MEM_VIOL ++ ++L_HALT_WAVE: + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_branch L_EXCP_CASE + + L_NO_MEM_VIOL: + /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ +- s_getreg_b32 tma_lo,hwreg(HW_REG_SQ_SHADER_TMA_LO) +- s_getreg_b32 tma_hi,hwreg(HW_REG_SQ_SHADER_TMA_HI) +- s_lshl_b64 [tma_lo, tma_hi], [tma_lo, tma_hi], 0x8 +- s_load_dwordx4 [ttmp8,ttmp9, ttmp10, ttmp11], [tma_lo,tma_hi], 0 ++ s_getreg_b32 ttmp14,hwreg(HW_REG_SQ_SHADER_TMA_LO) ++ s_getreg_b32 ttmp15,hwreg(HW_REG_SQ_SHADER_TMA_HI) ++ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 ++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [ttmp14, ttmp15], 0 + s_waitcnt lgkmcnt(0) + s_or_b32 ttmp7, ttmp8, ttmp9 + s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set +@@ -412,8 +411,6 @@ end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over +- s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO +- s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC +@@ -432,15 +429,11 @@ end + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) +- write_hwreg_to_mem(tba_lo, s_save_buf_rsrc0, s_save_mem_offset) //TBA_LO +- write_hwreg_to_mem(tba_hi, s_save_buf_rsrc0, s_save_mem_offset) //TBA_HI + + + + /* the first wave in the threadgroup */ +- // save fist_wave bits in tba_hi unused bit.26 + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit +- //s_or_b32 tba_hi, s_save_tmp, tba_hi // save first wave bit to tba_hi.bits[26] + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + +@@ -474,6 +467,7 @@ end + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset ++ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states +@@ -548,7 +542,6 @@ end + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG +- //s_and_b32 s_save_tmp, tba_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + +@@ -963,9 +956,6 @@ end + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +- /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111), +- However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG +- */ + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: +@@ -973,6 +963,7 @@ end + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] ++ s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 +@@ -1019,8 +1010,6 @@ end + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE +- read_hwreg_from_mem(tba_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_LO +- read_hwreg_from_mem(tba_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //TBA_HI + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + +@@ -1150,97 +1139,93 @@ end + #endif + + static const uint32_t cwsr_trap_gfx9_hex[] = { +- 0xbf820001, 0xbf82012c, ++ 0xbf820001, 0xbf820124, + 0xb8f0f802, 0x89708670, + 0xb8f1f803, 0x8674ff71, +- 0x00000400, 0xbf85001a, +- 0x8674ff71, 0x00000100, +- 0xbf840003, 0x8770ff70, +- 0x00002000, 0xbf820010, +- 0xb8faf812, 0xb8fbf813, +- 0x8efa887a, 0xc00a1d3d, +- 0x00000000, 0xbf8cc07f, +- 0x87737574, 0xbf840002, +- 0xb970f802, 0xbe801d74, +- 0xb8f1f803, 0x8671ff71, +- 0x000001ff, 0xbf850002, +- 0x806c846c, 0x826d806d, +- 0x866dff6d, 0x0000ffff, +- 0xb970f802, 0xbe801f6c, +- 0xb8f1f803, 0x8671ff71, +- 0x00000100, 0xbf840006, +- 0xbef60080, 0xb9760203, +- 0x866dff6d, 0x0000ffff, +- 0x80ec886c, 0x82ed806d, +- 0xbef60080, 0xb9760283, +- 0xbef20068, 0xbef30069, +- 0xb8f62407, 0x8e769c76, +- 0x876d766d, 0xb8f603c7, +- 0x8e769b76, 0x876d766d, +- 0xb8f6f807, 0x8676ff76, +- 0x00007fff, 0xb976f807, +- 0xbeee007e, 0xbeef007f, +- 0xbefe0180, 0xbf900004, +- 0xbf8e0002, 0xbf88fffe, +- 0xbef4007e, 0x8675ff7f, +- 0x0000ffff, 0x8775ff75, +- 0x00040000, 0xbef60080, +- 0xbef700ff, 0x00807fac, +- 0x8676ff7f, 0x08000000, +- 0x8f768376, 0x87777677, +- 0x8676ff7f, 0x70000000, +- 0x8f768176, 0x87777677, +- 0xbefb007c, 0xbefa0080, +- 0xb8fa2a05, 0x807a817a, +- 0x8e7a8a7a, 0xb8f61605, +- 0x80768176, 0x8e768676, +- 0x807a767a, 0xbef60084, +- 0xbef600ff, 0x01000000, +- 0xbefe007c, 0xbefc007a, +- 0xc0611efa, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611b3a, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611b7a, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611bba, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611bfa, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611c3a, 0x0000007c, +- 0x807a847a, 0xbefc007e, +- 0xb8f1f803, 0xbefe007c, +- 0xbefc007a, 0xc0611c7a, ++ 0x00000400, 0xbf85001d, ++ 0x8674ff71, 0x00000800, ++ 0xbf850003, 0x8674ff71, ++ 0x00000100, 0xbf840003, ++ 0x8770ff70, 0x00002000, ++ 0xbf820010, 0xb8faf812, ++ 0xb8fbf813, 0x8efa887a, ++ 0xc00a1d3d, 0x00000000, ++ 0xbf8cc07f, 0x87737574, ++ 0xbf840002, 0xb970f802, ++ 0xbe801d74, 0xb8f1f803, ++ 0x8671ff71, 0x000001ff, ++ 0xbf850002, 0x806c846c, ++ 0x826d806d, 0x866dff6d, ++ 0x0000ffff, 0xb970f802, ++ 0xbe801f6c, 0xb8f1f803, ++ 0x8671ff71, 0x00000100, ++ 0xbf840006, 0xbef60080, ++ 0xb9760203, 0x866dff6d, ++ 0x0000ffff, 0x80ec886c, ++ 0x82ed806d, 0xbef60080, ++ 0xb9760283, 0xbef20068, ++ 0xbef30069, 0xb8f62407, ++ 0x8e769c76, 0x876d766d, ++ 0xb8f603c7, 0x8e769b76, ++ 0x876d766d, 0xb8f6f807, ++ 0x8676ff76, 0x00007fff, ++ 0xb976f807, 0xbeee007e, ++ 0xbeef007f, 0xbefe0180, ++ 0xbf900004, 0xbf8e0002, ++ 0xbf88fffe, 0xbef4007e, ++ 0x8675ff7f, 0x0000ffff, ++ 0x8775ff75, 0x00040000, ++ 0xbef60080, 0xbef700ff, ++ 0x00807fac, 0x8676ff7f, ++ 0x08000000, 0x8f768376, ++ 0x87777677, 0x8676ff7f, ++ 0x70000000, 0x8f768176, ++ 0x87777677, 0xbefb007c, ++ 0xbefa0080, 0xb8fa2a05, ++ 0x807a817a, 0x8e7a8a7a, ++ 0xb8f61605, 0x80768176, ++ 0x8e768676, 0x807a767a, ++ 0xbef60084, 0xbef600ff, ++ 0x01000000, 0xbefe007c, ++ 0xbefc007a, 0xc0611efa, ++ 0x0000007c, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611b3a, ++ 0x0000007c, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611b7a, + 0x0000007c, 0x807a847a, + 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611cba, ++ 0xbefc007a, 0xc0611bba, + 0x0000007c, 0x807a847a, + 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611cfa, ++ 0xbefc007a, 0xc0611bfa, + 0x0000007c, 0x807a847a, +- 0xbefc007e, 0xb8fbf801, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611c3a, ++ 0x0000007c, 0x807a847a, ++ 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, +- 0xc0611efa, 0x0000007c, ++ 0xc0611c7a, 0x0000007c, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, +- 0xc0611e3a, 0x0000007c, ++ 0xc0611cba, 0x0000007c, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, +- 0xc0611e7a, 0x0000007c, ++ 0xc0611cfa, 0x0000007c, + 0x807a847a, 0xbefc007e, +- 0x8676ff7f, 0x04000000, +- 0xbeef0080, 0x876f6f76, +- 0xb8fa2a05, 0x807a817a, +- 0x8e7a8a7a, 0xb8f11605, +- 0x80718171, 0x8e718471, +- 0x8e768271, 0xbef600ff, +- 0x01000000, 0xbef20174, +- 0x80747a74, 0xbefc0080, ++ 0xb8fbf801, 0xbefe007c, ++ 0xbefc007a, 0xc0611efa, ++ 0x0000007c, 0x807a847a, ++ 0xbefc007e, 0x8676ff7f, ++ 0x04000000, 0xbeef0080, ++ 0x876f6f76, 0xb8fa2a05, ++ 0x807a817a, 0x8e7a8a7a, ++ 0xb8f11605, 0x80718171, ++ 0x8e718471, 0x8e768271, ++ 0xbef600ff, 0x01000000, ++ 0xbef20174, 0x80747a74, ++ 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, +@@ -1300,7 +1285,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, +- 0xbf9c0000, 0xbf8200ca, ++ 0xbf9c0000, 0xbf8200c5, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, +@@ -1314,93 +1299,90 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, +- 0xbef6006f, 0xb8ee2a05, +- 0x806e816e, 0x8e6e8a6e, ++ 0xbef6006f, 0xb8f82a05, ++ 0x80788178, 0x8e788a78, + 0xb8f21605, 0x80728172, +- 0x8e728672, 0x806e726e, +- 0x806eff6e, 0x00000080, ++ 0x8e728672, 0x80787278, ++ 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, +- 0x6e1d0000, 0xe0510100, +- 0x6e1d0000, 0x807cff7c, +- 0x00000200, 0x806eff6e, ++ 0x781d0000, 0xe0510100, ++ 0x781d0000, 0x807cff7c, ++ 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, +- 0xbf85fff6, 0xbeee0080, ++ 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, +- 0xbef2006e, 0x806eff6e, ++ 0xbef20078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, +- 0x6e1d0000, 0xe0524100, +- 0x6e1d0100, 0xe0524200, +- 0x6e1d0200, 0xe0524300, +- 0x6e1d0300, 0xbf8c0f70, ++ 0x781d0000, 0xe0524100, ++ 0x781d0100, 0xe0524200, ++ 0x781d0200, 0xe0524300, ++ 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, +- 0x807c847c, 0x806eff6e, ++ 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x721d0000, + 0xe0524100, 0x721d0100, + 0xe0524200, 0x721d0200, + 0xe0524300, 0x721d0300, +- 0xb8ee2a05, 0x806e816e, +- 0x8e6e8a6e, 0xb8f21605, ++ 0xb8f82a05, 0x80788178, ++ 0x8e788a78, 0xb8f21605, + 0x80728172, 0x8e728672, +- 0x806e726e, 0x80eec06e, ++ 0x80787278, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, +- 0x0000006e, 0x80eec06e, ++ 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, +- 0xbe802d00, 0xbe822d02, +- 0xbe842d04, 0xbe862d06, +- 0xbe882d08, 0xbe8a2d0a, +- 0xbe8c2d0c, 0xbe8e2d0e, +- 0xbf06807c, 0xbf84fff1, +- 0xb8ee2a05, 0x806e816e, +- 0x8e6e8a6e, 0xb8f21605, +- 0x80728172, 0x8e728672, +- 0x806e726e, 0xbef60084, +- 0xbef600ff, 0x01000000, +- 0xc0211bfa, 0x0000006e, +- 0x806e846e, 0xc0211b3a, +- 0x0000006e, 0x806e846e, +- 0xc0211b7a, 0x0000006e, +- 0x806e846e, 0xc0211eba, +- 0x0000006e, 0x806e846e, +- 0xc0211efa, 0x0000006e, +- 0x806e846e, 0xc0211c3a, +- 0x0000006e, 0x806e846e, +- 0xc0211c7a, 0x0000006e, +- 0x806e846e, 0xc0211a3a, +- 0x0000006e, 0x806e846e, +- 0xc0211a7a, 0x0000006e, +- 0x806e846e, 0xc0211cfa, +- 0x0000006e, 0x806e846e, +- 0xc0211e3a, 0x0000006e, +- 0x806e846e, 0xc0211e7a, +- 0x0000006e, 0x806e846e, +- 0xbf8cc07f, 0x866dff6d, +- 0x0000ffff, 0xbefc006f, +- 0xbefe007a, 0xbeff007b, +- 0x866f71ff, 0x000003ff, +- 0xb96f4803, 0x866f71ff, +- 0xfffff800, 0x8f6f8b6f, +- 0xb96fa2c3, 0xb973f801, +- 0x866fff6d, 0xf0000000, +- 0x8f6f9c6f, 0x8e6f906f, +- 0xbef20080, 0x87726f72, +- 0x866fff6d, 0x08000000, +- 0x8f6f9b6f, 0x8e6f8f6f, +- 0x87726f72, 0x866fff70, +- 0x00800000, 0x8f6f976f, +- 0xb972f807, 0x86fe7e7e, +- 0x86ea6a6a, 0xb970f802, +- 0xbf8a0000, 0x95806f6c, +- 0xbf810000, 0x00000000, ++ 0xbf800000, 0xbe802d00, ++ 0xbe822d02, 0xbe842d04, ++ 0xbe862d06, 0xbe882d08, ++ 0xbe8a2d0a, 0xbe8c2d0c, ++ 0xbe8e2d0e, 0xbf06807c, ++ 0xbf84fff0, 0xb8f82a05, ++ 0x80788178, 0x8e788a78, ++ 0xb8f21605, 0x80728172, ++ 0x8e728672, 0x80787278, ++ 0xbef60084, 0xbef600ff, ++ 0x01000000, 0xc0211bfa, ++ 0x00000078, 0x80788478, ++ 0xc0211b3a, 0x00000078, ++ 0x80788478, 0xc0211b7a, ++ 0x00000078, 0x80788478, ++ 0xc0211eba, 0x00000078, ++ 0x80788478, 0xc0211efa, ++ 0x00000078, 0x80788478, ++ 0xc0211c3a, 0x00000078, ++ 0x80788478, 0xc0211c7a, ++ 0x00000078, 0x80788478, ++ 0xc0211a3a, 0x00000078, ++ 0x80788478, 0xc0211a7a, ++ 0x00000078, 0x80788478, ++ 0xc0211cfa, 0x00000078, ++ 0x80788478, 0xbf8cc07f, ++ 0x866dff6d, 0x0000ffff, ++ 0xbefc006f, 0xbefe007a, ++ 0xbeff007b, 0x866f71ff, ++ 0x000003ff, 0xb96f4803, ++ 0x866f71ff, 0xfffff800, ++ 0x8f6f8b6f, 0xb96fa2c3, ++ 0xb973f801, 0x866fff6d, ++ 0xf0000000, 0x8f6f9c6f, ++ 0x8e6f906f, 0xbef20080, ++ 0x87726f72, 0x866fff6d, ++ 0x08000000, 0x8f6f9b6f, ++ 0x8e6f8f6f, 0x87726f72, ++ 0x866fff70, 0x00800000, ++ 0x8f6f976f, 0xb972f807, ++ 0x86fe7e7e, 0x86ea6a6a, ++ 0xb970f802, 0xbf8a0000, ++ 0x95806f6c, 0xbf810000, + }; +-- +2.7.4 + |