diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4167-drm-amdkfd-Implement-SPI-debug-and-exception-support.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/4167-drm-amdkfd-Implement-SPI-debug-and-exception-support.patch | 587 |
1 files changed, 587 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4167-drm-amdkfd-Implement-SPI-debug-and-exception-support.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4167-drm-amdkfd-Implement-SPI-debug-and-exception-support.patch new file mode 100644 index 00000000..96b4ea2d --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4167-drm-amdkfd-Implement-SPI-debug-and-exception-support.patch @@ -0,0 +1,587 @@ +From f1f7d96ba6f0de5623f489c3d28a22c3f95b5707 Mon Sep 17 00:00:00 2001 +From: Jay Cornwall <Jay.Cornwall@amd.com> +Date: Tue, 3 Apr 2018 18:41:50 -0500 +Subject: [PATCH 4167/5725] drm/amdkfd: Implement SPI debug and exception + support in gfx9 trap handler + +The SPI can be configured to populate trap temporary SGPRs with data +specific to individual wavefronts. These SGPRs are currently trashed +by the context save/restore handler and trap/exception handler. + +- Shuffle some ttmp register usage to preserve SPI debug data +- Save/restore SPI debug ttmps 6-11 and 13-15 in context save area +- Propagate exceptions to second-level trap handler +- Modify second-level jump protocol to preserve SPI debug ttmps +- Defer VGPR XNACK mask save until VGPR save, clear mask before using +- Save/restore scalar XNACK state + +Change-Id: I7699ea7a0e61b32c532e50c26a3e24976660960f +Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com> +--- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 312 +++++++++++++-------- + 1 file changed, 198 insertions(+), 114 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +index bd2957c..8ef6b44 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +@@ -122,11 +122,14 @@ var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + + var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME + var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME ++var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 + var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + + var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 + var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + ++var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data ++var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + + /* Save */ + var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +@@ -151,7 +154,7 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3 + var s_save_pc_hi = ttmp1 + var s_save_exec_lo = ttmp2 + var s_save_exec_hi = ttmp3 +-var s_save_status = ttmp4 ++var s_save_tmp = ttmp4 + var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine + var s_save_xnack_mask_lo = ttmp6 + var s_save_xnack_mask_hi = ttmp7 +@@ -159,11 +162,12 @@ var s_save_buf_rsrc0 = ttmp8 + var s_save_buf_rsrc1 = ttmp9 + var s_save_buf_rsrc2 = ttmp10 + var s_save_buf_rsrc3 = ttmp11 +- ++var s_save_status = ttmp12 + var s_save_mem_offset = ttmp14 + var s_save_alloc_size = s_save_trapsts //conflict +-var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) + var s_save_m0 = ttmp15 ++var s_save_ttmps_lo = s_save_tmp //no conflict ++var s_save_ttmps_hi = s_save_trapsts //no conflict + + /* Restore */ + var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +@@ -186,7 +190,7 @@ var s_restore_spi_init_hi = exec_hi + + var s_restore_mem_offset = ttmp12 + var s_restore_alloc_size = ttmp3 +-var s_restore_tmp = ttmp6 ++var s_restore_tmp = ttmp2 + var s_restore_mem_offset_save = s_restore_tmp //no conflict + + var s_restore_m0 = s_restore_alloc_size //no conflict +@@ -205,6 +209,8 @@ var s_restore_buf_rsrc0 = ttmp8 + var s_restore_buf_rsrc1 = ttmp9 + var s_restore_buf_rsrc2 = ttmp10 + var s_restore_buf_rsrc3 = ttmp11 ++var s_restore_ttmps_lo = s_restore_tmp //no conflict ++var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + + /**************************************************************************/ + /* trap handler entry points */ +@@ -235,25 +241,25 @@ L_SKIP_RESTORE: + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) +- s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save ++ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* + if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. +- s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK ++ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. +- s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK +- s_cbranch_scc0 L_NO_MEM_VIOL ++ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK ++ s_cbranch_scc0 L_FETCH_2ND_TRAP + + L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. + // We cannot prevent further faults so just terminate the wavefront. +- s_and_b32 ttmp8, s_save_status, SQ_WAVE_STATUS_HALT_MASK ++ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm + L_NOT_ALREADY_HALTED: +@@ -264,19 +270,31 @@ L_NOT_ALREADY_HALTED: + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +- s_branch L_EXCP_CASE +- +-L_NO_MEM_VIOL: +- /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ +- s_getreg_b32 ttmp14,hwreg(HW_REG_SQ_SHADER_TMA_LO) +- s_getreg_b32 ttmp15,hwreg(HW_REG_SQ_SHADER_TMA_HI) +- s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 +- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [ttmp14, ttmp15], 0 +- s_waitcnt lgkmcnt(0) +- s_or_b32 ttmp7, ttmp8, ttmp9 +- s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set +- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) +- s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler ++L_FETCH_2ND_TRAP: ++ // Preserve and clear scalar XNACK state before issuing scalar reads. ++ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. ++ s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) ++ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK ++ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) ++ s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK ++ s_or_b32 ttmp11, ttmp11, ttmp3 ++ ++ s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK ++ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 ++ ++ // Read second-level TBA/TMA from first-level TMA and jump if available. ++ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) ++ // ttmp12 holds SQ_WAVE_STATUS ++ s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) ++ s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) ++ s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 ++ s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA ++ s_waitcnt lgkmcnt(0) ++ s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA ++ s_waitcnt lgkmcnt(0) ++ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] ++ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set ++ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + + L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) +@@ -286,8 +304,18 @@ L_NO_NEXT_TRAP: + s_addc_u32 ttmp1, ttmp1, 0 + L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF +- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) +- s_rfe_b64 [ttmp0, ttmp1] ++ ++ // Restore SQ_WAVE_IB_STS. ++ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) ++ s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK ++ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 ++ ++ // Restore SQ_WAVE_STATUS. ++ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 ++ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status ++ ++ s_rfe_b64 [ttmp0, ttmp1] + end + // ********* End handling of non-CWSR traps ******************* + +@@ -307,8 +335,6 @@ end + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + +- s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK +- s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi //save XNACK must before any memory operation + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp +@@ -350,7 +376,6 @@ if G8SR_DEBUG_TIMESTAMP + s_waitcnt lgkmcnt(0) + end + +- /* setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 +@@ -368,7 +393,24 @@ end + else + end + ++ // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic ++ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 ++ get_vgpr_size_bytes(s_save_ttmps_lo) ++ get_sgpr_size_bytes(s_save_ttmps_hi) ++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi ++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo ++ s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 ++ s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF ++ s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 ++ ack_sqc_store_workaround() ++ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 ++ ack_sqc_store_workaround() ++ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 ++ ack_sqc_store_workaround() ++ s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 ++ ack_sqc_store_workaround() + ++ /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE +@@ -425,8 +467,8 @@ end + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + +- write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO +- write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI ++ write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO ++ write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE +@@ -502,6 +544,8 @@ end + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF ++ s_mov_b32 xnack_mask_lo, 0x0 ++ s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? +@@ -1038,6 +1082,21 @@ end + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode ++ ++ // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic ++ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 ++ get_vgpr_size_bytes(s_restore_ttmps_lo) ++ get_sgpr_size_bytes(s_restore_ttmps_hi) ++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi ++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 ++ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 ++ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF ++ s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 ++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 ++ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 ++ s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 ++ s_waitcnt lgkmcnt(0) ++ + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT +@@ -1085,9 +1144,7 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 +-if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +-end ++ ack_sqc_store_workaround() + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo + end +@@ -1097,21 +1154,13 @@ end + function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 +-if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +-end ++ ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 +-if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +-end ++ ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 +-if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +-end ++ ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 +-if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +-end ++ ack_sqc_store_workaround() + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc + end +@@ -1151,56 +1200,80 @@ function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes + end + ++function ack_sqc_store_workaround ++ if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++ end ++end + + + #endif + + static const uint32_t cwsr_trap_gfx9_hex[] = { +- 0xbf820001, 0xbf820130, +- 0xb8f0f802, 0x89708670, +- 0xb8f1f803, 0x8674ff71, +- 0x00000400, 0xbf850023, +- 0x8674ff71, 0x00000800, +- 0xbf850003, 0x8674ff71, +- 0x00000100, 0xbf840009, +- 0x8674ff70, 0x00002000, ++ 0xbf820001, 0xbf820158, ++ 0xb8f8f802, 0x89788678, ++ 0xb8f1f803, 0x866eff71, ++ 0x00000400, 0xbf850034, ++ 0x866eff71, 0x00000800, ++ 0xbf850003, 0x866eff71, ++ 0x00000100, 0xbf840008, ++ 0x866eff78, 0x00002000, + 0xbf840001, 0xbf810000, +- 0x8770ff70, 0x00002000, ++ 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, +- 0xbf820010, 0xb8faf812, +- 0xb8fbf813, 0x8efa887a, +- 0xc00a1d3d, 0x00000000, +- 0xbf8cc07f, 0x87737574, +- 0xbf840002, 0xb970f802, +- 0xbe801d74, 0xb8f1f803, +- 0x8671ff71, 0x000001ff, +- 0xbf850002, 0x806c846c, +- 0x826d806d, 0x866dff6d, +- 0x0000ffff, 0xb970f802, +- 0xbe801f6c, 0x866dff6d, +- 0x0000ffff, 0xbef60080, +- 0xb9760283, 0xbef20068, +- 0xbef30069, 0xb8f62407, +- 0x8e769c76, 0x876d766d, +- 0xb8f603c7, 0x8e769b76, +- 0x876d766d, 0xb8f6f807, +- 0x8676ff76, 0x00007fff, +- 0xb976f807, 0xbeee007e, +- 0xbeef007f, 0xbefe0180, +- 0xbf900004, 0xbf8e0002, +- 0xbf88fffe, 0xbef4007e, ++ 0xb8eef807, 0x866fff6e, ++ 0x001f8000, 0x8e6f8b6f, ++ 0x8977ff77, 0xfc000000, ++ 0x87776f77, 0x896eff6e, ++ 0x001f8000, 0xb96ef807, ++ 0xb8f0f812, 0xb8f1f813, ++ 0x8ef08870, 0xc0071bb8, ++ 0x00000000, 0xbf8cc07f, ++ 0xc0071c38, 0x00000008, ++ 0xbf8cc07f, 0x86ee6e6e, ++ 0xbf840001, 0xbe801d6e, ++ 0xb8f1f803, 0x8671ff71, ++ 0x000001ff, 0xbf850002, ++ 0x806c846c, 0x826d806d, ++ 0x866dff6d, 0x0000ffff, ++ 0x8f6e8b77, 0x866eff6e, ++ 0x001f8000, 0xb96ef807, ++ 0x86fe7e7e, 0x86ea6a6a, ++ 0xb978f802, 0xbe801f6c, ++ 0x866dff6d, 0x0000ffff, ++ 0xbef00080, 0xb9700283, ++ 0xb8f02407, 0x8e709c70, ++ 0x876d706d, 0xb8f003c7, ++ 0x8e709b70, 0x876d706d, ++ 0xb8f0f807, 0x8670ff70, ++ 0x00007fff, 0xb970f807, ++ 0xbeee007e, 0xbeef007f, ++ 0xbefe0180, 0xbf900004, ++ 0xbf8e0002, 0xbf88fffe, ++ 0xb8f02a05, 0x80708170, ++ 0x8e708a70, 0xb8f11605, ++ 0x80718171, 0x8e718671, ++ 0x80707170, 0x80707e70, ++ 0x8271807f, 0x8671ff71, ++ 0x0000ffff, 0xc0471cb8, ++ 0x00000040, 0xbf8cc07f, ++ 0xc04b1d38, 0x00000048, ++ 0xbf8cc07f, 0xc0431e78, ++ 0x00000058, 0xbf8cc07f, ++ 0xc0471eb8, 0x0000005c, ++ 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, +- 0x00807fac, 0x8676ff7f, +- 0x08000000, 0x8f768376, +- 0x87777677, 0x8676ff7f, +- 0x70000000, 0x8f768176, +- 0x87777677, 0xbefb007c, ++ 0x00807fac, 0x8670ff7f, ++ 0x08000000, 0x8f708370, ++ 0x87777077, 0x8670ff7f, ++ 0x70000000, 0x8f708170, ++ 0x87777077, 0xbefb007c, + 0xbefa0080, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, +- 0xb8f61605, 0x80768176, +- 0x8e768676, 0x807a767a, ++ 0xb8f01605, 0x80708170, ++ 0x8e708670, 0x807a707a, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, +@@ -1221,26 +1294,26 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, +- 0xc0611c3a, 0x0000007c, ++ 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611cba, ++ 0xbefc007a, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, +- 0xc0611cfa, 0x0000007c, ++ 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8fbf801, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, +- 0xbefc007e, 0x8676ff7f, ++ 0xbefc007e, 0x8670ff7f, + 0x04000000, 0xbeef0080, +- 0x876f6f76, 0xb8fa2a05, ++ 0x876f6f70, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f11605, 0x80718171, + 0x8e718471, 0x8e768271, +@@ -1262,6 +1335,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, ++ 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, +@@ -1270,13 +1344,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbefe00c1, 0xbeff00c1, + 0xb8f14306, 0x8671c171, + 0xbf84002c, 0xbf8a0000, +- 0x8676ff6f, 0x04000000, ++ 0x8670ff6f, 0x04000000, + 0xbf840028, 0x8e718671, + 0x8e718271, 0xbef60071, + 0xb8fa2a05, 0x807a817a, +- 0x8e7a8a7a, 0xb8f61605, +- 0x80768176, 0x8e768676, +- 0x807a767a, 0x807aff7a, ++ 0x8e7a8a7a, 0xb8f01605, ++ 0x80708170, 0x8e708670, ++ 0x807a707a, 0x807aff7a, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, +@@ -1308,24 +1382,24 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, +- 0xbf9c0000, 0xbf8200c5, ++ 0xbf9c0000, 0xbf8200d9, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, +- 0x8672ff7f, 0x08000000, +- 0x8f728372, 0x87777277, +- 0x8672ff7f, 0x70000000, +- 0x8f728172, 0x87777277, +- 0x8672ff7f, 0x04000000, ++ 0x866eff7f, 0x08000000, ++ 0x8f6e836e, 0x87776e77, ++ 0x866eff7f, 0x70000000, ++ 0x8f6e816e, 0x87776e77, ++ 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, +- 0xb8f21605, 0x80728172, +- 0x8e728672, 0x80787278, ++ 0xb8ee1605, 0x806e816e, ++ 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, +@@ -1338,7 +1412,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, +- 0xbef20078, 0x8078ff78, ++ 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, +@@ -1351,14 +1425,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, +- 0xe0524000, 0x721d0000, +- 0xe0524100, 0x721d0100, +- 0xe0524200, 0x721d0200, +- 0xe0524300, 0x721d0300, ++ 0xe0524000, 0x6e1d0000, ++ 0xe0524100, 0x6e1d0100, ++ 0xe0524200, 0x6e1d0200, ++ 0xe0524300, 0x6e1d0300, + 0xb8f82a05, 0x80788178, +- 0x8e788a78, 0xb8f21605, +- 0x80728172, 0x8e728672, +- 0x80787278, 0x80f8c078, ++ 0x8e788a78, 0xb8ee1605, ++ 0x806e816e, 0x8e6e866e, ++ 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, +@@ -1372,8 +1446,8 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, +- 0xb8f21605, 0x80728172, +- 0x8e728672, 0x80787278, ++ 0xb8ee1605, 0x806e816e, ++ 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, +@@ -1397,14 +1471,24 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { + 0x000003ff, 0xb96f4803, + 0x866f71ff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, +- 0xb973f801, 0x866fff6d, ++ 0xb973f801, 0xb8ee2a05, ++ 0x806e816e, 0x8e6e8a6e, ++ 0xb8ef1605, 0x806f816f, ++ 0x8e6f866f, 0x806e6f6e, ++ 0x806e746e, 0x826f8075, ++ 0x866fff6f, 0x0000ffff, ++ 0xc0071cb7, 0x00000040, ++ 0xc00b1d37, 0x00000048, ++ 0xc0031e77, 0x00000058, ++ 0xc0071eb7, 0x0000005c, ++ 0xbf8cc07f, 0x866fff6d, + 0xf0000000, 0x8f6f9c6f, +- 0x8e6f906f, 0xbef20080, +- 0x87726f72, 0x866fff6d, ++ 0x8e6f906f, 0xbeee0080, ++ 0x876e6f6e, 0x866fff6d, + 0x08000000, 0x8f6f9b6f, +- 0x8e6f8f6f, 0x87726f72, ++ 0x8e6f8f6f, 0x876e6f6e, + 0x866fff70, 0x00800000, +- 0x8f6f976f, 0xb972f807, ++ 0x8f6f976f, 0xb96ef807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb970f802, 0xbf8a0000, + 0x95806f6c, 0xbf810000, +-- +2.7.4 + |