aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch572
1 files changed, 572 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch
new file mode 100644
index 00000000..fbab67c8
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2775-drm-amdkfd-Use-SQC-when-TCP-would-fail-in-gfx9-conte.patch
@@ -0,0 +1,572 @@
+From eb5266a857727d99e0fe697caf40bc1a3147813a Mon Sep 17 00:00:00 2001
+From: Jay Cornwall <Jay.Cornwall@amd.com>
+Date: Mon, 29 Apr 2019 14:08:29 -0500
+Subject: [PATCH 2775/2940] drm/amdkfd: Use SQC when TCP would fail in gfx9
+ context save.
+
+When a wavefront raises TRAPSTS.XNACK_ERROR with STATUS.ALLOW_REPLAY=0,
+subsequent memory instructions have undefined behavior. In practice,
+SQC stores continue to work but TCP stores do not.
+
+Context save is permitted to fail after XNACK error because the
+wavefront will be halted and subsequently terminated. However, the
+debugger has an interest in retrieving the wavefront VGPR/LDS state.
+
+Detect the out-of-spec case and use SQC stores during context save
+in place of TCP stores.
+
+Change-Id: I98050e06282874197fcebda51480a2e931deb40c
+Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
+---
+ .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 365 ++++++++++++------
+ .../drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 81 ++++
+ 2 files changed, 320 insertions(+), 126 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+index e413d4a71fa3..b0b982cd3f0d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+@@ -274,7 +274,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {
+
+
+ static const uint32_t cwsr_trap_gfx9_hex[] = {
+- 0xbf820001, 0xbf82015e,
++ 0xbf820001, 0xbf820241,
+ 0xb8f8f802, 0x89788678,
+ 0xb8fbf803, 0x866eff7b,
+ 0x00000400, 0xbf85003b,
+@@ -404,15 +404,57 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
++ 0x867aff78, 0x00400000,
++ 0xbf850003, 0xb8faf803,
++ 0x897a7aff, 0x10000000,
++ 0xbf85004d, 0xbe840080,
++ 0xd2890000, 0x00000900,
++ 0x80048104, 0xd2890001,
++ 0x00000900, 0x80048104,
++ 0xd2890002, 0x00000900,
++ 0x80048104, 0xd2890003,
++ 0x00000900, 0x80048104,
++ 0xc069003a, 0x00000070,
++ 0xbf8cc07f, 0x80709070,
++ 0xbf06c004, 0xbf84ffee,
++ 0xbe840080, 0xd2890000,
++ 0x00000901, 0x80048104,
++ 0xd2890001, 0x00000901,
++ 0x80048104, 0xd2890002,
++ 0x00000901, 0x80048104,
++ 0xd2890003, 0x00000901,
++ 0x80048104, 0xc069003a,
++ 0x00000070, 0xbf8cc07f,
++ 0x80709070, 0xbf06c004,
++ 0xbf84ffee, 0xbe840080,
++ 0xd2890000, 0x00000902,
++ 0x80048104, 0xd2890001,
++ 0x00000902, 0x80048104,
++ 0xd2890002, 0x00000902,
++ 0x80048104, 0xd2890003,
++ 0x00000902, 0x80048104,
++ 0xc069003a, 0x00000070,
++ 0xbf8cc07f, 0x80709070,
++ 0xbf06c004, 0xbf84ffee,
++ 0xbe840080, 0xd2890000,
++ 0x00000903, 0x80048104,
++ 0xd2890001, 0x00000903,
++ 0x80048104, 0xd2890002,
++ 0x00000903, 0x80048104,
++ 0xd2890003, 0x00000903,
++ 0x80048104, 0xc069003a,
++ 0x00000070, 0xbf8cc07f,
++ 0x80709070, 0xbf06c004,
++ 0xbf84ffee, 0xbf820008,
+ 0xe0724000, 0x701d0000,
+ 0xe0724100, 0x701d0100,
+ 0xe0724200, 0x701d0200,
+ 0xe0724300, 0x701d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8fb4306, 0x867bc17b,
+- 0xbf84002c, 0xbf8a0000,
++ 0xbf840063, 0xbf8a0000,
+ 0x867aff6f, 0x04000000,
+- 0xbf840028, 0x8e7b867b,
++ 0xbf84005f, 0x8e7b867b,
+ 0x8e7b827b, 0xbef6007b,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8fa1605,
+@@ -422,142 +464,213 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+- 0xd1060002, 0x00011103,
+- 0x7e0602ff, 0x00000200,
+- 0xbefc00ff, 0x00010000,
+- 0xbe800077, 0x8677ff77,
+- 0xff7fffff, 0x8777ff77,
+- 0x00058000, 0xd8ec0000,
+- 0x00000002, 0xbf8cc07f,
+- 0xe0765000, 0x701d0002,
+- 0x68040702, 0xd0c9006a,
+- 0x0000f702, 0xbf87fff7,
+- 0xbef70000, 0xbef000ff,
+- 0x00000400, 0xbefe00c1,
+- 0xbeff00c1, 0xb8fb2a05,
+- 0x807b817b, 0x8e7b827b,
+- 0x8e76887b, 0xbef600ff,
+- 0x01000000, 0xbefc0084,
+- 0xbf0a7b7c, 0xbf840015,
+- 0xbf11017c, 0x807bff7b,
+- 0x00001000, 0x7e000300,
+- 0x7e020301, 0x7e040302,
+- 0x7e060303, 0xe0724000,
+- 0x701d0000, 0xe0724100,
+- 0x701d0100, 0xe0724200,
+- 0x701d0200, 0xe0724300,
+- 0x701d0300, 0x807c847c,
+- 0x8070ff70, 0x00000400,
+- 0xbf0a7b7c, 0xbf85ffef,
+- 0xbf9c0000, 0xbf8200da,
+- 0xbef4007e, 0x8675ff7f,
+- 0x0000ffff, 0x8775ff75,
+- 0x00040000, 0xbef60080,
+- 0xbef700ff, 0x00807fac,
+- 0x866eff7f, 0x08000000,
+- 0x8f6e836e, 0x87776e77,
+- 0x866eff7f, 0x70000000,
+- 0x8f6e816e, 0x87776e77,
+- 0x866eff7f, 0x04000000,
+- 0xbf84001e, 0xbefe00c1,
+- 0xbeff00c1, 0xb8ef4306,
+- 0x866fc16f, 0xbf840019,
+- 0x8e6f866f, 0x8e6f826f,
+- 0xbef6006f, 0xb8f82a05,
+- 0x80788178, 0x8e788a78,
+- 0xb8ee1605, 0x806e816e,
+- 0x8e6e866e, 0x80786e78,
+- 0x8078ff78, 0x00000080,
+- 0xbef600ff, 0x01000000,
+- 0xbefc0080, 0xe0510000,
+- 0x781d0000, 0xe0510100,
+- 0x781d0000, 0x807cff7c,
+- 0x00000200, 0x8078ff78,
+- 0x00000200, 0xbf0a6f7c,
+- 0xbf85fff6, 0xbef80080,
++ 0x867aff78, 0x00400000,
++ 0xbf850003, 0xb8faf803,
++ 0x897a7aff, 0x10000000,
++ 0xbf850030, 0x24040682,
++ 0xd86e4000, 0x00000002,
++ 0xbf8cc07f, 0xbe840080,
++ 0xd2890000, 0x00000900,
++ 0x80048104, 0xd2890001,
++ 0x00000900, 0x80048104,
++ 0xd2890002, 0x00000900,
++ 0x80048104, 0xd2890003,
++ 0x00000900, 0x80048104,
++ 0xc069003a, 0x00000070,
++ 0xbf8cc07f, 0x80709070,
++ 0xbf06c004, 0xbf84ffee,
++ 0xbe840080, 0xd2890000,
++ 0x00000901, 0x80048104,
++ 0xd2890001, 0x00000901,
++ 0x80048104, 0xd2890002,
++ 0x00000901, 0x80048104,
++ 0xd2890003, 0x00000901,
++ 0x80048104, 0xc069003a,
++ 0x00000070, 0xbf8cc07f,
++ 0x80709070, 0xbf06c004,
++ 0xbf84ffee, 0x680404ff,
++ 0x00000200, 0xd0c9006a,
++ 0x0000f702, 0xbf87ffd2,
++ 0xbf820015, 0xd1060002,
++ 0x00011103, 0x7e0602ff,
++ 0x00000200, 0xbefc00ff,
++ 0x00010000, 0xbe800077,
++ 0x8677ff77, 0xff7fffff,
++ 0x8777ff77, 0x00058000,
++ 0xd8ec0000, 0x00000002,
++ 0xbf8cc07f, 0xe0765000,
++ 0x701d0002, 0x68040702,
++ 0xd0c9006a, 0x0000f702,
++ 0xbf87fff7, 0xbef70000,
++ 0xbef000ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+- 0xb8ef2a05, 0x806f816f,
+- 0x8e6f826f, 0x8e76886f,
++ 0xb8fb2a05, 0x807b817b,
++ 0x8e7b827b, 0x8e76887b,
+ 0xbef600ff, 0x01000000,
+- 0xbeee0078, 0x8078ff78,
+- 0x00000400, 0xbefc0084,
+- 0xbf11087c, 0x806fff6f,
+- 0x00008000, 0xe0524000,
+- 0x781d0000, 0xe0524100,
+- 0x781d0100, 0xe0524200,
+- 0x781d0200, 0xe0524300,
+- 0x781d0300, 0xbf8c0f70,
++ 0xbefc0084, 0xbf0a7b7c,
++ 0xbf84006d, 0xbf11017c,
++ 0x807bff7b, 0x00001000,
++ 0x867aff78, 0x00400000,
++ 0xbf850003, 0xb8faf803,
++ 0x897a7aff, 0x10000000,
++ 0xbf850051, 0xbe840080,
++ 0xd2890000, 0x00000900,
++ 0x80048104, 0xd2890001,
++ 0x00000900, 0x80048104,
++ 0xd2890002, 0x00000900,
++ 0x80048104, 0xd2890003,
++ 0x00000900, 0x80048104,
++ 0xc069003a, 0x00000070,
++ 0xbf8cc07f, 0x80709070,
++ 0xbf06c004, 0xbf84ffee,
++ 0xbe840080, 0xd2890000,
++ 0x00000901, 0x80048104,
++ 0xd2890001, 0x00000901,
++ 0x80048104, 0xd2890002,
++ 0x00000901, 0x80048104,
++ 0xd2890003, 0x00000901,
++ 0x80048104, 0xc069003a,
++ 0x00000070, 0xbf8cc07f,
++ 0x80709070, 0xbf06c004,
++ 0xbf84ffee, 0xbe840080,
++ 0xd2890000, 0x00000902,
++ 0x80048104, 0xd2890001,
++ 0x00000902, 0x80048104,
++ 0xd2890002, 0x00000902,
++ 0x80048104, 0xd2890003,
++ 0x00000902, 0x80048104,
++ 0xc069003a, 0x00000070,
++ 0xbf8cc07f, 0x80709070,
++ 0xbf06c004, 0xbf84ffee,
++ 0xbe840080, 0xd2890000,
++ 0x00000903, 0x80048104,
++ 0xd2890001, 0x00000903,
++ 0x80048104, 0xd2890002,
++ 0x00000903, 0x80048104,
++ 0xd2890003, 0x00000903,
++ 0x80048104, 0xc069003a,
++ 0x00000070, 0xbf8cc07f,
++ 0x80709070, 0xbf06c004,
++ 0xbf84ffee, 0x807c847c,
++ 0xbf0a7b7c, 0xbf85ffb1,
++ 0xbf9c0000, 0xbf820012,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+- 0x807c847c, 0x8078ff78,
+- 0x00000400, 0xbf0a6f7c,
+- 0xbf85ffee, 0xbf9c0000,
+- 0xe0524000, 0x6e1d0000,
+- 0xe0524100, 0x6e1d0100,
+- 0xe0524200, 0x6e1d0200,
+- 0xe0524300, 0x6e1d0300,
++ 0xe0724000, 0x701d0000,
++ 0xe0724100, 0x701d0100,
++ 0xe0724200, 0x701d0200,
++ 0xe0724300, 0x701d0300,
++ 0x807c847c, 0x8070ff70,
++ 0x00000400, 0xbf0a7b7c,
++ 0xbf85ffef, 0xbf9c0000,
++ 0xbf8200da, 0xbef4007e,
++ 0x8675ff7f, 0x0000ffff,
++ 0x8775ff75, 0x00040000,
++ 0xbef60080, 0xbef700ff,
++ 0x00807fac, 0x866eff7f,
++ 0x08000000, 0x8f6e836e,
++ 0x87776e77, 0x866eff7f,
++ 0x70000000, 0x8f6e816e,
++ 0x87776e77, 0x866eff7f,
++ 0x04000000, 0xbf84001e,
++ 0xbefe00c1, 0xbeff00c1,
++ 0xb8ef4306, 0x866fc16f,
++ 0xbf840019, 0x8e6f866f,
++ 0x8e6f826f, 0xbef6006f,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+- 0x80786e78, 0x80f8c078,
+- 0xb8ef1605, 0x806f816f,
+- 0x8e6f846f, 0x8e76826f,
+- 0xbef600ff, 0x01000000,
+- 0xbefc006f, 0xc031003a,
+- 0x00000078, 0x80f8c078,
+- 0xbf8cc07f, 0x80fc907c,
+- 0xbf800000, 0xbe802d00,
+- 0xbe822d02, 0xbe842d04,
+- 0xbe862d06, 0xbe882d08,
+- 0xbe8a2d0a, 0xbe8c2d0c,
+- 0xbe8e2d0e, 0xbf06807c,
+- 0xbf84fff0, 0xb8f82a05,
++ 0x80786e78, 0x8078ff78,
++ 0x00000080, 0xbef600ff,
++ 0x01000000, 0xbefc0080,
++ 0xe0510000, 0x781d0000,
++ 0xe0510100, 0x781d0000,
++ 0x807cff7c, 0x00000200,
++ 0x8078ff78, 0x00000200,
++ 0xbf0a6f7c, 0xbf85fff6,
++ 0xbef80080, 0xbefe00c1,
++ 0xbeff00c1, 0xb8ef2a05,
++ 0x806f816f, 0x8e6f826f,
++ 0x8e76886f, 0xbef600ff,
++ 0x01000000, 0xbeee0078,
++ 0x8078ff78, 0x00000400,
++ 0xbefc0084, 0xbf11087c,
++ 0x806fff6f, 0x00008000,
++ 0xe0524000, 0x781d0000,
++ 0xe0524100, 0x781d0100,
++ 0xe0524200, 0x781d0200,
++ 0xe0524300, 0x781d0300,
++ 0xbf8c0f70, 0x7e000300,
++ 0x7e020301, 0x7e040302,
++ 0x7e060303, 0x807c847c,
++ 0x8078ff78, 0x00000400,
++ 0xbf0a6f7c, 0xbf85ffee,
++ 0xbf9c0000, 0xe0524000,
++ 0x6e1d0000, 0xe0524100,
++ 0x6e1d0100, 0xe0524200,
++ 0x6e1d0200, 0xe0524300,
++ 0x6e1d0300, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+- 0xbef60084, 0xbef600ff,
+- 0x01000000, 0xc0211bfa,
++ 0x80f8c078, 0xb8ef1605,
++ 0x806f816f, 0x8e6f846f,
++ 0x8e76826f, 0xbef600ff,
++ 0x01000000, 0xbefc006f,
++ 0xc031003a, 0x00000078,
++ 0x80f8c078, 0xbf8cc07f,
++ 0x80fc907c, 0xbf800000,
++ 0xbe802d00, 0xbe822d02,
++ 0xbe842d04, 0xbe862d06,
++ 0xbe882d08, 0xbe8a2d0a,
++ 0xbe8c2d0c, 0xbe8e2d0e,
++ 0xbf06807c, 0xbf84fff0,
++ 0xb8f82a05, 0x80788178,
++ 0x8e788a78, 0xb8ee1605,
++ 0x806e816e, 0x8e6e866e,
++ 0x80786e78, 0xbef60084,
++ 0xbef600ff, 0x01000000,
++ 0xc0211bfa, 0x00000078,
++ 0x80788478, 0xc0211b3a,
+ 0x00000078, 0x80788478,
+- 0xc0211b3a, 0x00000078,
+- 0x80788478, 0xc0211b7a,
++ 0xc0211b7a, 0x00000078,
++ 0x80788478, 0xc0211c3a,
+ 0x00000078, 0x80788478,
+- 0xc0211c3a, 0x00000078,
+- 0x80788478, 0xc0211c7a,
++ 0xc0211c7a, 0x00000078,
++ 0x80788478, 0xc0211eba,
+ 0x00000078, 0x80788478,
+- 0xc0211eba, 0x00000078,
+- 0x80788478, 0xc0211efa,
++ 0xc0211efa, 0x00000078,
++ 0x80788478, 0xc0211a3a,
+ 0x00000078, 0x80788478,
+- 0xc0211a3a, 0x00000078,
+- 0x80788478, 0xc0211a7a,
++ 0xc0211a7a, 0x00000078,
++ 0x80788478, 0xc0211cfa,
+ 0x00000078, 0x80788478,
+- 0xc0211cfa, 0x00000078,
+- 0x80788478, 0xbf8cc07f,
+- 0xbefc006f, 0xbefe0070,
+- 0xbeff0071, 0x866f7bff,
+- 0x000003ff, 0xb96f4803,
+- 0x866f7bff, 0xfffff800,
+- 0x8f6f8b6f, 0xb96fa2c3,
+- 0xb973f801, 0xb8ee2a05,
+- 0x806e816e, 0x8e6e8a6e,
+- 0xb8ef1605, 0x806f816f,
+- 0x8e6f866f, 0x806e6f6e,
+- 0x806e746e, 0x826f8075,
+- 0x866fff6f, 0x0000ffff,
+- 0xc00b1c37, 0x00000050,
+- 0xc00b1d37, 0x00000060,
+- 0xc0031e77, 0x00000074,
+- 0xbf8cc07f, 0x866fff6d,
+- 0xf8000000, 0x8f6f9b6f,
+- 0x8e6f906f, 0xbeee0080,
+- 0x876e6f6e, 0x866fff6d,
+- 0x04000000, 0x8f6f9a6f,
+- 0x8e6f8f6f, 0x876e6f6e,
+- 0x866fff7a, 0x00800000,
+- 0x8f6f976f, 0xb96ef807,
+- 0x866dff6d, 0x0000ffff,
+- 0x86fe7e7e, 0x86ea6a6a,
+- 0x8f6e837a, 0xb96ee0c2,
+- 0xbf800002, 0xb97a0002,
+- 0xbf8a0000, 0x95806f6c,
+- 0xbf810000, 0x00000000,
++ 0xbf8cc07f, 0xbefc006f,
++ 0xbefe0070, 0xbeff0071,
++ 0x866f7bff, 0x000003ff,
++ 0xb96f4803, 0x866f7bff,
++ 0xfffff800, 0x8f6f8b6f,
++ 0xb96fa2c3, 0xb973f801,
++ 0xb8ee2a05, 0x806e816e,
++ 0x8e6e8a6e, 0xb8ef1605,
++ 0x806f816f, 0x8e6f866f,
++ 0x806e6f6e, 0x806e746e,
++ 0x826f8075, 0x866fff6f,
++ 0x0000ffff, 0xc00b1c37,
++ 0x00000050, 0xc00b1d37,
++ 0x00000060, 0xc0031e77,
++ 0x00000074, 0xbf8cc07f,
++ 0x866fff6d, 0xf8000000,
++ 0x8f6f9b6f, 0x8e6f906f,
++ 0xbeee0080, 0x876e6f6e,
++ 0x866fff6d, 0x04000000,
++ 0x8f6f9a6f, 0x8e6f8f6f,
++ 0x876e6f6e, 0x866fff7a,
++ 0x00800000, 0x8f6f976f,
++ 0xb96ef807, 0x866dff6d,
++ 0x0000ffff, 0x86fe7e7e,
++ 0x86ea6a6a, 0x8f6e837a,
++ 0xb96ee0c2, 0xbf800002,
++ 0xb97a0002, 0xbf8a0000,
++ 0x95806f6c, 0xbf810000,
+ };
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+index 6bae2e022c6e..2800e9bba1f9 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
+@@ -94,6 +94,7 @@ var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+ var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
+ var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+ var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
++var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+
+ /**************************************************************************/
+ /* variables */
+@@ -107,6 +108,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0
+ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1
+ var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3
+ var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29
++var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000
+
+ var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+ var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+@@ -127,6 +129,7 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+ var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
++var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000
+
+ var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+ var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+@@ -582,6 +585,16 @@ if G8SR_VGPR_SR_IN_DWX4
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+ else
++if SAVE_AFTER_XNACK_ERROR
++ check_if_tcp_store_ok()
++ s_cbranch_scc1 L_SAVE_FIRST_VGPRS_WITH_TCP
++
++ write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
++ s_branch L_SAVE_LDS
++
++L_SAVE_FIRST_VGPRS_WITH_TCP:
++end
++
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+@@ -681,6 +694,27 @@ elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss
+ else // BUFFER_STORE
+ v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+ v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
++
++if SAVE_AFTER_XNACK_ERROR
++ check_if_tcp_store_ok()
++ s_cbranch_scc1 L_SAVE_LDS_WITH_TCP
++
++ v_lshlrev_b32 v2, 2, v3
++L_SAVE_LDS_LOOP_SQC:
++ ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40
++ s_waitcnt lgkmcnt(0)
++
++ write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset)
++
++ v_add_u32 v2, 0x200, v2
++ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
++ s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC
++
++ s_branch L_SAVE_LDS_DONE
++
++L_SAVE_LDS_WITH_TCP:
++end
++
+ v_mul_i32_i24 v2, v3, 8 // tid*8
+ v_mov_b32 v3, 256*2
+ s_mov_b32 m0, 0x10000
+@@ -767,6 +801,21 @@ else
+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
+
++if SAVE_AFTER_XNACK_ERROR
++ check_if_tcp_store_ok()
++ s_cbranch_scc1 L_SAVE_VGPR_LOOP
++
++L_SAVE_VGPR_LOOP_SQC:
++ write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset)
++
++ s_add_u32 m0, m0, 4
++ s_cmp_lt_u32 m0, s_save_alloc_size
++ s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC
++
++ s_set_gpr_idx_off
++ s_branch L_SAVE_VGPR_END
++end
++
+ L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 //v0 = v[0+m0]
+ v_mov_b32 v1, v1 //v0 = v[0+m0]
+@@ -1190,7 +1239,39 @@ function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+ end
+
++function check_if_tcp_store_ok
++ // If STATUS.ALLOW_REPLAY=0 and TRAPSTS.XNACK_ERROR=1 then TCP stores will fail. Result is left in SCC (callers take the TCP path on scc1); s_save_tmp is clobbered.
++ s_and_b32 s_save_tmp, s_save_status, SQ_WAVE_STATUS_ALLOW_REPLAY_MASK // non-zero (SCC=1) when ALLOW_REPLAY is set in the saved STATUS
++ s_cbranch_scc1 L_TCP_STORE_CHECK_DONE // replay allowed: TCP stores are fine, skip the TRAPSTS check
++
++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS)
++ s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp // mask & ~TRAPSTS: non-zero (SCC=1) only when XNACK_ERROR is clear
++
++L_TCP_STORE_CHECK_DONE:
++end
++
++function write_vgpr_to_mem_with_sqc(v, s_rsrc, s_mem_offset)
++ s_mov_b32 s4, 0 // s4 = index of the next lane to read; s[0:3] and s4 are clobbered by this function
++
++L_WRITE_VGPR_LANE_LOOP:
++ for var lane = 0; lane < 4; ++ lane
++ v_readlane_b32 s[lane], v, s4 // s[lane] = lane s4 of v; the for-loop is expanded 4x so s[0:3] hold 4 consecutive lanes
++ s_add_u32 s4, s4, 1
++ end
++
++ s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1 // scalar store of the 4 gathered dwords (used in place of the TCP buffer_store path)
++ ack_sqc_store_workaround() // helper defined elsewhere in the file; presumably tied to ACK_SQC_STORE - confirm
+
++ s_add_u32 s_mem_offset, s_mem_offset, 0x10 // advance past the 16 bytes just stored; 16 iterations total = 0x100 bytes per VGPR
++ s_cmp_eq_u32 s4, 0x40 // done once all 0x40 (64) lanes have been read
++ s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP
++end
++
++function write_vgprs_to_mem_with_sqc(v, n_vgprs, s_rsrc, s_mem_offset)
++ for var vgpr = 0; vgpr < n_vgprs; ++ vgpr
++ write_vgpr_to_mem_with_sqc(v[vgpr], s_rsrc, s_mem_offset) // expanded once per VGPR starting at v; s_mem_offset is advanced by the callee, so VGPRs land back to back
++ end
++end
+
+ function get_lds_size_bytes(s_lds_size_byte)
+ // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+--
+2.17.1
+