Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/5618-drm-amdkfd-Roll-back-all-q4-amdkfd-patches-added-by-.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/5618-drm-amdkfd-Roll-back-all-q4-amdkfd-patches-added-by-.patch | 8070 |
1 file changed, 8070 insertions, 0 deletions
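The functional core of the rollback is visible in the cik_event_interrupt.c and cik_int.h hunks below: the upstream code decoded VMID and PASID out of the raw ring_id dword with shift-and-mask arithmetic, while the restored BSP code overlays a bitfield struct on the interrupt ring entry and reads ihre->vmid and ihre->pasid directly. The two views are equivalent on the little-endian, GCC-style bitfield layout the in-kernel struct relies on. Below is a minimal host-side sketch of that equivalence; the struct layout, the masks, and the CIK_INTSRC_CP_END_OF_PIPE value are copied from the hunks, while the demo values and helper names are hypothetical.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Bitfield view restored by the patch (see the cik_int.h hunk).
 * Dword 2 carries pipeid/meid in bits 0..7, VMID in bits 8..15
 * and PASID in bits 16..31 on a little-endian GCC layout. */
struct cik_ih_ring_entry_demo {
	uint32_t source_id:8;
	uint32_t reserved1:8;
	uint32_t reserved2:16;

	uint32_t data:28;
	uint32_t reserved3:4;

	uint32_t pipeid:2;
	uint32_t meid:2;
	uint32_t reserved4:4;
	uint32_t vmid:8;
	uint32_t pasid:16;

	uint32_t reserved5;
};

/* Mask-based view removed by the patch (see the
 * cik_event_interrupt.c hunk): the same dword, decoded by hand. */
static unsigned int ring_id_vmid(uint32_t ring_id)
{
	return (ring_id & 0x0000ff00) >> 8;
}

static unsigned int ring_id_pasid(uint32_t ring_id)
{
	return (ring_id & 0xffff0000) >> 16;
}

int main(void)
{
	/* Hypothetical entry: CP end-of-pipe interrupt (source_id 0xB5)
	 * from VMID 9, PASID 0x1234 packed into dword 2. */
	uint32_t words[4] = { 0xB5, 0, (0x1234u << 16) | (9u << 8), 0 };
	struct cik_ih_ring_entry_demo e;

	memcpy(&e, words, sizeof(e));
	printf("bitfields: source_id=0x%x vmid=%u pasid=%u\n",
	       (unsigned)e.source_id, (unsigned)e.vmid, (unsigned)e.pasid);
	printf("masks:     vmid=%u pasid=%u\n",
	       ring_id_vmid(words[2]), ring_id_pasid(words[2]));
	return 0;
}

Reading it both ways also makes the Hawaii workaround in the first hunk easier to follow: there the hardware does not populate VMID/PASID in the ring entry, so the ISR fills them into the copied entry (tmp_ihre->vmid, tmp_ihre->pasid) before the usual KFD-VMID-range checks run. The mask-based form presumably exists upstream to avoid depending on compiler bitfield ordering; the rollback trades that portability for more readable field access.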
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/5618-drm-amdkfd-Roll-back-all-q4-amdkfd-patches-added-by-.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/5618-drm-amdkfd-Roll-back-all-q4-amdkfd-patches-added-by-.patch new file mode 100644 index 00000000..7798330d --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/5618-drm-amdkfd-Roll-back-all-q4-amdkfd-patches-added-by-.patch @@ -0,0 +1,8070 @@ +From f00599ff354b3f061df8ce41217562f7c1bfcc2d Mon Sep 17 00:00:00 2001 +From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +Date: Wed, 9 Jan 2019 21:21:38 +0530 +Subject: [PATCH 5618/5725] drm/amdkfd: Roll back all q4 amdkfd patches added + by Kalyan. + +Signed-off-by: Ravi Kumar <ravi1.kumar@amd.com> +Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/Makefile | 4 +- + drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 78 +- + drivers/gpu/drm/amd/amdkfd/cik_int.h | 25 +- + drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 568 ---------- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 298 +++++- + .../gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 439 +++++--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1090 ++++++-------------- + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 60 +- + drivers/gpu/drm/amd/amdkfd/kfd_crat.h | 48 +- + drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 50 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 230 ++--- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 213 ++-- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 16 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 6 +- + .../drm/amd/amdkfd/kfd_device_queue_manager_vi.c | 29 +- + drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 22 +- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 129 +-- + drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 - + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 61 +- + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 81 +- + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 3 +- + drivers/gpu/drm/amd/amdkfd/kfd_ipc.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 26 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 119 +++ + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 78 +- + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 180 ++-- + drivers/gpu/drm/amd/amdkfd/kfd_module.c | 21 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 1 - + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 2 - + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 28 - + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 63 +- + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 47 +- + drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 102 +- + drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c | 8 +- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 178 ++-- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 212 ++-- + .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 26 +- + drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 8 +- + drivers/gpu/drm/amd/amdkfd/kfd_rdma.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 94 +- + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 13 +- + drivers/gpu/drm/amd/amdkfd/soc15_int.h | 41 +- + 43 files changed, 1930 insertions(+), 2774 deletions(-) + delete mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h + mode change 100644 => 100755 drivers/gpu/drm/amd/amdkfd/kfd_device.c + +diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile +index 4804f9c..b65537a 100644 +--- 
a/drivers/gpu/drm/amd/amdkfd/Makefile ++++ b/drivers/gpu/drm/amd/amdkfd/Makefile +@@ -24,7 +24,9 @@ + # + + FULL_AMD_PATH=$(src)/.. +-ccflags-y := -I$(FULL_AMD_PATH)/include \ ++ ++ccflags-y := -Iinclude/drm \ ++ -I$(FULL_AMD_PATH)/include/ \ + -I$(FULL_AMD_PATH)/include/asic_reg + + amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ +diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +index 5d2475d..751c004 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c ++++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +@@ -24,6 +24,20 @@ + #include "kfd_events.h" + #include "cik_int.h" + ++static bool is_cpc_vm_fault(struct kfd_dev *dev, ++ const uint32_t *ih_ring_entry) ++{ ++ const struct cik_ih_ring_entry *ihre = ++ (const struct cik_ih_ring_entry *)ih_ring_entry; ++ ++ if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || ++ ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && ++ ihre->vmid >= dev->vm_info.first_vmid_kfd && ++ ihre->vmid <= dev->vm_info.last_vmid_kfd) ++ return true; ++ return false; ++} ++ + static bool cik_event_interrupt_isr(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, +@@ -32,7 +46,8 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, + const struct cik_ih_ring_entry *ihre = + (const struct cik_ih_ring_entry *)ih_ring_entry; + const struct kfd2kgd_calls *f2g = dev->kfd2kgd; +- unsigned int vmid, pasid; ++ struct cik_ih_ring_entry *tmp_ihre = ++ (struct cik_ih_ring_entry *) patched_ihre; + + /* This workaround is due to HW/FW limitation on Hawaii that + * VMID and PASID are not written into ih_ring_entry +@@ -40,44 +55,23 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, + if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && + dev->device_info->asic_family == CHIP_HAWAII) { +- struct cik_ih_ring_entry *tmp_ihre = +- (struct cik_ih_ring_entry *)patched_ihre; +- + *patched_flag = true; + *tmp_ihre = *ihre; + +- vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); +- pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); +- +- tmp_ihre->ring_id &= 0x000000ff; +- tmp_ihre->ring_id |= vmid << 8; +- tmp_ihre->ring_id |= pasid << 16; +- +- return (pasid != 0) && +- vmid >= dev->vm_info.first_vmid_kfd && +- vmid <= dev->vm_info.last_vmid_kfd; ++ tmp_ihre->vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); ++ tmp_ihre->pasid = f2g->get_atc_vmid_pasid_mapping_pasid( ++ dev->kgd, tmp_ihre->vmid); ++ return (tmp_ihre->pasid != 0) && ++ tmp_ihre->vmid >= dev->vm_info.first_vmid_kfd && ++ tmp_ihre->vmid <= dev->vm_info.last_vmid_kfd; + } +- +- /* Only handle interrupts from KFD VMIDs */ +- vmid = (ihre->ring_id & 0x0000ff00) >> 8; +- if (vmid < dev->vm_info.first_vmid_kfd || +- vmid > dev->vm_info.last_vmid_kfd) +- return 0; +- +- /* If there is no valid PASID, it's likely a firmware bug */ +- pasid = (ihre->ring_id & 0xffff0000) >> 16; +- if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) +- return 0; +- +- /* Interrupt types we care about: various signals and faults. +- * They will be forwarded to a work queue (see below). +- */ +- return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ++ /* Do not process in ISR, just request it to be forwarded to WQ. 
*/ ++ return (ihre->pasid != 0) && ++ (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + ihre->source_id == CIK_INTSRC_SDMA_TRAP || + ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || +- ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || +- ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; ++ is_cpc_vm_fault(dev, ih_ring_entry)); + } + + static void cik_event_interrupt_wq(struct kfd_dev *dev, +@@ -86,35 +80,33 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, + const struct cik_ih_ring_entry *ihre = + (const struct cik_ih_ring_entry *)ih_ring_entry; + uint32_t context_id = ihre->data & 0xfffffff; +- unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8; +- unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16; + +- if (pasid == 0) ++ if (ihre->pasid == 0) + return; + + if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) +- kfd_signal_event_interrupt(pasid, context_id, 28); ++ kfd_signal_event_interrupt(ihre->pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) +- kfd_signal_event_interrupt(pasid, context_id, 28); ++ kfd_signal_event_interrupt(ihre->pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) +- kfd_signal_event_interrupt(pasid, context_id & 0xff, 8); ++ kfd_signal_event_interrupt(ihre->pasid, context_id & 0xff, 8); + else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) +- kfd_signal_hw_exception_event(pasid); ++ kfd_signal_hw_exception_event(ihre->pasid); + else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) { + struct kfd_vm_fault_info info; + +- kfd_process_vm_fault(dev->dqm, pasid); ++ kfd_process_vm_fault(dev->dqm, ihre->pasid); + + memset(&info, 0, sizeof(info)); + dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info); + if (!info.page_addr && !info.status) + return; + +- if (info.vmid == vmid) +- kfd_signal_vm_fault_event(dev, pasid, &info); ++ if (info.vmid == ihre->vmid) ++ kfd_signal_vm_fault_event(dev, ihre->pasid, &info); + else +- kfd_signal_vm_fault_event(dev, pasid, NULL); ++ kfd_signal_vm_fault_event(dev, ihre->pasid, NULL); + } + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h +index a2079a0..ff8255d 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h ++++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h +@@ -26,19 +26,32 @@ + #include <linux/types.h> + + struct cik_ih_ring_entry { +- uint32_t source_id; +- uint32_t data; +- uint32_t ring_id; +- uint32_t reserved; ++ uint32_t source_id:8; ++ uint32_t reserved1:8; ++ uint32_t reserved2:16; ++ ++ uint32_t data:28; ++ uint32_t reserved3:4; ++ ++ /* pipeid, meid and unused3 are officially called RINGID, ++ * but for our purposes, they always decode into pipe and ME. 
++ */ ++ uint32_t pipeid:2; ++ uint32_t meid:2; ++ uint32_t reserved4:4; ++ uint32_t vmid:8; ++ uint32_t pasid:16; ++ ++ uint32_t reserved5; + }; + ++#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 + #define CIK_INTSRC_CP_END_OF_PIPE 0xB5 + #define CIK_INTSRC_CP_BAD_OPCODE 0xB7 +-#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6 +-#define CIK_INTSRC_SDMA_TRAP 0xE0 + #define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF + #define CIK_INTSRC_GFX_PAGE_INV_FAULT 0x92 + #define CIK_INTSRC_GFX_MEM_PROT_FAULT 0x93 ++#define CIK_INTSRC_SDMA_TRAP 0xE0 + + #endif + +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +deleted file mode 100644 +index 3621efb..0000000 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h ++++ /dev/null +@@ -1,568 +0,0 @@ +-/* +- * Copyright 2018 Advanced Micro Devices, Inc. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. 
+- */ +- +-static const uint32_t cwsr_trap_gfx8_hex[] = { +- 0xbf820001, 0xbf82012b, +- 0xb8f4f802, 0x89748674, +- 0xb8f5f803, 0x8675ff75, +- 0x00000400, 0xbf850017, +- 0xc00a1e37, 0x00000000, +- 0xbf8c007f, 0x87777978, +- 0xbf840005, 0x8f728374, +- 0xb972e0c2, 0xbf800002, +- 0xb9740002, 0xbe801d78, +- 0xb8f5f803, 0x8675ff75, +- 0x000001ff, 0xbf850002, +- 0x80708470, 0x82718071, +- 0x8671ff71, 0x0000ffff, +- 0x8f728374, 0xb972e0c2, +- 0xbf800002, 0xb9740002, +- 0xbe801f70, 0xb8f5f803, +- 0x8675ff75, 0x00000100, +- 0xbf840006, 0xbefa0080, +- 0xb97a0203, 0x8671ff71, +- 0x0000ffff, 0x80f08870, +- 0x82f18071, 0xbefa0080, +- 0xb97a0283, 0xbef60068, +- 0xbef70069, 0xb8fa1c07, +- 0x8e7a9c7a, 0x87717a71, +- 0xb8fa03c7, 0x8e7a9b7a, +- 0x87717a71, 0xb8faf807, +- 0x867aff7a, 0x00007fff, +- 0xb97af807, 0xbef2007e, +- 0xbef3007f, 0xbefe0180, +- 0xbf900004, 0x877a8474, +- 0xb97af802, 0xbf8e0002, +- 0xbf88fffe, 0xbef8007e, +- 0x8679ff7f, 0x0000ffff, +- 0x8779ff79, 0x00040000, +- 0xbefa0080, 0xbefb00ff, +- 0x00807fac, 0x867aff7f, +- 0x08000000, 0x8f7a837a, +- 0x877b7a7b, 0x867aff7f, +- 0x70000000, 0x8f7a817a, +- 0x877b7a7b, 0xbeef007c, +- 0xbeee0080, 0xb8ee2a05, +- 0x806e816e, 0x8e6e8a6e, +- 0xb8fa1605, 0x807a817a, +- 0x8e7a867a, 0x806e7a6e, +- 0xbefa0084, 0xbefa00ff, +- 0x01000000, 0xbefe007c, +- 0xbefc006e, 0xc0611bfc, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611c3c, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611c7c, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611cbc, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611cfc, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611d3c, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xb8f5f803, +- 0xbefe007c, 0xbefc006e, +- 0xc0611d7c, 0x0000007c, +- 0x806e846e, 0xbefc007e, +- 0xbefe007c, 0xbefc006e, +- 0xc0611dbc, 0x0000007c, +- 0x806e846e, 0xbefc007e, +- 0xbefe007c, 0xbefc006e, +- 0xc0611dfc, 0x0000007c, +- 0x806e846e, 0xbefc007e, +- 0xb8eff801, 0xbefe007c, +- 0xbefc006e, 0xc0611bfc, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611b3c, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0xbefe007c, +- 0xbefc006e, 0xc0611b7c, +- 0x0000007c, 0x806e846e, +- 0xbefc007e, 0x867aff7f, +- 0x04000000, 0xbef30080, +- 0x8773737a, 0xb8ee2a05, +- 0x806e816e, 0x8e6e8a6e, +- 0xb8f51605, 0x80758175, +- 0x8e758475, 0x8e7a8275, +- 0xbefa00ff, 0x01000000, +- 0xbef60178, 0x80786e78, +- 0x82798079, 0xbefc0080, +- 0xbe802b00, 0xbe822b02, +- 0xbe842b04, 0xbe862b06, +- 0xbe882b08, 0xbe8a2b0a, +- 0xbe8c2b0c, 0xbe8e2b0e, +- 0xc06b003c, 0x00000000, +- 0xc06b013c, 0x00000010, +- 0xc06b023c, 0x00000020, +- 0xc06b033c, 0x00000030, +- 0x8078c078, 0x82798079, +- 0x807c907c, 0xbf0a757c, +- 0xbf85ffeb, 0xbef80176, +- 0xbeee0080, 0xbefe00c1, +- 0xbeff00c1, 0xbefa00ff, +- 0x01000000, 0xe0724000, +- 0x6e1e0000, 0xe0724100, +- 0x6e1e0100, 0xe0724200, +- 0x6e1e0200, 0xe0724300, +- 0x6e1e0300, 0xbefe00c1, +- 0xbeff00c1, 0xb8f54306, +- 0x8675c175, 0xbf84002c, +- 0xbf8a0000, 0x867aff73, +- 0x04000000, 0xbf840028, +- 0x8e758675, 0x8e758275, +- 0xbefa0075, 0xb8ee2a05, +- 0x806e816e, 0x8e6e8a6e, +- 0xb8fa1605, 0x807a817a, +- 0x8e7a867a, 0x806e7a6e, +- 0x806eff6e, 0x00000080, +- 0xbefa00ff, 0x01000000, +- 0xbefc0080, 0xd28c0002, +- 0x000100c1, 0xd28d0003, +- 0x000204c1, 0xd1060002, +- 0x00011103, 0x7e0602ff, +- 0x00000200, 0xbefc00ff, +- 0x00010000, 0xbe80007b, +- 0x867bff7b, 0xff7fffff, +- 0x877bff7b, 0x00058000, +- 0xd8ec0000, 0x00000002, +- 
0xbf8c007f, 0xe0765000, +- 0x6e1e0002, 0x32040702, +- 0xd0c9006a, 0x0000eb02, +- 0xbf87fff7, 0xbefb0000, +- 0xbeee00ff, 0x00000400, +- 0xbefe00c1, 0xbeff00c1, +- 0xb8f52a05, 0x80758175, +- 0x8e758275, 0x8e7a8875, +- 0xbefa00ff, 0x01000000, +- 0xbefc0084, 0xbf0a757c, +- 0xbf840015, 0xbf11017c, +- 0x8075ff75, 0x00001000, +- 0x7e000300, 0x7e020301, +- 0x7e040302, 0x7e060303, +- 0xe0724000, 0x6e1e0000, +- 0xe0724100, 0x6e1e0100, +- 0xe0724200, 0x6e1e0200, +- 0xe0724300, 0x6e1e0300, +- 0x807c847c, 0x806eff6e, +- 0x00000400, 0xbf0a757c, +- 0xbf85ffef, 0xbf9c0000, +- 0xbf8200cd, 0xbef8007e, +- 0x8679ff7f, 0x0000ffff, +- 0x8779ff79, 0x00040000, +- 0xbefa0080, 0xbefb00ff, +- 0x00807fac, 0x8676ff7f, +- 0x08000000, 0x8f768376, +- 0x877b767b, 0x8676ff7f, +- 0x70000000, 0x8f768176, +- 0x877b767b, 0x8676ff7f, +- 0x04000000, 0xbf84001e, +- 0xbefe00c1, 0xbeff00c1, +- 0xb8f34306, 0x8673c173, +- 0xbf840019, 0x8e738673, +- 0x8e738273, 0xbefa0073, +- 0xb8f22a05, 0x80728172, +- 0x8e728a72, 0xb8f61605, +- 0x80768176, 0x8e768676, +- 0x80727672, 0x8072ff72, +- 0x00000080, 0xbefa00ff, +- 0x01000000, 0xbefc0080, +- 0xe0510000, 0x721e0000, +- 0xe0510100, 0x721e0000, +- 0x807cff7c, 0x00000200, +- 0x8072ff72, 0x00000200, +- 0xbf0a737c, 0xbf85fff6, +- 0xbef20080, 0xbefe00c1, +- 0xbeff00c1, 0xb8f32a05, +- 0x80738173, 0x8e738273, +- 0x8e7a8873, 0xbefa00ff, +- 0x01000000, 0xbef60072, +- 0x8072ff72, 0x00000400, +- 0xbefc0084, 0xbf11087c, +- 0x8073ff73, 0x00008000, +- 0xe0524000, 0x721e0000, +- 0xe0524100, 0x721e0100, +- 0xe0524200, 0x721e0200, +- 0xe0524300, 0x721e0300, +- 0xbf8c0f70, 0x7e000300, +- 0x7e020301, 0x7e040302, +- 0x7e060303, 0x807c847c, +- 0x8072ff72, 0x00000400, +- 0xbf0a737c, 0xbf85ffee, +- 0xbf9c0000, 0xe0524000, +- 0x761e0000, 0xe0524100, +- 0x761e0100, 0xe0524200, +- 0x761e0200, 0xe0524300, +- 0x761e0300, 0xb8f22a05, +- 0x80728172, 0x8e728a72, +- 0xb8f61605, 0x80768176, +- 0x8e768676, 0x80727672, +- 0x80f2c072, 0xb8f31605, +- 0x80738173, 0x8e738473, +- 0x8e7a8273, 0xbefa00ff, +- 0x01000000, 0xbefc0073, +- 0xc031003c, 0x00000072, +- 0x80f2c072, 0xbf8c007f, +- 0x80fc907c, 0xbe802d00, +- 0xbe822d02, 0xbe842d04, +- 0xbe862d06, 0xbe882d08, +- 0xbe8a2d0a, 0xbe8c2d0c, +- 0xbe8e2d0e, 0xbf06807c, +- 0xbf84fff1, 0xb8f22a05, +- 0x80728172, 0x8e728a72, +- 0xb8f61605, 0x80768176, +- 0x8e768676, 0x80727672, +- 0xbefa0084, 0xbefa00ff, +- 0x01000000, 0xc0211cfc, +- 0x00000072, 0x80728472, +- 0xc0211c3c, 0x00000072, +- 0x80728472, 0xc0211c7c, +- 0x00000072, 0x80728472, +- 0xc0211bbc, 0x00000072, +- 0x80728472, 0xc0211bfc, +- 0x00000072, 0x80728472, +- 0xc0211d3c, 0x00000072, +- 0x80728472, 0xc0211d7c, +- 0x00000072, 0x80728472, +- 0xc0211a3c, 0x00000072, +- 0x80728472, 0xc0211a7c, +- 0x00000072, 0x80728472, +- 0xc0211dfc, 0x00000072, +- 0x80728472, 0xc0211b3c, +- 0x00000072, 0x80728472, +- 0xc0211b7c, 0x00000072, +- 0x80728472, 0xbf8c007f, +- 0xbefc0073, 0xbefe006e, +- 0xbeff006f, 0x867375ff, +- 0x000003ff, 0xb9734803, +- 0x867375ff, 0xfffff800, +- 0x8f738b73, 0xb973a2c3, +- 0xb977f801, 0x8673ff71, +- 0xf0000000, 0x8f739c73, +- 0x8e739073, 0xbef60080, +- 0x87767376, 0x8673ff71, +- 0x08000000, 0x8f739b73, +- 0x8e738f73, 0x87767376, +- 0x8673ff74, 0x00800000, +- 0x8f739773, 0xb976f807, +- 0x8671ff71, 0x0000ffff, +- 0x86fe7e7e, 0x86ea6a6a, +- 0x8f768374, 0xb976e0c2, +- 0xbf800002, 0xb9740002, +- 0xbf8a0000, 0x95807370, +- 0xbf810000, 0x00000000, +-}; +- +- +-static const uint32_t cwsr_trap_gfx9_hex[] = { +- 0xbf820001, 0xbf82015d, +- 0xb8f8f802, 0x89788678, +- 0xb8f1f803, 0x866eff71, +- 0x00000400, 0xbf850037, +- 0x866eff71, 
0x00000800, +- 0xbf850003, 0x866eff71, +- 0x00000100, 0xbf840008, +- 0x866eff78, 0x00002000, +- 0xbf840001, 0xbf810000, +- 0x8778ff78, 0x00002000, +- 0x80ec886c, 0x82ed806d, +- 0xb8eef807, 0x866fff6e, +- 0x001f8000, 0x8e6f8b6f, +- 0x8977ff77, 0xfc000000, +- 0x87776f77, 0x896eff6e, +- 0x001f8000, 0xb96ef807, +- 0xb8f0f812, 0xb8f1f813, +- 0x8ef08870, 0xc0071bb8, +- 0x00000000, 0xbf8cc07f, +- 0xc0071c38, 0x00000008, +- 0xbf8cc07f, 0x86ee6e6e, +- 0xbf840001, 0xbe801d6e, +- 0xb8f1f803, 0x8671ff71, +- 0x000001ff, 0xbf850002, +- 0x806c846c, 0x826d806d, +- 0x866dff6d, 0x0000ffff, +- 0x8f6e8b77, 0x866eff6e, +- 0x001f8000, 0xb96ef807, +- 0x86fe7e7e, 0x86ea6a6a, +- 0x8f6e8378, 0xb96ee0c2, +- 0xbf800002, 0xb9780002, +- 0xbe801f6c, 0x866dff6d, +- 0x0000ffff, 0xbef00080, +- 0xb9700283, 0xb8f02407, +- 0x8e709c70, 0x876d706d, +- 0xb8f003c7, 0x8e709b70, +- 0x876d706d, 0xb8f0f807, +- 0x8670ff70, 0x00007fff, +- 0xb970f807, 0xbeee007e, +- 0xbeef007f, 0xbefe0180, +- 0xbf900004, 0x87708478, +- 0xb970f802, 0xbf8e0002, +- 0xbf88fffe, 0xb8f02a05, +- 0x80708170, 0x8e708a70, +- 0xb8f11605, 0x80718171, +- 0x8e718671, 0x80707170, +- 0x80707e70, 0x8271807f, +- 0x8671ff71, 0x0000ffff, +- 0xc0471cb8, 0x00000040, +- 0xbf8cc07f, 0xc04b1d38, +- 0x00000048, 0xbf8cc07f, +- 0xc0431e78, 0x00000058, +- 0xbf8cc07f, 0xc0471eb8, +- 0x0000005c, 0xbf8cc07f, +- 0xbef4007e, 0x8675ff7f, +- 0x0000ffff, 0x8775ff75, +- 0x00040000, 0xbef60080, +- 0xbef700ff, 0x00807fac, +- 0x8670ff7f, 0x08000000, +- 0x8f708370, 0x87777077, +- 0x8670ff7f, 0x70000000, +- 0x8f708170, 0x87777077, +- 0xbefb007c, 0xbefa0080, +- 0xb8fa2a05, 0x807a817a, +- 0x8e7a8a7a, 0xb8f01605, +- 0x80708170, 0x8e708670, +- 0x807a707a, 0xbef60084, +- 0xbef600ff, 0x01000000, +- 0xbefe007c, 0xbefc007a, +- 0xc0611efa, 0x0000007c, +- 0xbf8cc07f, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611b3a, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611b7a, 0x0000007c, +- 0xbf8cc07f, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611bba, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611bfa, 0x0000007c, +- 0xbf8cc07f, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611e3a, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0xb8f1f803, 0xbefe007c, +- 0xbefc007a, 0xc0611c7a, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0xbefe007c, 0xbefc007a, +- 0xc0611a3a, 0x0000007c, +- 0xbf8cc07f, 0x807a847a, +- 0xbefc007e, 0xbefe007c, +- 0xbefc007a, 0xc0611a7a, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0xb8fbf801, 0xbefe007c, +- 0xbefc007a, 0xc0611efa, +- 0x0000007c, 0xbf8cc07f, +- 0x807a847a, 0xbefc007e, +- 0x8670ff7f, 0x04000000, +- 0xbeef0080, 0x876f6f70, +- 0xb8fa2a05, 0x807a817a, +- 0x8e7a8a7a, 0xb8f11605, +- 0x80718171, 0x8e718471, +- 0x8e768271, 0xbef600ff, +- 0x01000000, 0xbef20174, +- 0x80747a74, 0x82758075, +- 0xbefc0080, 0xbf800000, +- 0xbe802b00, 0xbe822b02, +- 0xbe842b04, 0xbe862b06, +- 0xbe882b08, 0xbe8a2b0a, +- 0xbe8c2b0c, 0xbe8e2b0e, +- 0xc06b003a, 0x00000000, +- 0xbf8cc07f, 0xc06b013a, +- 0x00000010, 0xbf8cc07f, +- 0xc06b023a, 0x00000020, +- 0xbf8cc07f, 0xc06b033a, +- 0x00000030, 0xbf8cc07f, +- 0x8074c074, 0x82758075, +- 0x807c907c, 0xbf0a717c, +- 0xbf85ffe7, 0xbef40172, +- 0xbefa0080, 0xbefe00c1, +- 0xbeff00c1, 0xbee80080, +- 0xbee90080, 0xbef600ff, +- 0x01000000, 0xe0724000, +- 0x7a1d0000, 0xe0724100, +- 0x7a1d0100, 0xe0724200, +- 0x7a1d0200, 0xe0724300, +- 0x7a1d0300, 0xbefe00c1, +- 0xbeff00c1, 0xb8f14306, +- 0x8671c171, 0xbf84002c, +- 
0xbf8a0000, 0x8670ff6f, +- 0x04000000, 0xbf840028, +- 0x8e718671, 0x8e718271, +- 0xbef60071, 0xb8fa2a05, +- 0x807a817a, 0x8e7a8a7a, +- 0xb8f01605, 0x80708170, +- 0x8e708670, 0x807a707a, +- 0x807aff7a, 0x00000080, +- 0xbef600ff, 0x01000000, +- 0xbefc0080, 0xd28c0002, +- 0x000100c1, 0xd28d0003, +- 0x000204c1, 0xd1060002, +- 0x00011103, 0x7e0602ff, +- 0x00000200, 0xbefc00ff, +- 0x00010000, 0xbe800077, +- 0x8677ff77, 0xff7fffff, +- 0x8777ff77, 0x00058000, +- 0xd8ec0000, 0x00000002, +- 0xbf8cc07f, 0xe0765000, +- 0x7a1d0002, 0x68040702, +- 0xd0c9006a, 0x0000e302, +- 0xbf87fff7, 0xbef70000, +- 0xbefa00ff, 0x00000400, +- 0xbefe00c1, 0xbeff00c1, +- 0xb8f12a05, 0x80718171, +- 0x8e718271, 0x8e768871, +- 0xbef600ff, 0x01000000, +- 0xbefc0084, 0xbf0a717c, +- 0xbf840015, 0xbf11017c, +- 0x8071ff71, 0x00001000, +- 0x7e000300, 0x7e020301, +- 0x7e040302, 0x7e060303, +- 0xe0724000, 0x7a1d0000, +- 0xe0724100, 0x7a1d0100, +- 0xe0724200, 0x7a1d0200, +- 0xe0724300, 0x7a1d0300, +- 0x807c847c, 0x807aff7a, +- 0x00000400, 0xbf0a717c, +- 0xbf85ffef, 0xbf9c0000, +- 0xbf8200dc, 0xbef4007e, +- 0x8675ff7f, 0x0000ffff, +- 0x8775ff75, 0x00040000, +- 0xbef60080, 0xbef700ff, +- 0x00807fac, 0x866eff7f, +- 0x08000000, 0x8f6e836e, +- 0x87776e77, 0x866eff7f, +- 0x70000000, 0x8f6e816e, +- 0x87776e77, 0x866eff7f, +- 0x04000000, 0xbf84001e, +- 0xbefe00c1, 0xbeff00c1, +- 0xb8ef4306, 0x866fc16f, +- 0xbf840019, 0x8e6f866f, +- 0x8e6f826f, 0xbef6006f, +- 0xb8f82a05, 0x80788178, +- 0x8e788a78, 0xb8ee1605, +- 0x806e816e, 0x8e6e866e, +- 0x80786e78, 0x8078ff78, +- 0x00000080, 0xbef600ff, +- 0x01000000, 0xbefc0080, +- 0xe0510000, 0x781d0000, +- 0xe0510100, 0x781d0000, +- 0x807cff7c, 0x00000200, +- 0x8078ff78, 0x00000200, +- 0xbf0a6f7c, 0xbf85fff6, +- 0xbef80080, 0xbefe00c1, +- 0xbeff00c1, 0xb8ef2a05, +- 0x806f816f, 0x8e6f826f, +- 0x8e76886f, 0xbef600ff, +- 0x01000000, 0xbeee0078, +- 0x8078ff78, 0x00000400, +- 0xbefc0084, 0xbf11087c, +- 0x806fff6f, 0x00008000, +- 0xe0524000, 0x781d0000, +- 0xe0524100, 0x781d0100, +- 0xe0524200, 0x781d0200, +- 0xe0524300, 0x781d0300, +- 0xbf8c0f70, 0x7e000300, +- 0x7e020301, 0x7e040302, +- 0x7e060303, 0x807c847c, +- 0x8078ff78, 0x00000400, +- 0xbf0a6f7c, 0xbf85ffee, +- 0xbf9c0000, 0xe0524000, +- 0x6e1d0000, 0xe0524100, +- 0x6e1d0100, 0xe0524200, +- 0x6e1d0200, 0xe0524300, +- 0x6e1d0300, 0xb8f82a05, +- 0x80788178, 0x8e788a78, +- 0xb8ee1605, 0x806e816e, +- 0x8e6e866e, 0x80786e78, +- 0x80f8c078, 0xb8ef1605, +- 0x806f816f, 0x8e6f846f, +- 0x8e76826f, 0xbef600ff, +- 0x01000000, 0xbefc006f, +- 0xc031003a, 0x00000078, +- 0x80f8c078, 0xbf8cc07f, +- 0x80fc907c, 0xbf800000, +- 0xbe802d00, 0xbe822d02, +- 0xbe842d04, 0xbe862d06, +- 0xbe882d08, 0xbe8a2d0a, +- 0xbe8c2d0c, 0xbe8e2d0e, +- 0xbf06807c, 0xbf84fff0, +- 0xb8f82a05, 0x80788178, +- 0x8e788a78, 0xb8ee1605, +- 0x806e816e, 0x8e6e866e, +- 0x80786e78, 0xbef60084, +- 0xbef600ff, 0x01000000, +- 0xc0211bfa, 0x00000078, +- 0x80788478, 0xc0211b3a, +- 0x00000078, 0x80788478, +- 0xc0211b7a, 0x00000078, +- 0x80788478, 0xc0211eba, +- 0x00000078, 0x80788478, +- 0xc0211efa, 0x00000078, +- 0x80788478, 0xc0211c3a, +- 0x00000078, 0x80788478, +- 0xc0211c7a, 0x00000078, +- 0x80788478, 0xc0211a3a, +- 0x00000078, 0x80788478, +- 0xc0211a7a, 0x00000078, +- 0x80788478, 0xc0211cfa, +- 0x00000078, 0x80788478, +- 0xbf8cc07f, 0xbefc006f, +- 0xbefe007a, 0xbeff007b, +- 0x866f71ff, 0x000003ff, +- 0xb96f4803, 0x866f71ff, +- 0xfffff800, 0x8f6f8b6f, +- 0xb96fa2c3, 0xb973f801, +- 0xb8ee2a05, 0x806e816e, +- 0x8e6e8a6e, 0xb8ef1605, +- 0x806f816f, 0x8e6f866f, +- 0x806e6f6e, 0x806e746e, +- 0x826f8075, 
0x866fff6f, +- 0x0000ffff, 0xc0071cb7, +- 0x00000040, 0xc00b1d37, +- 0x00000048, 0xc0031e77, +- 0x00000058, 0xc0071eb7, +- 0x0000005c, 0xbf8cc07f, +- 0x866fff6d, 0xf0000000, +- 0x8f6f9c6f, 0x8e6f906f, +- 0xbeee0080, 0x876e6f6e, +- 0x866fff6d, 0x08000000, +- 0x8f6f9b6f, 0x8e6f8f6f, +- 0x876e6f6e, 0x866fff70, +- 0x00800000, 0x8f6f976f, +- 0xb96ef807, 0x866dff6d, +- 0x0000ffff, 0x86fe7e7e, +- 0x86ea6a6a, 0x8f6e8370, +- 0xb96ee0c2, 0xbf800002, +- 0xb9700002, 0xbf8a0000, +- 0x95806f6c, 0xbf810000, +-}; +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +index abe1a5d..751cc2e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +@@ -20,12 +20,9 @@ + * OTHER DEALINGS IN THE SOFTWARE. + */ + +-/* To compile this assembly code: +- * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex +- */ +- +-/* HW (VI) source code for CWSR trap handler */ +-/* Version 18 + multiple trap handler */ ++#if 0 ++HW (VI) source code for CWSR trap handler ++#Version 18 + multiple trap handler + + // this performance-optimal version was originally from Seven Xu at SRDC + +@@ -77,7 +74,7 @@ var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_D + /*************************************************************************/ + /* control on how to run the shader */ + /*************************************************************************/ +-//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) ++//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) + var EMU_RUN_HACK = 0 + var EMU_RUN_HACK_RESTORE_NORMAL = 0 + var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +@@ -91,9 +88,9 @@ var WG_BASE_ADDR_HI = 0x0 + var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem + var CTX_SAVE_CONTROL = 0x0 + var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +-var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) ++var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) + var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +-var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes ++var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //becasue TC EMU curently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes + var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing + + /**************************************************************************/ +@@ -101,12 +98,7 @@ var SWIZZLE_EN = 0 //whether we use swi + /**************************************************************************/ + var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 + var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +-var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 + var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +-var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +-var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +-var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +-var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 + + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +@@ -157,7 +149,7 @@ 
var s_save_spi_init_lo = exec_lo + var s_save_spi_init_hi = exec_hi + + //tba_lo and tba_hi need to be saved/restored +-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} ++var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} + var s_save_pc_hi = ttmp1 + var s_save_exec_lo = ttmp2 + var s_save_exec_hi = ttmp3 +@@ -255,7 +247,7 @@ if (!EMU_RUN_HACK) + s_waitcnt lgkmcnt(0) + s_or_b32 ttmp7, ttmp8, ttmp9 + s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set +- set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler + + L_NO_NEXT_TRAP: +@@ -266,7 +258,7 @@ L_NO_NEXT_TRAP: + s_addc_u32 ttmp1, ttmp1, 0 + L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF +- set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) + s_rfe_b64 [ttmp0, ttmp1] + end + // ********* End handling of non-CWSR traps ******************* +@@ -327,10 +319,6 @@ end + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + +- // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. +- s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) +- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp +- + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + +@@ -1019,6 +1007,8 @@ end + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + ++ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS ++ + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) +@@ -1054,12 +1044,11 @@ end + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + +- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 +- set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + +- s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time ++ s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time + + if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d +@@ -1139,10 +1128,257 @@ function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes + end + +-function set_status_without_spi_prio(status, tmp) +- // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. 
+- s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT +- s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp +- s_nop 0x2 // avoid S_SETREG => S_SETREG hazard +- s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +-end ++ ++#endif ++ ++static const uint32_t cwsr_trap_gfx8_hex[] = { ++ 0xbf820001, 0xbf820123, ++ 0xb8f4f802, 0x89748674, ++ 0xb8f5f803, 0x8675ff75, ++ 0x00000400, 0xbf850011, ++ 0xc00a1e37, 0x00000000, ++ 0xbf8c007f, 0x87777978, ++ 0xbf840002, 0xb974f802, ++ 0xbe801d78, 0xb8f5f803, ++ 0x8675ff75, 0x000001ff, ++ 0xbf850002, 0x80708470, ++ 0x82718071, 0x8671ff71, ++ 0x0000ffff, 0xb974f802, ++ 0xbe801f70, 0xb8f5f803, ++ 0x8675ff75, 0x00000100, ++ 0xbf840006, 0xbefa0080, ++ 0xb97a0203, 0x8671ff71, ++ 0x0000ffff, 0x80f08870, ++ 0x82f18071, 0xbefa0080, ++ 0xb97a0283, 0xbef60068, ++ 0xbef70069, 0xb8fa1c07, ++ 0x8e7a9c7a, 0x87717a71, ++ 0xb8fa03c7, 0x8e7a9b7a, ++ 0x87717a71, 0xb8faf807, ++ 0x867aff7a, 0x00007fff, ++ 0xb97af807, 0xbef2007e, ++ 0xbef3007f, 0xbefe0180, ++ 0xbf900004, 0xbf8e0002, ++ 0xbf88fffe, 0xbef8007e, ++ 0x8679ff7f, 0x0000ffff, ++ 0x8779ff79, 0x00040000, ++ 0xbefa0080, 0xbefb00ff, ++ 0x00807fac, 0x867aff7f, ++ 0x08000000, 0x8f7a837a, ++ 0x877b7a7b, 0x867aff7f, ++ 0x70000000, 0x8f7a817a, ++ 0x877b7a7b, 0xbeef007c, ++ 0xbeee0080, 0xb8ee2a05, ++ 0x806e816e, 0x8e6e8a6e, ++ 0xb8fa1605, 0x807a817a, ++ 0x8e7a867a, 0x806e7a6e, ++ 0xbefa0084, 0xbefa00ff, ++ 0x01000000, 0xbefe007c, ++ 0xbefc006e, 0xc0611bfc, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611c3c, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611c7c, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611cbc, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611cfc, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611d3c, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xb8f5f803, ++ 0xbefe007c, 0xbefc006e, ++ 0xc0611d7c, 0x0000007c, ++ 0x806e846e, 0xbefc007e, ++ 0xbefe007c, 0xbefc006e, ++ 0xc0611dbc, 0x0000007c, ++ 0x806e846e, 0xbefc007e, ++ 0xbefe007c, 0xbefc006e, ++ 0xc0611dfc, 0x0000007c, ++ 0x806e846e, 0xbefc007e, ++ 0xb8eff801, 0xbefe007c, ++ 0xbefc006e, 0xc0611bfc, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611b3c, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc006e, 0xc0611b7c, ++ 0x0000007c, 0x806e846e, ++ 0xbefc007e, 0x867aff7f, ++ 0x04000000, 0xbef30080, ++ 0x8773737a, 0xb8ee2a05, ++ 0x806e816e, 0x8e6e8a6e, ++ 0xb8f51605, 0x80758175, ++ 0x8e758475, 0x8e7a8275, ++ 0xbefa00ff, 0x01000000, ++ 0xbef60178, 0x80786e78, ++ 0x82798079, 0xbefc0080, ++ 0xbe802b00, 0xbe822b02, ++ 0xbe842b04, 0xbe862b06, ++ 0xbe882b08, 0xbe8a2b0a, ++ 0xbe8c2b0c, 0xbe8e2b0e, ++ 0xc06b003c, 0x00000000, ++ 0xc06b013c, 0x00000010, ++ 0xc06b023c, 0x00000020, ++ 0xc06b033c, 0x00000030, ++ 0x8078c078, 0x82798079, ++ 0x807c907c, 0xbf0a757c, ++ 0xbf85ffeb, 0xbef80176, ++ 0xbeee0080, 0xbefe00c1, ++ 0xbeff00c1, 0xbefa00ff, ++ 0x01000000, 0xe0724000, ++ 0x6e1e0000, 0xe0724100, ++ 0x6e1e0100, 0xe0724200, ++ 0x6e1e0200, 0xe0724300, ++ 0x6e1e0300, 0xbefe00c1, ++ 0xbeff00c1, 0xb8f54306, ++ 0x8675c175, 0xbf84002c, ++ 0xbf8a0000, 0x867aff73, ++ 0x04000000, 0xbf840028, ++ 0x8e758675, 0x8e758275, ++ 0xbefa0075, 0xb8ee2a05, ++ 0x806e816e, 0x8e6e8a6e, ++ 0xb8fa1605, 0x807a817a, ++ 0x8e7a867a, 0x806e7a6e, ++ 0x806eff6e, 0x00000080, ++ 0xbefa00ff, 0x01000000, ++ 
0xbefc0080, 0xd28c0002, ++ 0x000100c1, 0xd28d0003, ++ 0x000204c1, 0xd1060002, ++ 0x00011103, 0x7e0602ff, ++ 0x00000200, 0xbefc00ff, ++ 0x00010000, 0xbe80007b, ++ 0x867bff7b, 0xff7fffff, ++ 0x877bff7b, 0x00058000, ++ 0xd8ec0000, 0x00000002, ++ 0xbf8c007f, 0xe0765000, ++ 0x6e1e0002, 0x32040702, ++ 0xd0c9006a, 0x0000eb02, ++ 0xbf87fff7, 0xbefb0000, ++ 0xbeee00ff, 0x00000400, ++ 0xbefe00c1, 0xbeff00c1, ++ 0xb8f52a05, 0x80758175, ++ 0x8e758275, 0x8e7a8875, ++ 0xbefa00ff, 0x01000000, ++ 0xbefc0084, 0xbf0a757c, ++ 0xbf840015, 0xbf11017c, ++ 0x8075ff75, 0x00001000, ++ 0x7e000300, 0x7e020301, ++ 0x7e040302, 0x7e060303, ++ 0xe0724000, 0x6e1e0000, ++ 0xe0724100, 0x6e1e0100, ++ 0xe0724200, 0x6e1e0200, ++ 0xe0724300, 0x6e1e0300, ++ 0x807c847c, 0x806eff6e, ++ 0x00000400, 0xbf0a757c, ++ 0xbf85ffef, 0xbf9c0000, ++ 0xbf8200ca, 0xbef8007e, ++ 0x8679ff7f, 0x0000ffff, ++ 0x8779ff79, 0x00040000, ++ 0xbefa0080, 0xbefb00ff, ++ 0x00807fac, 0x8676ff7f, ++ 0x08000000, 0x8f768376, ++ 0x877b767b, 0x8676ff7f, ++ 0x70000000, 0x8f768176, ++ 0x877b767b, 0x8676ff7f, ++ 0x04000000, 0xbf84001e, ++ 0xbefe00c1, 0xbeff00c1, ++ 0xb8f34306, 0x8673c173, ++ 0xbf840019, 0x8e738673, ++ 0x8e738273, 0xbefa0073, ++ 0xb8f22a05, 0x80728172, ++ 0x8e728a72, 0xb8f61605, ++ 0x80768176, 0x8e768676, ++ 0x80727672, 0x8072ff72, ++ 0x00000080, 0xbefa00ff, ++ 0x01000000, 0xbefc0080, ++ 0xe0510000, 0x721e0000, ++ 0xe0510100, 0x721e0000, ++ 0x807cff7c, 0x00000200, ++ 0x8072ff72, 0x00000200, ++ 0xbf0a737c, 0xbf85fff6, ++ 0xbef20080, 0xbefe00c1, ++ 0xbeff00c1, 0xb8f32a05, ++ 0x80738173, 0x8e738273, ++ 0x8e7a8873, 0xbefa00ff, ++ 0x01000000, 0xbef60072, ++ 0x8072ff72, 0x00000400, ++ 0xbefc0084, 0xbf11087c, ++ 0x8073ff73, 0x00008000, ++ 0xe0524000, 0x721e0000, ++ 0xe0524100, 0x721e0100, ++ 0xe0524200, 0x721e0200, ++ 0xe0524300, 0x721e0300, ++ 0xbf8c0f70, 0x7e000300, ++ 0x7e020301, 0x7e040302, ++ 0x7e060303, 0x807c847c, ++ 0x8072ff72, 0x00000400, ++ 0xbf0a737c, 0xbf85ffee, ++ 0xbf9c0000, 0xe0524000, ++ 0x761e0000, 0xe0524100, ++ 0x761e0100, 0xe0524200, ++ 0x761e0200, 0xe0524300, ++ 0x761e0300, 0xb8f22a05, ++ 0x80728172, 0x8e728a72, ++ 0xb8f61605, 0x80768176, ++ 0x8e768676, 0x80727672, ++ 0x80f2c072, 0xb8f31605, ++ 0x80738173, 0x8e738473, ++ 0x8e7a8273, 0xbefa00ff, ++ 0x01000000, 0xbefc0073, ++ 0xc031003c, 0x00000072, ++ 0x80f2c072, 0xbf8c007f, ++ 0x80fc907c, 0xbe802d00, ++ 0xbe822d02, 0xbe842d04, ++ 0xbe862d06, 0xbe882d08, ++ 0xbe8a2d0a, 0xbe8c2d0c, ++ 0xbe8e2d0e, 0xbf06807c, ++ 0xbf84fff1, 0xb8f22a05, ++ 0x80728172, 0x8e728a72, ++ 0xb8f61605, 0x80768176, ++ 0x8e768676, 0x80727672, ++ 0xbefa0084, 0xbefa00ff, ++ 0x01000000, 0xc0211cfc, ++ 0x00000072, 0x80728472, ++ 0xc0211c3c, 0x00000072, ++ 0x80728472, 0xc0211c7c, ++ 0x00000072, 0x80728472, ++ 0xc0211bbc, 0x00000072, ++ 0x80728472, 0xc0211bfc, ++ 0x00000072, 0x80728472, ++ 0xc0211d3c, 0x00000072, ++ 0x80728472, 0xc0211d7c, ++ 0x00000072, 0x80728472, ++ 0xc0211a3c, 0x00000072, ++ 0x80728472, 0xc0211a7c, ++ 0x00000072, 0x80728472, ++ 0xc0211dfc, 0x00000072, ++ 0x80728472, 0xc0211b3c, ++ 0x00000072, 0x80728472, ++ 0xc0211b7c, 0x00000072, ++ 0x80728472, 0xbf8c007f, ++ 0x8671ff71, 0x0000ffff, ++ 0xbefc0073, 0xbefe006e, ++ 0xbeff006f, 0x867375ff, ++ 0x000003ff, 0xb9734803, ++ 0x867375ff, 0xfffff800, ++ 0x8f738b73, 0xb973a2c3, ++ 0xb977f801, 0x8673ff71, ++ 0xf0000000, 0x8f739c73, ++ 0x8e739073, 0xbef60080, ++ 0x87767376, 0x8673ff71, ++ 0x08000000, 0x8f739b73, ++ 0x8e738f73, 0x87767376, ++ 0x8673ff74, 0x00800000, ++ 0x8f739773, 0xb976f807, ++ 0x86fe7e7e, 0x86ea6a6a, ++ 0xb974f802, 0xbf8a0000, ++ 0x95807370, 
0xbf810000, ++}; ++ +diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +index 0bb9c57..bd2957c 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm ++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +@@ -20,12 +20,9 @@ + * OTHER DEALINGS IN THE SOFTWARE. + */ + +-/* To compile this assembly code: +- * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex +- */ +- +-/* HW (GFX9) source code for CWSR trap handler */ +-/* Version 18 + multiple trap handler */ ++#if 0 ++HW (GFX9) source code for CWSR trap handler ++#Version 18 + multiple trap handler + + // this performance-optimal version was originally from Seven Xu at SRDC + +@@ -77,7 +74,7 @@ var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_D + /*************************************************************************/ + /* control on how to run the shader */ + /*************************************************************************/ +-//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) ++//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) + var EMU_RUN_HACK = 0 + var EMU_RUN_HACK_RESTORE_NORMAL = 0 + var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +@@ -89,9 +86,9 @@ var WG_BASE_ADDR_HI = 0x0 + var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem + var CTX_SAVE_CONTROL = 0x0 + var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +-var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) ++var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) + var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +-var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes ++var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //becasue TC EMU curently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes + var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing + var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency + +@@ -100,13 +97,8 @@ var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing + /**************************************************************************/ + var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 + var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +-var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 + var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 + var SQ_WAVE_STATUS_HALT_MASK = 0x2000 +-var SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT = 0 +-var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 +-var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 +-var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 + + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 + var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +@@ -130,14 +122,11 @@ var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + + var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME + var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +-var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 + var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + + var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 + var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +-var 
TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +-var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + + /* Save */ + var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +@@ -158,11 +147,11 @@ var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + var s_save_spi_init_lo = exec_lo + var s_save_spi_init_hi = exec_hi + +-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} ++var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} + var s_save_pc_hi = ttmp1 + var s_save_exec_lo = ttmp2 + var s_save_exec_hi = ttmp3 +-var s_save_tmp = ttmp4 ++var s_save_status = ttmp4 + var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine + var s_save_xnack_mask_lo = ttmp6 + var s_save_xnack_mask_hi = ttmp7 +@@ -170,12 +159,11 @@ var s_save_buf_rsrc0 = ttmp8 + var s_save_buf_rsrc1 = ttmp9 + var s_save_buf_rsrc2 = ttmp10 + var s_save_buf_rsrc3 = ttmp11 +-var s_save_status = ttmp12 ++ + var s_save_mem_offset = ttmp14 + var s_save_alloc_size = s_save_trapsts //conflict ++var s_save_tmp = s_save_buf_rsrc2 //shared with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) + var s_save_m0 = ttmp15 +-var s_save_ttmps_lo = s_save_tmp //no conflict +-var s_save_ttmps_hi = s_save_trapsts //no conflict + + /* Restore */ + var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +@@ -198,7 +186,7 @@ var s_restore_spi_init_hi = exec_hi + + var s_restore_mem_offset = ttmp12 + var s_restore_alloc_size = ttmp3 +-var s_restore_tmp = ttmp2 ++var s_restore_tmp = ttmp6 + var s_restore_mem_offset_save = s_restore_tmp //no conflict + + var s_restore_m0 = s_restore_alloc_size //no conflict +@@ -217,8 +205,6 @@ var s_restore_buf_rsrc0 = ttmp8 + var s_restore_buf_rsrc1 = ttmp9 + var s_restore_buf_rsrc2 = ttmp10 + var s_restore_buf_rsrc3 = ttmp11 +-var s_restore_ttmps_lo = s_restore_tmp //no conflict +-var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + + /**************************************************************************/ + /* trap handler entry points */ +@@ -249,25 +235,25 @@ L_SKIP_RESTORE: + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) +- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save ++ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* + if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. +- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK ++ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. +- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK +- s_cbranch_scc0 L_FETCH_2ND_TRAP ++ s_and_b32 ttmp8, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK ++ s_cbranch_scc0 L_NO_MEM_VIOL + + L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. 
+ // We cannot prevent further faults so just terminate the wavefront. +- s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK ++ s_and_b32 ttmp8, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm + L_NOT_ALREADY_HALTED: +@@ -278,31 +264,19 @@ L_NOT_ALREADY_HALTED: + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +-L_FETCH_2ND_TRAP: +- // Preserve and clear scalar XNACK state before issuing scalar reads. +- // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. +- s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) +- s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK +- s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) +- s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK +- s_or_b32 ttmp11, ttmp11, ttmp3 +- +- s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK +- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 +- +- // Read second-level TBA/TMA from first-level TMA and jump if available. +- // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) +- // ttmp12 holds SQ_WAVE_STATUS +- s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) +- s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) +- s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 +- s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA +- s_waitcnt lgkmcnt(0) +- s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA +- s_waitcnt lgkmcnt(0) +- s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] +- s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set +- s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler ++ s_branch L_EXCP_CASE ++ ++L_NO_MEM_VIOL: ++ /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ ++ s_getreg_b32 ttmp14,hwreg(HW_REG_SQ_SHADER_TMA_LO) ++ s_getreg_b32 ttmp15,hwreg(HW_REG_SQ_SHADER_TMA_HI) ++ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 ++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [ttmp14, ttmp15], 0 ++ s_waitcnt lgkmcnt(0) ++ s_or_b32 ttmp7, ttmp8, ttmp9 ++ s_cbranch_scc0 L_NO_NEXT_TRAP //next level trap handler not been set ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) ++ s_setpc_b64 [ttmp8,ttmp9] //jump to next level trap handler + + L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) +@@ -312,18 +286,8 @@ L_NO_NEXT_TRAP: + s_addc_u32 ttmp1, ttmp1, 0 + L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF +- +- // Restore SQ_WAVE_IB_STS. +- s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) +- s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK +- s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 +- +- // Restore SQ_WAVE_STATUS. 
+- s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 +- s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 +- set_status_without_spi_prio(s_save_status, ttmp2) +- +- s_rfe_b64 [ttmp0, ttmp1] ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //restore HW status(SCC) ++ s_rfe_b64 [ttmp0, ttmp1] + end + // ********* End handling of non-CWSR traps ******************* + +@@ -343,6 +307,8 @@ end + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + ++ s_mov_b32 s_save_xnack_mask_lo, xnack_mask_lo //save XNACK_MASK ++ s_mov_b32 s_save_xnack_mask_hi, xnack_mask_hi //save XNACK must before any memory operation + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp +@@ -370,10 +336,6 @@ end + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + +- // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. +- s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) +- s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp +- + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + +@@ -388,6 +350,7 @@ if G8SR_DEBUG_TIMESTAMP + s_waitcnt lgkmcnt(0) + end + ++ /* setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 +@@ -405,24 +368,7 @@ end + else + end + +- // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic +- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 +- get_vgpr_size_bytes(s_save_ttmps_lo) +- get_sgpr_size_bytes(s_save_ttmps_hi) +- s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi +- s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo +- s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 +- s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF +- s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 +- ack_sqc_store_workaround() +- s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 +- ack_sqc_store_workaround() +- s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 +- ack_sqc_store_workaround() +- s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 +- ack_sqc_store_workaround() + +- /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE +@@ -479,8 +425,8 @@ end + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + +- write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO +- write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI ++ write_hwreg_to_mem(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO ++ write_hwreg_to_mem(s_save_xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI 
+ + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE +@@ -556,8 +502,6 @@ end + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF +- s_mov_b32 xnack_mask_lo, 0x0 +- s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? +@@ -1071,6 +1015,8 @@ end + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + ++ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS ++ + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) +@@ -1092,21 +1038,6 @@ end + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode +- +- // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic +- // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 +- get_vgpr_size_bytes(s_restore_ttmps_lo) +- get_sgpr_size_bytes(s_restore_ttmps_hi) +- s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi +- s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 +- s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 +- s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF +- s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 +- s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 +- s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 +- s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 +- s_waitcnt lgkmcnt(0) +- + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT +@@ -1121,12 +1052,11 @@ end + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + +- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 +- set_status_without_spi_prio(s_restore_status, s_restore_tmp) // SCC is included, which is changed by previous salu ++ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + +- s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time ++ s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG //FIXME not performance-optimal at this time + + if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d +@@ -1155,7 +1085,9 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, 
m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 +- ack_sqc_store_workaround() ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo + end +@@ -1165,13 +1097,21 @@ end + function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 +- ack_sqc_store_workaround() ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 +- ack_sqc_store_workaround() ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 +- ack_sqc_store_workaround() ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 +- ack_sqc_store_workaround() ++if ACK_SQC_STORE ++ s_waitcnt lgkmcnt(0) ++end + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc + end +@@ -1211,16 +1151,261 @@ function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes + end + +-function ack_sqc_store_workaround +- if ACK_SQC_STORE +- s_waitcnt lgkmcnt(0) +- end +-end + +-function set_status_without_spi_prio(status, tmp) +- // Do not restore STATUS.SPI_PRIO since scheduler may have raised it. +- s_lshr_b32 tmp, status, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT +- s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE), tmp +- s_nop 0x2 // avoid S_SETREG => S_SETREG hazard +- s_setreg_b32 hwreg(HW_REG_STATUS, SQ_WAVE_STATUS_PRE_SPI_PRIO_SHIFT, SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE), status +-end ++ ++#endif ++ ++static const uint32_t cwsr_trap_gfx9_hex[] = { ++ 0xbf820001, 0xbf820130, ++ 0xb8f0f802, 0x89708670, ++ 0xb8f1f803, 0x8674ff71, ++ 0x00000400, 0xbf850023, ++ 0x8674ff71, 0x00000800, ++ 0xbf850003, 0x8674ff71, ++ 0x00000100, 0xbf840009, ++ 0x8674ff70, 0x00002000, ++ 0xbf840001, 0xbf810000, ++ 0x8770ff70, 0x00002000, ++ 0x80ec886c, 0x82ed806d, ++ 0xbf820010, 0xb8faf812, ++ 0xb8fbf813, 0x8efa887a, ++ 0xc00a1d3d, 0x00000000, ++ 0xbf8cc07f, 0x87737574, ++ 0xbf840002, 0xb970f802, ++ 0xbe801d74, 0xb8f1f803, ++ 0x8671ff71, 0x000001ff, ++ 0xbf850002, 0x806c846c, ++ 0x826d806d, 0x866dff6d, ++ 0x0000ffff, 0xb970f802, ++ 0xbe801f6c, 0x866dff6d, ++ 0x0000ffff, 0xbef60080, ++ 0xb9760283, 0xbef20068, ++ 0xbef30069, 0xb8f62407, ++ 0x8e769c76, 0x876d766d, ++ 0xb8f603c7, 0x8e769b76, ++ 0x876d766d, 0xb8f6f807, ++ 0x8676ff76, 0x00007fff, ++ 0xb976f807, 0xbeee007e, ++ 0xbeef007f, 0xbefe0180, ++ 0xbf900004, 0xbf8e0002, ++ 0xbf88fffe, 0xbef4007e, ++ 0x8675ff7f, 0x0000ffff, ++ 0x8775ff75, 0x00040000, ++ 0xbef60080, 0xbef700ff, ++ 0x00807fac, 0x8676ff7f, ++ 0x08000000, 0x8f768376, ++ 0x87777677, 0x8676ff7f, ++ 0x70000000, 0x8f768176, ++ 0x87777677, 0xbefb007c, ++ 0xbefa0080, 0xb8fa2a05, ++ 0x807a817a, 0x8e7a8a7a, ++ 0xb8f61605, 0x80768176, ++ 0x8e768676, 0x807a767a, ++ 0xbef60084, 0xbef600ff, ++ 0x01000000, 0xbefe007c, ++ 0xbefc007a, 0xc0611efa, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611b3a, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611b7a, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611bba, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611bfa, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611c3a, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xb8f1f803, ++ 0xbefe007c, 
0xbefc007a, ++ 0xc0611c7a, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xbefe007c, ++ 0xbefc007a, 0xc0611cba, ++ 0x0000007c, 0xbf8cc07f, ++ 0x807a847a, 0xbefc007e, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611cfa, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0xb8fbf801, ++ 0xbefe007c, 0xbefc007a, ++ 0xc0611efa, 0x0000007c, ++ 0xbf8cc07f, 0x807a847a, ++ 0xbefc007e, 0x8676ff7f, ++ 0x04000000, 0xbeef0080, ++ 0x876f6f76, 0xb8fa2a05, ++ 0x807a817a, 0x8e7a8a7a, ++ 0xb8f11605, 0x80718171, ++ 0x8e718471, 0x8e768271, ++ 0xbef600ff, 0x01000000, ++ 0xbef20174, 0x80747a74, ++ 0x82758075, 0xbefc0080, ++ 0xbf800000, 0xbe802b00, ++ 0xbe822b02, 0xbe842b04, ++ 0xbe862b06, 0xbe882b08, ++ 0xbe8a2b0a, 0xbe8c2b0c, ++ 0xbe8e2b0e, 0xc06b003a, ++ 0x00000000, 0xbf8cc07f, ++ 0xc06b013a, 0x00000010, ++ 0xbf8cc07f, 0xc06b023a, ++ 0x00000020, 0xbf8cc07f, ++ 0xc06b033a, 0x00000030, ++ 0xbf8cc07f, 0x8074c074, ++ 0x82758075, 0x807c907c, ++ 0xbf0a717c, 0xbf85ffe7, ++ 0xbef40172, 0xbefa0080, ++ 0xbefe00c1, 0xbeff00c1, ++ 0xbef600ff, 0x01000000, ++ 0xe0724000, 0x7a1d0000, ++ 0xe0724100, 0x7a1d0100, ++ 0xe0724200, 0x7a1d0200, ++ 0xe0724300, 0x7a1d0300, ++ 0xbefe00c1, 0xbeff00c1, ++ 0xb8f14306, 0x8671c171, ++ 0xbf84002c, 0xbf8a0000, ++ 0x8676ff6f, 0x04000000, ++ 0xbf840028, 0x8e718671, ++ 0x8e718271, 0xbef60071, ++ 0xb8fa2a05, 0x807a817a, ++ 0x8e7a8a7a, 0xb8f61605, ++ 0x80768176, 0x8e768676, ++ 0x807a767a, 0x807aff7a, ++ 0x00000080, 0xbef600ff, ++ 0x01000000, 0xbefc0080, ++ 0xd28c0002, 0x000100c1, ++ 0xd28d0003, 0x000204c1, ++ 0xd1060002, 0x00011103, ++ 0x7e0602ff, 0x00000200, ++ 0xbefc00ff, 0x00010000, ++ 0xbe800077, 0x8677ff77, ++ 0xff7fffff, 0x8777ff77, ++ 0x00058000, 0xd8ec0000, ++ 0x00000002, 0xbf8cc07f, ++ 0xe0765000, 0x7a1d0002, ++ 0x68040702, 0xd0c9006a, ++ 0x0000e302, 0xbf87fff7, ++ 0xbef70000, 0xbefa00ff, ++ 0x00000400, 0xbefe00c1, ++ 0xbeff00c1, 0xb8f12a05, ++ 0x80718171, 0x8e718271, ++ 0x8e768871, 0xbef600ff, ++ 0x01000000, 0xbefc0084, ++ 0xbf0a717c, 0xbf840015, ++ 0xbf11017c, 0x8071ff71, ++ 0x00001000, 0x7e000300, ++ 0x7e020301, 0x7e040302, ++ 0x7e060303, 0xe0724000, ++ 0x7a1d0000, 0xe0724100, ++ 0x7a1d0100, 0xe0724200, ++ 0x7a1d0200, 0xe0724300, ++ 0x7a1d0300, 0x807c847c, ++ 0x807aff7a, 0x00000400, ++ 0xbf0a717c, 0xbf85ffef, ++ 0xbf9c0000, 0xbf8200c5, ++ 0xbef4007e, 0x8675ff7f, ++ 0x0000ffff, 0x8775ff75, ++ 0x00040000, 0xbef60080, ++ 0xbef700ff, 0x00807fac, ++ 0x8672ff7f, 0x08000000, ++ 0x8f728372, 0x87777277, ++ 0x8672ff7f, 0x70000000, ++ 0x8f728172, 0x87777277, ++ 0x8672ff7f, 0x04000000, ++ 0xbf84001e, 0xbefe00c1, ++ 0xbeff00c1, 0xb8ef4306, ++ 0x866fc16f, 0xbf840019, ++ 0x8e6f866f, 0x8e6f826f, ++ 0xbef6006f, 0xb8f82a05, ++ 0x80788178, 0x8e788a78, ++ 0xb8f21605, 0x80728172, ++ 0x8e728672, 0x80787278, ++ 0x8078ff78, 0x00000080, ++ 0xbef600ff, 0x01000000, ++ 0xbefc0080, 0xe0510000, ++ 0x781d0000, 0xe0510100, ++ 0x781d0000, 0x807cff7c, ++ 0x00000200, 0x8078ff78, ++ 0x00000200, 0xbf0a6f7c, ++ 0xbf85fff6, 0xbef80080, ++ 0xbefe00c1, 0xbeff00c1, ++ 0xb8ef2a05, 0x806f816f, ++ 0x8e6f826f, 0x8e76886f, ++ 0xbef600ff, 0x01000000, ++ 0xbef20078, 0x8078ff78, ++ 0x00000400, 0xbefc0084, ++ 0xbf11087c, 0x806fff6f, ++ 0x00008000, 0xe0524000, ++ 0x781d0000, 0xe0524100, ++ 0x781d0100, 0xe0524200, ++ 0x781d0200, 0xe0524300, ++ 0x781d0300, 0xbf8c0f70, ++ 0x7e000300, 0x7e020301, ++ 0x7e040302, 0x7e060303, ++ 0x807c847c, 0x8078ff78, ++ 0x00000400, 0xbf0a6f7c, ++ 0xbf85ffee, 0xbf9c0000, ++ 0xe0524000, 0x721d0000, ++ 0xe0524100, 0x721d0100, ++ 0xe0524200, 0x721d0200, ++ 0xe0524300, 0x721d0300, ++ 0xb8f82a05, 0x80788178, ++ 
0x8e788a78, 0xb8f21605, ++ 0x80728172, 0x8e728672, ++ 0x80787278, 0x80f8c078, ++ 0xb8ef1605, 0x806f816f, ++ 0x8e6f846f, 0x8e76826f, ++ 0xbef600ff, 0x01000000, ++ 0xbefc006f, 0xc031003a, ++ 0x00000078, 0x80f8c078, ++ 0xbf8cc07f, 0x80fc907c, ++ 0xbf800000, 0xbe802d00, ++ 0xbe822d02, 0xbe842d04, ++ 0xbe862d06, 0xbe882d08, ++ 0xbe8a2d0a, 0xbe8c2d0c, ++ 0xbe8e2d0e, 0xbf06807c, ++ 0xbf84fff0, 0xb8f82a05, ++ 0x80788178, 0x8e788a78, ++ 0xb8f21605, 0x80728172, ++ 0x8e728672, 0x80787278, ++ 0xbef60084, 0xbef600ff, ++ 0x01000000, 0xc0211bfa, ++ 0x00000078, 0x80788478, ++ 0xc0211b3a, 0x00000078, ++ 0x80788478, 0xc0211b7a, ++ 0x00000078, 0x80788478, ++ 0xc0211eba, 0x00000078, ++ 0x80788478, 0xc0211efa, ++ 0x00000078, 0x80788478, ++ 0xc0211c3a, 0x00000078, ++ 0x80788478, 0xc0211c7a, ++ 0x00000078, 0x80788478, ++ 0xc0211a3a, 0x00000078, ++ 0x80788478, 0xc0211a7a, ++ 0x00000078, 0x80788478, ++ 0xc0211cfa, 0x00000078, ++ 0x80788478, 0xbf8cc07f, ++ 0x866dff6d, 0x0000ffff, ++ 0xbefc006f, 0xbefe007a, ++ 0xbeff007b, 0x866f71ff, ++ 0x000003ff, 0xb96f4803, ++ 0x866f71ff, 0xfffff800, ++ 0x8f6f8b6f, 0xb96fa2c3, ++ 0xb973f801, 0x866fff6d, ++ 0xf0000000, 0x8f6f9c6f, ++ 0x8e6f906f, 0xbef20080, ++ 0x87726f72, 0x866fff6d, ++ 0x08000000, 0x8f6f9b6f, ++ 0x8e6f8f6f, 0x87726f72, ++ 0x866fff70, 0x00800000, ++ 0x8f6f976f, 0xb972f807, ++ 0x86fe7e7e, 0x86ea6a6a, ++ 0xb970f802, 0xbf8a0000, ++ 0x95806f6c, 0xbf810000, ++}; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 56c1230..01c8b19 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -24,7 +24,6 @@ + #include <linux/export.h> + #include <linux/err.h> + #include <linux/fs.h> +-#include <linux/file.h> + #include <linux/sched.h> + #include <linux/sched/mm.h> + #include <linux/slab.h> +@@ -36,7 +35,6 @@ + #include <linux/mman.h> + #include <asm/processor.h> + #include <linux/ptrace.h> +-#include <linux/pagemap.h> + + #include "kfd_priv.h" + #include "kfd_device_queue_manager.h" +@@ -46,6 +44,7 @@ + static long kfd_ioctl(struct file *, unsigned int, unsigned long); + static int kfd_open(struct inode *, struct file *); + static int kfd_mmap(struct file *, struct vm_area_struct *); ++static bool kfd_dev_is_large_bar(struct kfd_dev *dev); + + static const char kfd_dev_name[] = "kfd"; + +@@ -137,9 +136,6 @@ static int kfd_open(struct inode *inode, struct file *filep) + if (IS_ERR(process)) + return PTR_ERR(process); + +- if (kfd_is_locked()) +- return -EAGAIN; +- + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", + process->pasid, process->is_32bit_user_mode); + +@@ -251,7 +247,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, + pr_debug("Queue Size: 0x%llX, %u\n", + q_properties->queue_size, args->ring_size); + +- pr_debug("Queue r/w Pointers: %px, %px\n", ++ pr_debug("Queue r/w Pointers: %p, %p\n", + q_properties->read_ptr, + q_properties->write_ptr); + +@@ -903,7 +899,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + mutex_lock(&p->mutex); + + if (!kfd_has_process_device_data(p)) +- goto out_unlock; ++ goto out_upwrite; + + /* Run over all pdd of the process */ + pdd = kfd_get_first_process_device_data(p); +@@ -912,7 +908,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + pdd = kfd_get_next_process_device_data(p, pdd); + } while (pdd); + +- goto out_unlock; ++ goto out_upwrite; + } + + /* Fill in process-aperture information for all available +@@ -929,7 +925,7 @@ static 
int kfd_ioctl_get_process_apertures_new(struct file *filp, + if (!kfd_has_process_device_data(p)) { + args->num_of_nodes = 0; + kfree(pa); +- goto out_unlock; ++ goto out_upwrite; + } + + /* Run over all pdd of the process */ +@@ -971,7 +967,7 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp, + kfree(pa); + return ret ? -EFAULT : 0; + +-out_unlock: ++out_upwrite: + mutex_unlock(&p->mutex); + return 0; + } +@@ -980,70 +976,55 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, + void *data) + { + struct kfd_ioctl_create_event_args *args = data; +- int err; +- +- /* For dGPUs the event page is allocated in user mode. The +- * handle is passed to KFD with the first call to this IOCTL +- * through the event_page_offset field. +- */ +- if (args->event_page_offset) { +- struct kfd_dev *kfd; +- struct kfd_process_device *pdd; +- void *mem, *kern_addr; +- uint64_t size; ++ struct kfd_dev *kfd; ++ struct kfd_process_device *pdd; ++ int err = -EINVAL; ++ void *mem, *kern_addr = NULL; + +- if (p->signal_page) { +- pr_err("Event page is already set\n"); +- return -EINVAL; +- } ++ pr_debug("Event page offset 0x%llx\n", args->event_page_offset); + ++ if (args->event_page_offset) { + kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset)); + if (!kfd) { + pr_err("Getting device by id failed in %s\n", __func__); +- return -EINVAL; ++ return -EFAULT; + } +- +- mutex_lock(&p->mutex); +- pdd = kfd_bind_process_to_device(kfd, p); +- if (IS_ERR(pdd)) { +- err = PTR_ERR(pdd); +- goto out_unlock; +- } +- +- mem = kfd_process_device_translate_handle(pdd, ++ if (!kfd->device_info->needs_iommu_device) { ++ mutex_lock(&p->mutex); ++ pdd = kfd_bind_process_to_device(kfd, p); ++ if (IS_ERR(pdd)) { ++ err = PTR_ERR(pdd); ++ goto out_upwrite; ++ } ++ mem = kfd_process_device_translate_handle(pdd, + GET_IDR_HANDLE(args->event_page_offset)); +- if (!mem) { +- pr_err("Can't find BO, offset is 0x%llx\n", +- args->event_page_offset); +- err = -EINVAL; +- goto out_unlock; +- } +- mutex_unlock(&p->mutex); +- +- err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd, +- mem, &kern_addr, &size); +- if (err) { +- pr_err("Failed to map event page to kernel\n"); +- return err; +- } ++ if (!mem) { ++ pr_err("Can't find BO, offset is 0x%llx\n", ++ args->event_page_offset); ++ err = -EFAULT; ++ goto out_upwrite; ++ } ++ mutex_unlock(&p->mutex); + +- err = kfd_event_page_set(p, kern_addr, size); +- if (err) { +- pr_err("Failed to set event page\n"); +- return err; ++ /* Map dGPU gtt BO to kernel */ ++ kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd, ++ mem, &kern_addr, NULL); + } + } + +- +- err = kfd_event_create(filp, p, args->event_type, +- args->auto_reset != 0, args->node_id, +- &args->event_id, &args->event_trigger_data, +- &args->event_page_offset, +- &args->event_slot_index); ++ err = kfd_event_create(filp, p, ++ args->event_type, ++ args->auto_reset != 0, ++ args->node_id, ++ &args->event_id, ++ &args->event_trigger_data, ++ &args->event_page_offset, ++ &args->event_slot_index, ++ kern_addr); + + return err; + +-out_unlock: ++out_upwrite: + mutex_unlock(&p->mutex); + return err; + } +@@ -1085,14 +1066,17 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, + + return err; + } +-static int kfd_ioctl_set_scratch_backing_va(struct file *filep, ++static int kfd_ioctl_alloc_scratch_memory(struct file *filep, + struct kfd_process *p, void *data) + { +- struct kfd_ioctl_set_scratch_backing_va_args *args = data; ++ struct kfd_ioctl_alloc_memory_of_scratch_args *args = 
data; + struct kfd_process_device *pdd; + struct kfd_dev *dev; + long err; + ++ if (args->size == 0) ++ return -EINVAL; ++ + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; +@@ -1242,8 +1226,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + uint64_t offset = args->mmap_offset; + uint32_t flags = args->flags; + struct vm_area_struct *vma; +- uint64_t cpuva = 0; +- unsigned int mem_type = 0; + + if (args->size == 0) + return -EINVAL; +@@ -1273,13 +1255,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + flags |= KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL; + flags &= ~KFD_IOC_ALLOC_MEM_FLAGS_USERPTR; + offset = (pfn << PAGE_SHIFT); +- } else { +- if (offset & (PAGE_SIZE - 1)) { +- pr_debug("Unaligned userptr address:%llx\n", +- offset); +- return -EINVAL; +- } +- cpuva = offset; + } + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { + if (args->size != kfd_doorbell_process_slice(dev)) +@@ -1297,18 +1272,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + + err = dev->kfd2kgd->alloc_memory_of_gpu( + dev->kgd, args->va_addr, args->size, +- pdd->vm, NULL, (struct kgd_mem **) &mem, &offset, ++ pdd->vm, (struct kgd_mem **) &mem, &offset, + flags); + + if (err) + goto err_unlock; + +- mem_type = flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | +- KFD_IOC_ALLOC_MEM_FLAGS_GTT | +- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR | +- KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL); + idr_handle = kfd_process_device_create_obj_handle(pdd, mem, +- args->va_addr, args->size, cpuva, mem_type, NULL); ++ args->va_addr, args->size, NULL); + if (idr_handle < 0) { + err = -EFAULT; + goto err_free; +@@ -1322,7 +1293,8 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, + return 0; + + err_free: +- dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); ++ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, ++ (struct kgd_mem *) mem); + err_unlock: + mutex_unlock(&p->mutex); + return err; +@@ -1363,7 +1335,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, + /* If freeing the buffer failed, leave the handle in place for + * clean-up during process tear-down. 
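Note: the kfd_ioctl_alloc_memory_of_gpu() hunk above rolls back the userptr page-alignment check and the cpuva bookkeeping that the newer code carried. For reference, a minimal sketch of the check being removed, with names taken from the hunk (PAGE_SIZE hard-coded here only to keep the sketch self-contained):

    #include <errno.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096ULL

    /* Userptr BOs must start on a page boundary; the CPU VA is kept so
     * later cross-memory copies can pin the backing pages. */
    static int record_userptr(uint64_t offset, uint64_t *cpuva)
    {
            if (offset & (PAGE_SIZE - 1))
                    return -EINVAL;   /* unaligned userptr address */
            *cpuva = offset;
            return 0;
    }
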
+ */ +- if (!ret) ++ if (ret == 0) + kfd_process_device_remove_obj_handle( + pdd, GET_IDR_HANDLE(args->handle)); + +@@ -1380,30 +1352,31 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; +- int i; ++ int i, num_dev = 0; + uint32_t *devices_arr = NULL; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + +- if (!args->n_devices) { +- pr_debug("Device IDs array empty\n"); ++ if (args->device_ids_array_size == 0) { ++ pr_debug("Device ID array size is 0\n"); + return -EINVAL; + } +- if (args->n_success > args->n_devices) { +- pr_debug("n_success exceeds n_devices\n"); ++ ++ if (args->device_ids_array_size % sizeof(uint32_t)) { ++ pr_debug("Node IDs array size %u\n", ++ args->device_ids_array_size); + return -EINVAL; + } + +- devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), +- GFP_KERNEL); ++ devices_arr = kmalloc(args->device_ids_array_size, GFP_KERNEL); + if (!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, +- (void __user *)args->device_ids_array_ptr, +- args->n_devices * sizeof(*devices_arr)); ++ (void __user *)args->device_ids_array_ptr, ++ args->device_ids_array_size); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; +@@ -1424,11 +1397,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + goto get_mem_obj_from_handle_failed; + } + +- for (i = args->n_success; i < args->n_devices; i++) { ++ num_dev = args->device_ids_array_size / sizeof(uint32_t); ++ for (i = 0 ; i < num_dev; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + pr_debug("Getting device by id failed for 0x%x\n", +- devices_arr[i]); ++ devices_arr[i]); + err = -EINVAL; + goto get_mem_obj_from_handle_failed; + } +@@ -1439,13 +1413,12 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + goto get_mem_obj_from_handle_failed; + } + err = peer->kfd2kgd->map_memory_to_gpu( +- peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); +- if (err) { +- pr_err("Failed to map to gpu %d/%d\n", +- i, args->n_devices); ++ peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); ++ if (err != 0) { ++ pr_err("Failed to map to gpu %d, num_dev=%d\n", ++ i, num_dev); + goto map_memory_to_gpu_failed; + } +- args->n_success = i+1; + } + + mutex_unlock(&p->mutex); +@@ -1457,7 +1430,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, + } + + /* Flush TLBs after waiting for the page table updates to complete */ +- for (i = 0; i < args->n_devices; i++) { ++ for (i = 0; i < num_dev; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (WARN_ON_ONCE(!peer)) + continue; +@@ -1490,29 +1463,30 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + void *mem; + struct kfd_dev *dev, *peer; + long err = 0; +- uint32_t *devices_arr = NULL, i; ++ uint32_t *devices_arr = NULL, num_dev, i; + + dev = kfd_device_by_id(GET_GPU_ID(args->handle)); + if (!dev) + return -EINVAL; + +- if (!args->n_devices) { +- pr_debug("Device IDs array empty\n"); ++ if (args->device_ids_array_size == 0) { ++ pr_debug("Device ID array size is 0\n"); + return -EINVAL; + } +- if (args->n_success > args->n_devices) { +- pr_debug("n_success exceeds n_devices\n"); ++ ++ if (args->device_ids_array_size % sizeof(uint32_t)) { ++ pr_debug("Node IDs array size %u\n", ++ args->device_ids_array_size); + return -EINVAL; + } + +- devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), +- GFP_KERNEL); ++ devices_arr = kmalloc(args->device_ids_array_size, GFP_KERNEL); + if 
(!devices_arr) + return -ENOMEM; + + err = copy_from_user(devices_arr, +- (void __user *)args->device_ids_array_ptr, +- args->n_devices * sizeof(*devices_arr)); ++ (void __user *)args->device_ids_array_ptr, ++ args->device_ids_array_size); + if (err != 0) { + err = -EFAULT; + goto copy_from_user_failed; +@@ -1522,7 +1496,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) { +- err = -EINVAL; ++ pr_debug("Process device data doesn't exist\n"); ++ err = -ENODEV; + goto bind_process_to_device_failed; + } + +@@ -1533,7 +1508,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + goto get_mem_obj_from_handle_failed; + } + +- for (i = args->n_success; i < args->n_devices; i++) { ++ num_dev = args->device_ids_array_size / sizeof(uint32_t); ++ for (i = 0 ; i < num_dev; i++) { + peer = kfd_device_by_id(devices_arr[i]); + if (!peer) { + err = -EINVAL; +@@ -1549,10 +1525,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); + if (err) { + pr_err("Failed to unmap from gpu %d/%d\n", +- i, args->n_devices); ++ i, num_dev); + goto unmap_memory_from_gpu_failed; + } +- args->n_success = i+1; + } + kfree(devices_arr); + +@@ -1569,6 +1544,34 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, + return err; + } + ++static int kfd_ioctl_set_process_dgpu_aperture(struct file *filep, ++ struct kfd_process *p, void *data) ++{ ++ struct kfd_ioctl_set_process_dgpu_aperture_args *args = data; ++ struct kfd_dev *dev; ++ struct kfd_process_device *pdd; ++ long err; ++ ++ dev = kfd_device_by_id(args->gpu_id); ++ if (!dev) ++ return -EINVAL; ++ ++ mutex_lock(&p->mutex); ++ ++ pdd = kfd_bind_process_to_device(dev, p); ++ if (IS_ERR(pdd)) { ++ err = PTR_ERR(pdd); ++ goto exit; ++ } ++ ++ err = kfd_set_process_dgpu_aperture(pdd, args->dgpu_base, ++ args->dgpu_limit); ++ ++exit: ++ mutex_unlock(&p->mutex); ++ return err; ++} ++ + static int kfd_ioctl_get_dmabuf_info(struct file *filep, + struct kfd_process *p, void *data) + { +@@ -1683,636 +1686,22 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep, + return r; + } + +-/* Maximum number of entries for process pages array which lives on stack */ +-#define MAX_PP_STACK_COUNT 16 +-/* Maximum number of pages kmalloc'd to hold struct page's during copy */ +-#define MAX_KMALLOC_PAGES (PAGE_SIZE * 2) +-#define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *)) +- +-static void kfd_put_sg_table(struct sg_table *sg) +-{ +- unsigned int i; +- struct scatterlist *s; +- +- for_each_sg(sg->sgl, s, sg->nents, i) +- put_page(sg_page(s)); +-} +- +- +-/* Create a sg table for the given userptr BO by pinning its system pages +- * @bo: userptr BO +- * @offset: Offset into BO +- * @mm/@task: mm_struct & task_struct of the process that holds the BO +- * @size: in/out: desired size / actual size which could be smaller +- * @sg_size: out: Size of sg table. 
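Note: the map/unmap hunks here replace the newer n_devices/n_success interface with the older device_ids_array_size one. The point of n_success is restartability: on failure the ioctl records how many devices were already mapped, so a retry resumes where it left off instead of re-mapping everything. A minimal sketch of that pattern; map_one() is a hypothetical stand-in for the per-device kfd2kgd call:

    #include <stdint.h>

    struct map_args {
            uint32_t n_devices;   /* total devices requested */
            uint32_t n_success;   /* in/out: devices already mapped */
    };

    static int map_to_devices(struct map_args *args,
                              int (*map_one)(uint32_t idx))
    {
            uint32_t i;

            for (i = args->n_success; i < args->n_devices; i++) {
                    int err = map_one(i);

                    if (err)
                            return err;   /* retry resumes at n_success */
                    args->n_success = i + 1;
            }
            return 0;
    }
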
This is ALIGN_UP(@size) +- * @ret_sg: out sg table +- */ +-static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo, +- int64_t offset, int cma_write, +- struct mm_struct *mm, +- struct task_struct *task, +- uint64_t *size, +- uint64_t *sg_size, +- struct sg_table **ret_sg) +-{ +- int ret, locked = 1; +- struct sg_table *sg = NULL; +- unsigned int i, offset_in_page, flags = 0; +- unsigned long nents, n; +- unsigned long pa = (bo->cpuva + offset) & PAGE_MASK; +- unsigned int cur_page = 0; +- struct scatterlist *s; +- uint64_t sz = *size; +- struct page **process_pages; +- +- *sg_size = 0; +- sg = kmalloc(sizeof(*sg), GFP_KERNEL); +- if (!sg) +- return -ENOMEM; +- +- offset_in_page = offset & (PAGE_SIZE - 1); +- nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; +- +- ret = sg_alloc_table(sg, nents, GFP_KERNEL); +- if (unlikely(ret)) { +- ret = -ENOMEM; +- goto sg_alloc_fail; +- } +- process_pages = kmalloc_array(nents, sizeof(struct pages *), +- GFP_KERNEL); +- if (!process_pages) { +- ret = -ENOMEM; +- goto page_alloc_fail; +- } +- +- if (cma_write) +- flags = FOLL_WRITE; +- locked = 1; +- down_read(&mm->mmap_sem); +- n = get_user_pages_remote(task, mm, pa, nents, flags, process_pages, +- NULL, &locked); +- if (locked) +- up_read(&mm->mmap_sem); +- if (n <= 0) { +- pr_err("CMA: Invalid virtual address 0x%lx\n", pa); +- ret = -EFAULT; +- goto get_user_fail; +- } +- if (n != nents) { +- /* Pages pinned < requested. Set the size accordingly */ +- *size = (n * PAGE_SIZE) - offset_in_page; +- pr_debug("Requested %lx but pinned %lx\n", nents, n); +- } +- +- sz = 0; +- for_each_sg(sg->sgl, s, n, i) { +- sg_set_page(s, process_pages[cur_page], PAGE_SIZE, +- offset_in_page); +- sg_dma_address(s) = page_to_phys(process_pages[cur_page]); +- offset_in_page = 0; +- cur_page++; +- sz += PAGE_SIZE; +- } +- *ret_sg = sg; +- *sg_size = sz; +- +- kfree(process_pages); +- return 0; +- +-get_user_fail: +- kfree(process_pages); +-page_alloc_fail: +- sg_free_table(sg); +-sg_alloc_fail: +- kfree(sg); +- return ret; +-} +- +-static void kfd_free_cma_bos(struct cma_iter *ci) +-{ +- struct cma_system_bo *cma_bo, *tmp; +- +- list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) { +- struct kfd_dev *dev = cma_bo->dev; +- +- /* sg table is deleted by free_memory_of_gpu */ +- if (cma_bo->sg) +- kfd_put_sg_table(cma_bo->sg); +- dev->kfd2kgd->free_memory_of_gpu(dev->kgd, cma_bo->mem); +- list_del(&cma_bo->list); +- kfree(cma_bo); +- } +-} +- +-/* 1 second timeout */ +-#define CMA_WAIT_TIMEOUT msecs_to_jiffies(1000) +- +-static int kfd_cma_fence_wait(struct dma_fence *f) +-{ +- int ret; +- +- ret = dma_fence_wait_timeout(f, false, CMA_WAIT_TIMEOUT); +- if (likely(ret > 0)) +- return 0; +- if (!ret) +- ret = -ETIME; +- return ret; +-} +- +-/* Put previous (old) fence @pf but it waits for @pf to signal if the context +- * of the current fence @cf is different. +- */ +-static int kfd_fence_put_wait_if_diff_context(struct dma_fence *cf, +- struct dma_fence *pf) +-{ +- int ret = 0; +- +- if (pf && cf && cf->context != pf->context) +- ret = kfd_cma_fence_wait(pf); +- dma_fence_put(pf); +- return ret; +-} +- +-#define MAX_SYSTEM_BO_SIZE (512*PAGE_SIZE) +- +-/* Create an equivalent system BO for the given @bo. If @bo is a userptr then +- * create a new system BO by pinning underlying system pages of the given +- * userptr BO. If @bo is in Local Memory then create an empty system BO and +- * then copy @bo into this new BO. 
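Note: kfd_create_sg_table_from_userptr_bo() above pins the userptr's system pages and wraps them in a scatter-gather table. A stripped-down sketch of the same steps, assuming a kernel where get_user_pages_fast() takes gup_flags (older kernels such as 4.14 pass a write flag instead); the real code uses get_user_pages_remote() so it can pin another process's pages:

    #include <linux/mm.h>
    #include <linux/scatterlist.h>
    #include <linux/slab.h>

    static int pin_user_range_to_sg(unsigned long uaddr, unsigned long npages,
                                    struct sg_table *sgt)
    {
            struct scatterlist *sg;
            struct page **pages;
            long pinned;
            int ret, i;

            pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
            if (!pages)
                    return -ENOMEM;

            pinned = get_user_pages_fast(uaddr & PAGE_MASK, npages,
                                         FOLL_WRITE, pages);
            if (pinned <= 0) {
                    ret = -EFAULT;
                    goto out_free;
            }

            /* The driver shrinks the request on a short pin; for brevity
             * this sketch just builds the table over what was pinned. */
            ret = sg_alloc_table(sgt, pinned, GFP_KERNEL);
            if (ret)
                    goto out_put;

            for_each_sg(sgt->sgl, sg, pinned, i)
                    sg_set_page(sg, pages[i], PAGE_SIZE, 0);

            kfree(pages);
            return 0;

    out_put:
            for (i = 0; i < pinned; i++)
                    put_page(pages[i]);
    out_free:
            kfree(pages);
            return ret;
    }
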
+- * @bo: Userptr BO or Local Memory BO +- * @offset: Offset into bo +- * @size: in/out: The size of the new BO could be less than requested if all +- * the pages couldn't be pinned or size > MAX_SYSTEM_BO_SIZE. This would +- * be reflected in @size +- * @mm/@task: mm/task to which @bo belongs to +- * @cma_bo: out: new system BO +- */ +-static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *bo, +- uint64_t *size, uint64_t offset, +- int cma_write, struct kfd_process *p, +- struct mm_struct *mm, +- struct task_struct *task, +- struct cma_system_bo **cma_bo) +-{ +- int ret; +- struct kfd_process_device *pdd = NULL; +- struct cma_system_bo *cbo; +- uint64_t bo_size = 0; +- struct dma_fence *f; +- +- uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_WRITABLE | +- ALLOC_MEM_FLAGS_NO_SUBSTITUTE; +- +- *cma_bo = NULL; +- cbo = kzalloc(sizeof(**cma_bo), GFP_KERNEL); +- if (!cbo) +- return -ENOMEM; +- +- INIT_LIST_HEAD(&cbo->list); +- if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) +- bo_size = min_t(uint64_t, *size, MAX_SYSTEM_BO_SIZE); +- else if (bo->cpuva) { +- ret = kfd_create_sg_table_from_userptr_bo(bo, offset, +- cma_write, mm, task, +- size, &bo_size, +- &cbo->sg); +- if (ret) { +- pr_err("CMA: BO create with sg failed %d\n", ret); +- goto sg_fail; +- } +- } else { +- WARN_ON(1); +- ret = -EINVAL; +- goto sg_fail; +- } +- mutex_lock(&p->mutex); +- pdd = kfd_get_process_device_data(kdev, p); +- if (!pdd) { +- mutex_unlock(&p->mutex); +- pr_err("Process device data doesn't exist\n"); +- ret = -EINVAL; +- goto pdd_fail; +- } +- +- ret = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, 0ULL, bo_size, +- pdd->vm, cbo->sg, +- &cbo->mem, NULL, flags); +- mutex_unlock(&p->mutex); +- if (ret) { +- pr_err("Failed to create shadow system BO %d\n", ret); +- goto pdd_fail; +- } +- +- if (bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { +- ret = kdev->kfd2kgd->copy_mem_to_mem(kdev->kgd, bo->mem, +- offset, cbo->mem, 0, +- bo_size, &f, size); +- if (ret) { +- pr_err("CMA: Intermediate copy failed %d\n", ret); +- goto copy_fail; +- } +- +- /* Wait for the copy to finish as subsequent copy will be done +- * by different device +- */ +- ret = kfd_cma_fence_wait(f); +- dma_fence_put(f); +- if (ret) { +- pr_err("CMA: Intermediate copy timed out %d\n", ret); +- goto copy_fail; +- } +- } +- +- cbo->dev = kdev; +- *cma_bo = cbo; +- +- return ret; +- +-copy_fail: +- kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, bo->mem); +-pdd_fail: +- if (cbo->sg) { +- kfd_put_sg_table(cbo->sg); +- sg_free_table(cbo->sg); +- kfree(cbo->sg); +- } +-sg_fail: +- kfree(cbo); +- return ret; +-} +- +-/* Update cma_iter.cur_bo with KFD BO that is assocaited with +- * cma_iter.array.va_addr +- */ +-static int kfd_cma_iter_update_bo(struct cma_iter *ci) +-{ +- struct kfd_memory_range *arr = ci->array; +- uint64_t va_end = arr->va_addr + arr->size - 1; +- +- mutex_lock(&ci->p->mutex); +- ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr, +- va_end); +- mutex_unlock(&ci->p->mutex); +- +- if (!ci->cur_bo || va_end > ci->cur_bo->it.last) { +- pr_err("CMA failed. Range out of bounds\n"); +- return -EFAULT; +- } +- return 0; +-} +- +-/* Advance iter by @size bytes. */ +-static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size) +-{ +- int ret = 0; +- +- ci->offset += size; +- if (WARN_ON(size > ci->total || ci->offset > ci->array->size)) +- return -EFAULT; +- ci->total -= size; +- /* If current range is copied, move to next range if available. 
*/ +- if (ci->offset == ci->array->size) { +- +- /* End of all ranges */ +- if (!(--ci->nr_segs)) +- return 0; +- +- ci->array++; +- ci->offset = 0; +- ret = kfd_cma_iter_update_bo(ci); +- if (ret) +- return ret; +- } +- ci->bo_offset = (ci->array->va_addr + ci->offset) - +- ci->cur_bo->it.start; +- return ret; +-} +- +-static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs, +- struct kfd_process *p, struct mm_struct *mm, +- struct task_struct *task, struct cma_iter *ci) +-{ +- int ret; +- int nr; +- +- if (!arr || !segs) +- return -EINVAL; +- +- memset(ci, 0, sizeof(*ci)); +- INIT_LIST_HEAD(&ci->cma_list); +- ci->array = arr; +- ci->nr_segs = segs; +- ci->p = p; +- ci->offset = 0; +- ci->mm = mm; +- ci->task = task; +- for (nr = 0; nr < segs; nr++) +- ci->total += arr[nr].size; +- +- /* Valid but size is 0. So copied will also be 0 */ +- if (!ci->total) +- return 0; +- +- ret = kfd_cma_iter_update_bo(ci); +- if (!ret) +- ci->bo_offset = arr->va_addr - ci->cur_bo->it.start; +- return ret; +-} +- +-static bool kfd_cma_iter_end(struct cma_iter *ci) +-{ +- if (!(ci->nr_segs) || !(ci->total)) +- return true; +- return false; +-} +- +-/* Copies @size bytes from si->cur_bo to di->cur_bo BO. The function assumes +- * both source and dest. BOs are userptr BOs. Both BOs can either belong to +- * current process or one of the BOs can belong to a differnt +- * process. @Returns 0 on success, -ve on failure +- * +- * @si: Source iter +- * @di: Dest. iter +- * @cma_write: Indicates if it is write to remote or read from remote +- * @size: amount of bytes to be copied +- * @copied: Return number of bytes actually copied. +- */ +-static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di, +- bool cma_write, uint64_t size, +- uint64_t *copied) +-{ +- int i, ret = 0, locked; +- unsigned int nents, nl; +- unsigned int offset_in_page; +- struct page *pp_stack[MAX_PP_STACK_COUNT]; +- struct page **process_pages = pp_stack; +- unsigned long rva, lva = 0, flags = 0; +- uint64_t copy_size, to_copy = size; +- struct cma_iter *li, *ri; +- +- if (cma_write) { +- ri = di; +- li = si; +- flags |= FOLL_WRITE; +- } else { +- li = di; +- ri = si; +- } +- /* rva: remote virtual address. Page aligned to start page. +- * rva + offset_in_page: Points to remote start address +- * lva: local virtual address. Points to the start address. 
+- * nents: computes number of remote pages to request +- */ +- offset_in_page = ri->bo_offset & (PAGE_SIZE - 1); +- rva = (ri->cur_bo->cpuva + ri->bo_offset) & PAGE_MASK; +- lva = li->cur_bo->cpuva + li->bo_offset; +- +- nents = (size + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE; +- +- copy_size = min_t(uint64_t, size, PAGE_SIZE - offset_in_page); +- *copied = 0; +- +- if (nents > MAX_PP_STACK_COUNT) { +- /* For reliability kmalloc only 2 pages worth */ +- process_pages = kmalloc(min_t(size_t, MAX_KMALLOC_PAGES, +- sizeof(struct pages *)*nents), +- GFP_KERNEL); +- +- if (!process_pages) +- return -ENOMEM; +- } +- +- while (nents && to_copy) { +- nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents); +- locked = 1; +- down_read(&ri->mm->mmap_sem); +- nl = get_user_pages_remote(ri->task, ri->mm, rva, nl, +- flags, process_pages, NULL, +- &locked); +- if (locked) +- up_read(&ri->mm->mmap_sem); +- if (nl <= 0) { +- pr_err("CMA: Invalid virtual address 0x%lx\n", rva); +- ret = -EFAULT; +- break; +- } +- +- for (i = 0; i < nl; i++) { +- unsigned int n; +- void *kaddr = kmap(process_pages[i]); +- +- if (cma_write) { +- n = copy_from_user(kaddr+offset_in_page, +- (void *)lva, copy_size); +- set_page_dirty(process_pages[i]); +- } else { +- n = copy_to_user((void *)lva, +- kaddr+offset_in_page, +- copy_size); +- } +- kunmap(kaddr); +- if (n) { +- ret = -EFAULT; +- break; +- } +- to_copy -= copy_size; +- if (!to_copy) +- break; +- lva += copy_size; +- rva += (copy_size + offset_in_page); +- WARN_ONCE(rva & (PAGE_SIZE - 1), +- "CMA: Error in remote VA computation"); +- offset_in_page = 0; +- copy_size = min_t(uint64_t, to_copy, PAGE_SIZE); +- } +- +- for (i = 0; i < nl; i++) +- put_page(process_pages[i]); +- +- if (ret) +- break; +- nents -= nl; +- } +- +- if (process_pages != pp_stack) +- kfree(process_pages); +- +- *copied = (size - to_copy); +- return ret; +- +-} +- +-/* Copies @size bytes from si->cur_bo to di->cur_bo starting at their +- * respective offset. +- * @si: Source iter +- * @di: Dest. iter +- * @cma_write: Indicates if it is write to remote or read from remote +- * @size: amount of bytes to be copied +- * @f: Return the last fence if any +- * @copied: Return number of bytes actually copied. +- */ +-static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di, +- int cma_write, uint64_t size, +- struct dma_fence **f, uint64_t *copied) +-{ +- int err = 0; +- struct kfd_bo *dst_bo = di->cur_bo, *src_bo = si->cur_bo; +- uint64_t src_offset = si->bo_offset, dst_offset = di->bo_offset; +- struct kgd_mem *src_mem = src_bo->mem, *dst_mem = dst_bo->mem; +- struct kfd_dev *dev = dst_bo->dev; +- struct cma_system_bo *tmp_bo = NULL; +- +- *copied = 0; +- if (f) +- *f = NULL; +- if (src_bo->cpuva && dst_bo->cpuva) +- return kfd_copy_userptr_bos(si, di, cma_write, size, copied); +- +- /* If either source or dest. is userptr, create a shadow system BO +- * by using the underlying userptr BO pages. Then use this shadow +- * BO for copy. src_offset & dst_offset are adjusted because the new BO +- * is only created for the window (offset, size) requested. +- * The shadow BO is created on the other device. This means if the +- * other BO is a device memory, the copy will be using that device. +- * The BOs are stored in cma_list for deferred cleanup. This minimizes +- * fence waiting just to the last fence. 
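Note: kfd_copy_userptr_bos() above bounces data page by page through kmap(). A minimal sketch of that map-copy-unmap step for the write direction; one detail worth flagging is that kunmap() takes the struct page rather than the mapping returned by kmap() (the loop above passes the mapped address), so the sketch uses the page:

    #include <linux/highmem.h>
    #include <linux/uaccess.h>

    /* Copy up to one page from user address 'src' into the pinned remote
     * page 'pg' at byte offset 'off' (the CMA WRITE direction). */
    static int bounce_one_page(struct page *pg, unsigned int off,
                               const void __user *src, size_t len)
    {
            void *kaddr = kmap(pg);
            unsigned long left = copy_from_user(kaddr + off, src, len);

            set_page_dirty(pg);
            kunmap(pg);   /* kunmap() wants the page, not kaddr */
            return left ? -EFAULT : 0;
    }
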
+- */ +- if (src_bo->cpuva) { +- dev = dst_bo->dev; +- err = kfd_create_cma_system_bo(dev, src_bo, &size, +- si->bo_offset, cma_write, +- si->p, si->mm, si->task, +- &si->cma_bo); +- src_mem = si->cma_bo->mem; +- src_offset = si->bo_offset & (PAGE_SIZE - 1); +- list_add_tail(&si->cma_bo->list, &si->cma_list); +- } else if (dst_bo->cpuva) { +- dev = src_bo->dev; +- err = kfd_create_cma_system_bo(dev, dst_bo, &size, +- di->bo_offset, cma_write, +- di->p, di->mm, di->task, +- &di->cma_bo); +- dst_mem = di->cma_bo->mem; +- dst_offset = di->bo_offset & (PAGE_SIZE - 1); +- list_add_tail(&di->cma_bo->list, &di->cma_list); +- } else if (src_bo->dev->kgd != dst_bo->dev->kgd) { +- /* This indicates that atleast on of the BO is in local mem. +- * If both are in local mem of different devices then create an +- * intermediate System BO and do a double copy +- * [VRAM]--gpu1-->[System BO]--gpu2-->[VRAM]. +- * If only one BO is in VRAM then use that GPU to do the copy +- */ +- if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM && +- dst_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { +- dev = dst_bo->dev; +- err = kfd_create_cma_system_bo(src_bo->dev, src_bo, +- &size, si->bo_offset, +- cma_write, si->p, +- si->mm, si->task, +- &tmp_bo); +- src_mem = tmp_bo->mem; +- src_offset = 0; +- } else if (src_bo->mem_type == KFD_IOC_ALLOC_MEM_FLAGS_VRAM) +- dev = src_bo->dev; +- /* else already set to dst_bo->dev */ +- } +- +- if (err) { +- pr_err("Failed to create system BO %d", err); +- return -EINVAL; +- } +- +- err = dev->kfd2kgd->copy_mem_to_mem(dev->kgd, src_mem, src_offset, +- dst_mem, dst_offset, size, f, +- copied); +- /* The tmp_bo allocates additional memory. So it is better to wait and +- * delete. Also since multiple GPUs are involved the copies are +- * currently not pipelined. +- */ +- if (tmp_bo) { +- if (!err) { +- kfd_cma_fence_wait(*f); +- dma_fence_put(*f); +- *f = NULL; +- } +- dev->kfd2kgd->free_memory_of_gpu(dev->kgd, tmp_bo->mem); +- kfree(tmp_bo); +- } +- return err; +-} +- +-/* Copy single range from source iterator @si to destination iterator @di. +- * @si will move to next range and @di will move by bytes copied. 
+- * @return : 0 for success or -ve for failure +- * @f: The last fence if any +- * @copied: out: number of bytes copied +- */ +-static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di, +- bool cma_write, struct dma_fence **f, +- uint64_t *copied) +-{ +- int err = 0; +- uint64_t copy_size, n; +- uint64_t size = si->array->size; +- struct kfd_bo *src_bo = si->cur_bo; +- struct dma_fence *lfence = NULL; +- +- if (!src_bo || !di || !copied) +- return -EINVAL; +- *copied = 0; +- if (f) +- *f = NULL; +- +- while (size && !kfd_cma_iter_end(di)) { +- struct dma_fence *fence = NULL; +- +- copy_size = min(size, (di->array->size - di->offset)); +- +- err = kfd_copy_bos(si, di, cma_write, copy_size, &fence, &n); +- if (err) { +- pr_err("CMA %d failed\n", err); +- break; +- } +- +- if (fence) { +- err = kfd_fence_put_wait_if_diff_context(fence, +- lfence); +- lfence = fence; +- if (err) +- break; +- } +- +- size -= n; +- *copied += n; +- err = kfd_cma_iter_advance(si, n); +- if (err) +- break; +- err = kfd_cma_iter_advance(di, n); +- if (err) +- break; +- } +- +- if (f) +- *f = dma_fence_get(lfence); +- dma_fence_put(lfence); +- +- return err; +-} +- + static int kfd_ioctl_cross_memory_copy(struct file *filep, + struct kfd_process *local_p, void *data) + { + struct kfd_ioctl_cross_memory_copy_args *args = data; + struct kfd_memory_range *src_array, *dst_array; +- struct kfd_process *remote_p; ++ struct kfd_bo *src_bo, *dst_bo; ++ struct kfd_process *remote_p, *src_p, *dst_p; + struct task_struct *remote_task; + struct mm_struct *remote_mm; + struct pid *remote_pid; +- struct dma_fence *lfence = NULL; +- uint64_t copied = 0, total_copied = 0; +- struct cma_iter di, si; ++ struct dma_fence *fence = NULL, *lfence = NULL; ++ uint64_t dst_va_addr; ++ uint64_t copied, total_copied = 0; ++ uint64_t src_offset, dst_offset, dst_va_addr_end; + const char *cma_op; +- int err = 0; ++ int i, j = 0, err = 0; + + /* Check parameters */ + if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 || +@@ -2372,76 +1761,169 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep, + } + + remote_p = kfd_get_process(remote_task); +- if (IS_ERR(remote_p)) { ++ if (!remote_p) { + pr_err("Cross mem copy failed. Invalid kfd process %d\n", + args->pid); + err = -EINVAL; + goto kfd_process_fail; + } +- /* Initialise cma_iter si & @di with source & destination range. */ ++ + if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) { ++ src_p = local_p; ++ dst_p = remote_p; + cma_op = "WRITE"; + pr_debug("CMA WRITE: local -> remote\n"); +- err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, +- remote_p, remote_mm, remote_task, &di); +- if (err) +- goto kfd_process_fail; +- err = kfd_cma_iter_init(src_array, args->src_mem_array_size, +- local_p, current->mm, current, &si); +- if (err) +- goto kfd_process_fail; + } else { ++ src_p = remote_p; ++ dst_p = local_p; + cma_op = "READ"; + pr_debug("CMA READ: remote -> local\n"); +- +- err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, +- local_p, current->mm, current, &di); +- if (err) +- goto kfd_process_fail; +- err = kfd_cma_iter_init(src_array, args->src_mem_array_size, +- remote_p, remote_mm, remote_task, &si); +- if (err) +- goto kfd_process_fail; + } + +- /* Copy one si range at a time into di. After each call to +- * kfd_copy_single_range() si will move to next range. 
di will be +- * incremented by bytes copied +- */ +- while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) { +- struct dma_fence *fence = NULL; +- +- err = kfd_copy_single_range(&si, &di, +- KFD_IS_CROSS_MEMORY_WRITE(args->flags), +- &fence, &copied); +- total_copied += copied; + +- if (err) ++ /* For each source kfd_range: ++ * - Find the BO. Each range has to be within the same BO. ++ * - Copy this range to single or multiple destination BOs. ++ * - dst_va_addr - will point to next va address into which data will ++ * be copied. ++ * - dst_bo & src_bo - the current destination and source BOs ++ * - src_offset & dst_offset - offset into the respective BOs from ++ * data will be sourced or copied ++ */ ++ dst_va_addr = dst_array[0].va_addr; ++ dst_va_addr_end = dst_va_addr + dst_array[0].size - 1; ++ mutex_lock(&dst_p->mutex); ++ dst_bo = kfd_process_find_bo_from_interval(dst_p, ++ dst_va_addr, ++ dst_va_addr_end); ++ mutex_unlock(&dst_p->mutex); ++ if (!dst_bo || dst_va_addr_end > dst_bo->it.last) { ++ pr_err("CMA %s failed. Invalid dst range\n", cma_op); ++ err = -EFAULT; ++ goto kfd_process_fail; ++ } ++ dst_offset = dst_va_addr - dst_bo->it.start; ++ ++ for (i = 0; i < args->src_mem_array_size; i++) { ++ uint64_t src_va_addr_end = src_array[i].va_addr + ++ src_array[i].size - 1; ++ uint64_t src_size_to_copy = src_array[i].size; ++ ++ mutex_lock(&src_p->mutex); ++ src_bo = kfd_process_find_bo_from_interval(src_p, ++ src_array[i].va_addr, ++ src_va_addr_end); ++ mutex_unlock(&src_p->mutex); ++ if (!src_bo || src_va_addr_end > src_bo->it.last) { ++ pr_err("CMA %s failed. Invalid src range\n", cma_op); ++ err = -EFAULT; + break; ++ } ++ ++ src_offset = src_array[i].va_addr - src_bo->it.start; + +- /* Release old fence if a later fence is created. If no +- * new fence is created, then keep the preivous fence ++ /* Copy src_bo to one or multiple dst_bo(s) based on size and ++ * and current copy location. + */ +- if (fence) { +- err = kfd_fence_put_wait_if_diff_context(fence, +- lfence); ++ while (j < args->dst_mem_array_size) { ++ uint64_t copy_size; ++ int64_t space_left; ++ ++ /* Find the current copy_size. This will be smaller of ++ * the following ++ * - space left in the current dest memory range ++ * - data left to copy from source range ++ */ ++ space_left = (dst_array[j].va_addr + dst_array[j].size) ++ - dst_va_addr; ++ copy_size = (src_size_to_copy < space_left) ? ++ src_size_to_copy : space_left; ++ ++ /* Check both BOs belong to same device */ ++ if (src_bo->dev->kgd != dst_bo->dev->kgd) { ++ pr_err("CMA %s fail. Not same dev\n", cma_op); ++ err = -EINVAL; ++ break; ++ } ++ ++ /* Store prev fence. Release it when a later fence is ++ * created ++ */ + lfence = fence; +- if (err) ++ fence = NULL; ++ ++ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem( ++ src_bo->dev->kgd, ++ src_bo->mem, src_offset, ++ dst_bo->mem, dst_offset, ++ copy_size, ++ &fence, &copied); ++ ++ if (err) { ++ pr_err("GPU CMA %s failed\n", cma_op); ++ break; ++ } ++ ++ /* Later fence available. Release old fence */ ++ if (fence && lfence) { ++ dma_fence_put(lfence); ++ lfence = NULL; ++ } ++ ++ total_copied += copied; ++ src_size_to_copy -= copied; ++ space_left -= copied; ++ dst_va_addr += copied; ++ dst_offset += copied; ++ src_offset += copied; ++ if (dst_va_addr > dst_bo->it.last + 1) { ++ pr_err("CMA %s fail. 
Mem overflow\n", cma_op); ++ err = -EFAULT; ++ break; ++ } ++ ++ /* If the cur dest range is full move to next one */ ++ if (space_left <= 0) { ++ if (++j >= args->dst_mem_array_size) ++ break; ++ ++ dst_va_addr = dst_array[j].va_addr; ++ dst_va_addr_end = dst_va_addr + ++ dst_array[j].size - 1; ++ dst_bo = kfd_process_find_bo_from_interval( ++ dst_p, ++ dst_va_addr, ++ dst_va_addr_end); ++ if (!dst_bo || ++ dst_va_addr_end > dst_bo->it.last) { ++ pr_err("CMA %s failed. Invalid dst range\n", ++ cma_op); ++ err = -EFAULT; ++ break; ++ } ++ dst_offset = dst_va_addr - dst_bo->it.start; ++ } ++ ++ /* If the cur src range is done, move to next one */ ++ if (src_size_to_copy <= 0) + break; + } ++ if (err) ++ break; + } + + /* Wait for the last fence irrespective of error condition */ +- if (lfence) { +- err = kfd_cma_fence_wait(lfence); +- dma_fence_put(lfence); +- if (err) ++ if (fence) { ++ if (dma_fence_wait_timeout(fence, false, msecs_to_jiffies(1000)) ++ < 0) + pr_err("CMA %s failed. BO timed out\n", cma_op); ++ dma_fence_put(fence); ++ } else if (lfence) { ++ pr_debug("GPU copy fail. But wait for prev DMA to finish\n"); ++ dma_fence_wait_timeout(lfence, true, msecs_to_jiffies(1000)); ++ dma_fence_put(lfence); + } + +- kfd_free_cma_bos(&si); +- kfd_free_cma_bos(&di); +- + kfd_process_fail: + mmput(remote_mm); + mm_access_fail: +@@ -2530,21 +2012,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + kfd_ioctl_dbg_wave_control, 0), + +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA, +- kfd_ioctl_set_scratch_backing_va, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, +- kfd_ioctl_get_tile_config, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, +- kfd_ioctl_set_trap_handler, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, +- kfd_ioctl_get_process_apertures_new, 0), +- +- AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, +- kfd_ioctl_acquire_vm, 0), +- + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, + kfd_ioctl_alloc_memory_of_gpu, 0), + +@@ -2557,15 +2024,30 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, + kfd_ioctl_unmap_memory_from_gpu, 0), + ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH, ++ kfd_ioctl_alloc_scratch_memory, 0), ++ + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, + kfd_ioctl_set_cu_mask, 0), + ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE, ++ kfd_ioctl_set_process_dgpu_aperture, 0), ++ ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER, ++ kfd_ioctl_set_trap_handler, 0), ++ ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, ++ kfd_ioctl_get_process_apertures_new, 0), ++ + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, + kfd_ioctl_get_dmabuf_info, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, + kfd_ioctl_import_dmabuf, 0), + ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG, ++ kfd_ioctl_get_tile_config, 0), ++ + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IPC_IMPORT_HANDLE, + kfd_ioctl_ipc_import_handle, 0), + +@@ -2578,6 +2060,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, + kfd_ioctl_get_queue_wave_state, 0), + ++ AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, ++ kfd_ioctl_acquire_vm, 0) ++ + }; + + #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) +@@ -2673,33 +2158,34 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) + static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) + { + struct kfd_process *process; 
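Note: both versions of kfd_mmap() shown below dispatch on bit-fields packed into vm_pgoff: a type tag plus, for doorbell mappings, a gpu_id. The real shift and mask values live in kfd_priv.h and are not part of this patch; the constants in this sketch are illustrative assumptions only, to show the encode/decode shape:

    #include <stdint.h>

    /* Illustrative layout only -- real values come from kfd_priv.h. */
    #define MMAP_TYPE_SHIFT    60
    #define MMAP_TYPE_MASK     (0xFULL << MMAP_TYPE_SHIFT)
    #define MMAP_GPU_ID_SHIFT  44
    #define MMAP_GPU_ID_MASK   (0xFFFFULL << MMAP_GPU_ID_SHIFT)

    static inline uint64_t mmap_offset_pack(uint64_t type, uint32_t gpu_id)
    {
            return type | ((uint64_t)gpu_id << MMAP_GPU_ID_SHIFT);
    }

    static inline uint64_t mmap_type_get(uint64_t pgoff)
    {
            return pgoff & MMAP_TYPE_MASK;
    }

    static inline uint32_t mmap_gpu_id_get(uint64_t pgoff)
    {
            return (uint32_t)((pgoff & MMAP_GPU_ID_MASK)
                              >> MMAP_GPU_ID_SHIFT);
    }
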
+- struct kfd_dev *dev = NULL; ++ struct kfd_dev *kfd; + unsigned long vm_pgoff; +- unsigned int gpu_id; ++ unsigned long long mmap_type; + + process = kfd_get_process(current); + if (IS_ERR(process)) + return PTR_ERR(process); + + vm_pgoff = vma->vm_pgoff; +- vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); +- gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); +- if (gpu_id) +- dev = kfd_device_by_id(gpu_id); ++ vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vma->vm_pgoff); ++ mmap_type = vm_pgoff & KFD_MMAP_TYPE_MASK; + +- switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { ++ switch (mmap_type) { + case KFD_MMAP_TYPE_DOORBELL: +- if (!dev) +- return -ENODEV; +- return kfd_doorbell_mmap(dev, process, vma); ++ kfd = kfd_device_by_id(KFD_MMAP_GPU_ID_GET(vm_pgoff)); ++ if (!kfd) ++ return -EFAULT; ++ return kfd_doorbell_mmap(kfd, process, vma); + + case KFD_MMAP_TYPE_EVENTS: + return kfd_event_mmap(process, vma); + + case KFD_MMAP_TYPE_RESERVED_MEM: +- if (!dev) +- return -ENODEV; +- return kfd_reserved_mem_mmap(dev, process, vma); ++ return kfd_reserved_mem_mmap(process, vma); ++ ++ default: ++ pr_err("Unsupported kfd mmap type %llx\n", mmap_type); ++ break; + } + + return -EFAULT; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +index c540b65..24d0634 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +@@ -1,27 +1,7 @@ +-/* +- * Copyright 2015-2017 Advanced Micro Devices, Inc. +- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included in +- * all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. 
+- */ +- +-#include <linux/pci.h> ++#include <linux/kernel.h> + #include <linux/acpi.h> ++#include <linux/mm.h> ++#include <linux/pci.h> + #include "kfd_crat.h" + #include "kfd_priv.h" + #include "kfd_topology.h" +@@ -286,7 +266,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, + + id = cache->processor_id_low; + +- pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); + list_for_each_entry(dev, device_list, list) { + total_num_of_cu = (dev->node_props.array_count * + dev->node_props.cu_per_simd_array); +@@ -436,15 +415,11 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + ret = kfd_parse_subtype_cache(cache, device_list); + break; + case CRAT_SUBTYPE_TLB_AFFINITY: +- /* +- * For now, nothing to do here +- */ ++ /* For now, nothing to do here */ + pr_debug("Found TLB entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: +- /* +- * For now, nothing to do here +- */ ++ /* For now, nothing to do here */ + pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_IOLINK_AFFINITY: +@@ -469,8 +444,9 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, + * + * Return - 0 if successful else -ve value + */ +-int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, +- uint32_t proximity_domain) ++int kfd_parse_crat_table(void *crat_image, ++ struct list_head *device_list, ++ uint32_t proximity_domain) + { + struct kfd_topology_device *top_dev = NULL; + struct crat_subtype_generic *sub_type_hdr; +@@ -642,7 +618,6 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, + num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); + break; + case CHIP_VEGA10: +- case CHIP_VEGA20: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; +@@ -718,7 +693,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, + * crat_image will be NULL + * @size: [OUT] size of crat_image + * +- * Return 0 if successful else return error code ++ * Return 0 if successful else return -ve value + */ + #ifdef CONFIG_ACPI + int kfd_create_crat_image_acpi(void **crat_image, size_t *size) +@@ -750,8 +725,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) + } + + pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); +- if (!pcrat_image) ++ if (!pcrat_image) { ++ pr_err("No memory for allocating CRAT image\n"); + return -ENOMEM; ++ } + + memcpy(pcrat_image, crat_table, crat_table->length); + +@@ -938,7 +915,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) + + #ifdef CONFIG_ACPI + status = acpi_get_table("DSDT", 0, &acpi_table); +- if (status != AE_OK) ++ if (status == AE_NOT_FOUND) + pr_warn("DSDT table not found for OEM information\n"); + else { + crat_table->oem_revision = acpi_table->revision; +@@ -1095,8 +1072,8 @@ static int kfd_fill_gpu_direct_io_link(int *avail_size, + * [OUT] actual size of data filled in crat_image + */ + static int kfd_create_vcrat_image_gpu(void *pcrat_image, +- size_t *size, struct kfd_dev *kdev, +- uint32_t proximity_domain) ++ size_t *size, struct kfd_dev *kdev, ++ uint32_t proximity_domain) + { + struct crat_header *crat_table = (struct crat_header *)pcrat_image; + struct crat_subtype_generic *sub_type_hdr; +@@ -1264,8 +1241,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, + * Return 0 if successful else return -ve value + */ + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, +- int flags, struct 
kfd_dev *kdev, +- uint32_t proximity_domain) ++ int flags, struct kfd_dev *kdev, uint32_t proximity_domain) + { + void *pcrat_image = NULL; + int ret = 0; +@@ -1295,8 +1271,8 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, + if (!pcrat_image) + return -ENOMEM; + *size = VCRAT_SIZE_FOR_GPU; +- ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, +- proximity_domain); ++ ret = kfd_create_vcrat_image_gpu(pcrat_image, size, ++ kdev, proximity_domain); + break; + case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): + /* TODO: */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +index cd7ee6d..00de41f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +@@ -24,6 +24,7 @@ + #define KFD_CRAT_H_INCLUDED + + #include <linux/types.h> ++#include "kfd_priv.h" + + #pragma pack(1) + +@@ -227,12 +228,12 @@ struct crat_subtype_ccompute { + /* + * HSA IO Link Affinity structure and definitions + */ +-#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) +-#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) +-#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) +-#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) +-#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) +-#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 ++#define CRAT_IOLINK_FLAGS_ENABLED (1 << 0) ++#define CRAT_IOLINK_FLAGS_NON_COHERENT (1 << 1) ++#define CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT (1 << 2) ++#define CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT (1 << 3) ++#define CRAT_IOLINK_FLAGS_NO_PEER_TO_PEER_DMA (1 << 4) ++#define CRAT_IOLINK_FLAGS_RESERVED_MASK 0xffffffe0 + + /* + * IO interface types +@@ -240,18 +241,18 @@ struct crat_subtype_ccompute { + #define CRAT_IOLINK_TYPE_UNDEFINED 0 + #define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 + #define CRAT_IOLINK_TYPE_PCIEXPRESS 2 +-#define CRAT_IOLINK_TYPE_AMBA 3 +-#define CRAT_IOLINK_TYPE_MIPI 4 +-#define CRAT_IOLINK_TYPE_QPI_1_1 5 +-#define CRAT_IOLINK_TYPE_RESERVED1 6 +-#define CRAT_IOLINK_TYPE_RESERVED2 7 +-#define CRAT_IOLINK_TYPE_RAPID_IO 8 +-#define CRAT_IOLINK_TYPE_INFINIBAND 9 +-#define CRAT_IOLINK_TYPE_RESERVED3 10 +-#define CRAT_IOLINK_TYPE_OTHER 11 +-#define CRAT_IOLINK_TYPE_MAX 255 +- +-#define CRAT_IOLINK_RESERVED_LENGTH 24 ++#define CRAT_IOLINK_TYPE_AMBA 3 ++#define CRAT_IOLINK_TYPE_MIPI 4 ++#define CRAT_IOLINK_TYPE_QPI_1_1 5 ++#define CRAT_IOLINK_TYPE_RESERVED1 6 ++#define CRAT_IOLINK_TYPE_RESERVED2 7 ++#define CRAT_IOLINK_TYPE_RAPID_IO 8 ++#define CRAT_IOLINK_TYPE_INFINIBAND 9 ++#define CRAT_IOLINK_TYPE_RESERVED3 10 ++#define CRAT_IOLINK_TYPE_OTHER 11 ++#define CRAT_IOLINK_TYPE_MAX 255 ++ ++#define CRAT_IOLINK_RESERVED_LENGTH 24 + + struct crat_subtype_iolink { + uint8_t type; +@@ -307,16 +308,13 @@ struct cdit_header { + + #pragma pack() + +-struct kfd_dev; +- + #ifdef CONFIG_ACPI + int kfd_create_crat_image_acpi(void **crat_image, size_t *size); + #endif + void kfd_destroy_crat_image(void *crat_image); +-int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, +- uint32_t proximity_domain); ++int kfd_parse_crat_table(void *crat_image, ++ struct list_head *device_list, ++ uint32_t proximity_domain); + int kfd_create_crat_image_virtual(void **crat_image, size_t *size, +- int flags, struct kfd_dev *kdev, +- uint32_t proximity_domain); +- ++ int flags, struct kfd_dev *kdev, uint32_t proximity_domain); + #endif /* KFD_CRAT_H_INCLUDED */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +index ab37d36..232e28f 100644 +--- 
a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016-2017 Advanced Micro Devices, Inc. ++ * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -21,8 +21,6 @@ + */ + + #include <linux/debugfs.h> +-#include <linux/uaccess.h> +- + #include "kfd_priv.h" + + static struct dentry *debugfs_root; +@@ -34,38 +32,6 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file) + return single_open(file, show, NULL); + } + +-static ssize_t kfd_debugfs_hang_hws_write(struct file *file, +- const char __user *user_buf, size_t size, loff_t *ppos) +-{ +- struct kfd_dev *dev; +- char tmp[16]; +- uint32_t gpu_id; +- int ret = -EINVAL; +- +- memset(tmp, 0, 16); +- if (size >= 16) { +- pr_err("Invalid input for gpu id.\n"); +- goto out; +- } +- if (copy_from_user(tmp, user_buf, size)) { +- ret = -EFAULT; +- goto out; +- } +- if (kstrtoint(tmp, 10, &gpu_id)) { +- pr_err("Invalid input for gpu id.\n"); +- goto out; +- } +- dev = kfd_device_by_id(gpu_id); +- if (dev) { +- kfd_debugfs_hang_hws(dev); +- ret = size; +- } else +- pr_err("Cannot find device %d.\n", gpu_id); +- +-out: +- return ret; +-} +- + static const struct file_operations kfd_debugfs_fops = { + .owner = THIS_MODULE, + .open = kfd_debugfs_open, +@@ -74,15 +40,6 @@ static const struct file_operations kfd_debugfs_fops = { + .release = single_release, + }; + +-static const struct file_operations kfd_debugfs_hang_hws_fops = { +- .owner = THIS_MODULE, +- .open = kfd_debugfs_open, +- .read = seq_read, +- .write = kfd_debugfs_hang_hws_write, +- .llseek = seq_lseek, +- .release = single_release, +-}; +- + void kfd_debugfs_init(void) + { + struct dentry *ent; +@@ -108,11 +65,6 @@ void kfd_debugfs_init(void) + ent = debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, + kfd_debugfs_rls_by_device, + &kfd_debugfs_fops); +- +- ent = debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root, +- NULL, +- &kfd_debugfs_hang_hws_fops); +- + if (!ent) + pr_warn("Failed to create rls in kfd debugfs\n"); + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +old mode 100644 +new mode 100755 +index 10095087..a9ad2a8 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -27,17 +27,12 @@ + #include "kfd_priv.h" + #include "kfd_device_queue_manager.h" + #include "kfd_pm4_headers_vi.h" +-#include "cwsr_trap_handler.h" ++#include "cwsr_trap_handler_gfx8.asm" ++#include "cwsr_trap_handler_gfx9.asm" + #include "kfd_iommu.h" + + #define MQD_SIZE_ALIGNED 768 +- +-/* +- * kfd_locked is used to lock the kfd driver during suspend or reset +- * once locked, kfd driver will stop any further GPU execution. +- * create process (open) will return -EAGAIN. 
+- */ +-static atomic_t kfd_locked = ATOMIC_INIT(0); ++static atomic_t kfd_device_suspended = ATOMIC_INIT(0); + + #ifdef KFD_SUPPORT_IOMMU_V2 + static const struct kfd_device_info kaveri_device_info = { +@@ -54,7 +49,6 @@ static const struct kfd_device_info kaveri_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info carrizo_device_info = { +@@ -71,7 +65,6 @@ static const struct kfd_device_info carrizo_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info raven_device_info = { +@@ -87,7 +80,6 @@ static const struct kfd_device_info raven_device_info = { + .needs_iommu_device = true, + .needs_pci_atomics = true, + .num_sdma_engines = 1, +- .num_sdma_queues_per_engine = 2, + }; + #endif + +@@ -105,7 +97,6 @@ static const struct kfd_device_info hawaii_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info tonga_device_info = { +@@ -121,7 +112,6 @@ static const struct kfd_device_info tonga_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info tonga_vf_device_info = { +@@ -137,7 +127,6 @@ static const struct kfd_device_info tonga_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info fiji_device_info = { +@@ -153,7 +142,6 @@ static const struct kfd_device_info fiji_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info fiji_vf_device_info = { +@@ -169,7 +157,6 @@ static const struct kfd_device_info fiji_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + +@@ -186,7 +173,6 @@ static const struct kfd_device_info polaris10_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info polaris10_vf_device_info = { +@@ -202,7 +188,6 @@ static const struct kfd_device_info polaris10_vf_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info polaris11_device_info = { +@@ -218,7 +203,6 @@ static const struct kfd_device_info polaris11_device_info = { + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info vega10_device_info = { +@@ -232,9 +216,8 @@ static const struct kfd_device_info vega10_device_info = { + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, +- .needs_pci_atomics = false, ++ .needs_pci_atomics = true, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, + }; + + static const struct kfd_device_info vega10_vf_device_info = { +@@ -250,23 +233,6 @@ static const struct kfd_device_info vega10_vf_device_info = { + .needs_iommu_device = false, + 
.needs_pci_atomics = false, + .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 2, +-}; +- +-static const struct kfd_device_info vega20_device_info = { +- .asic_family = CHIP_VEGA20, +- .max_pasid_bits = 16, +- .max_no_of_hqd = 24, +- .doorbell_size = 8, +- .ih_ring_entry_size = 8 * sizeof(uint32_t), +- .event_interrupt_class = &event_interrupt_class_v9, +- .num_of_watch_points = 4, +- .mqd_size_aligned = MQD_SIZE_ALIGNED, +- .supports_cwsr = true, +- .needs_iommu_device = false, +- .needs_pci_atomics = true, +- .num_sdma_engines = 2, +- .num_sdma_queues_per_engine = 8, + }; + + struct kfd_deviceid { +@@ -317,35 +283,35 @@ static const struct kfd_deviceid supported_devices[] = { + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ +- { 0x6920, &tonga_device_info }, /* Tonga */ +- { 0x6921, &tonga_device_info }, /* Tonga */ +- { 0x6928, &tonga_device_info }, /* Tonga */ +- { 0x6929, &tonga_device_info }, /* Tonga */ +- { 0x692B, &tonga_device_info }, /* Tonga */ +- { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ +- { 0x6938, &tonga_device_info }, /* Tonga */ +- { 0x6939, &tonga_device_info }, /* Tonga */ +- { 0x7300, &fiji_device_info }, /* Fiji */ +- { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ +- { 0x67C0, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C1, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C2, &polaris10_device_info }, /* Polaris10 */ ++ { 0x6920, &tonga_device_info }, /* Tonga */ ++ { 0x6921, &tonga_device_info }, /* Tonga */ ++ { 0x6928, &tonga_device_info }, /* Tonga */ ++ { 0x6929, &tonga_device_info }, /* Tonga */ ++ { 0x692B, &tonga_device_info }, /* Tonga */ ++ { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ ++ { 0x6938, &tonga_device_info }, /* Tonga */ ++ { 0x6939, &tonga_device_info }, /* Tonga */ ++ { 0x7300, &fiji_device_info }, /* Fiji */ ++ { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ ++ { 0x67C0, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C1, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C8, &polaris10_device_info }, /* Polaris10 */ +- { 0x67C9, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CA, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CC, &polaris10_device_info }, /* Polaris10 */ +- { 0x67CF, &polaris10_device_info }, /* Polaris10 */ +- { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ ++ { 0x67C8, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67C9, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CA, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CC, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67CF, &polaris10_device_info }, /* Polaris10 */ ++ { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ +- { 0x67E0, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E1, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E0, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E7, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E8, &polaris11_device_info }, /* Polaris11 */ +- { 0x67E9, &polaris11_device_info }, /* Polaris11 */ +- { 0x67EB, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E7, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67E8, &polaris11_device_info }, /* Polaris11 
*/ ++ { 0x67E9, &polaris11_device_info }, /* Polaris11 */ ++ { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ +@@ -357,12 +323,6 @@ static const struct kfd_deviceid supported_devices[] = { + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x687F, &vega10_device_info }, /* Vega10 */ +- { 0x66a0, &vega20_device_info }, /* Vega20 */ +- { 0x66a1, &vega20_device_info }, /* Vega20 */ +- { 0x66a2, &vega20_device_info }, /* Vega20 */ +- { 0x66a3, &vega20_device_info }, /* Vega20 */ +- { 0x66a7, &vega20_device_info }, /* Vega20 */ +- { 0x66af, &vega20_device_info } /* Vega20 */ + }; + + static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, +@@ -392,7 +352,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) + { + struct kfd_dev *kfd; +- int ret; ++ + const struct kfd_device_info *device_info = + lookup_device_info(pdev->device); + +@@ -400,27 +360,24 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + dev_err(kfd_device, "kgd2kfd_probe failed\n"); + return NULL; + } +- ++ ++ if (device_info->needs_pci_atomics) { ++ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. ++ * 32 and 64-bit requests are possible and must be ++ * supported. ++ */ ++ if (pci_enable_atomic_ops_to_root(pdev) < 0) { ++ dev_info(kfd_device, ++ "skipped device %x:%x, PCI rejects atomics", ++ pdev->vendor, pdev->device); ++ return NULL; ++ } ++ } ++ + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); + if (!kfd) + return NULL; + +- /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. +- * 32 and 64-bit requests are possible and must be +- * supported. +- */ +- ret = pci_enable_atomic_ops_to_root(pdev, +- PCI_EXP_DEVCAP2_ATOMIC_COMP32 | +- PCI_EXP_DEVCAP2_ATOMIC_COMP64); +- if (device_info->needs_pci_atomics && ret < 0) { +- dev_info(kfd_device, +- "skipped device %x:%x, PCI rejects atomics", +- pdev->vendor, pdev->device); +- kfree(kfd); +- return NULL; +- } else if (!ret) +- kfd->pci_atomic_requested = true; +- + kfd->kgd = kgd; + kfd->device_info = device_info; + kfd->pdev = pdev; +@@ -462,6 +419,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + KGD_ENGINE_SDMA1); + kfd->shared_resources = *gpu_resources; + ++ /* Usually first_vmid_kfd = 8, last_vmid_kfd = 15 */ + kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; + kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd +@@ -498,8 +456,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + + if (kfd->kfd2kgd->init_gtt_mem_allocation( + kfd->kgd, size, &kfd->gtt_mem, +- &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, +- false)) { ++ &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){ + dev_err(kfd_device, "Could not allocate %d bytes\n", size); + goto out; + } +@@ -592,52 +549,21 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) + + int kgd2kfd_pre_reset(struct kfd_dev *kfd) + { +- if (!kfd->init_complete) +- return 0; +- kgd2kfd_suspend(kfd); +- +- /* hold dqm->lock to prevent further execution*/ +- mutex_lock(&kfd->dqm->lock); +- +- kfd_signal_reset_event(kfd); + return 0; + } + +-/* +- * Fix me. KFD won't be able to resume existing process for now. +- * We will keep all existing process in a evicted state and +- * wait the process to be terminated. 
+- */ +- + int kgd2kfd_post_reset(struct kfd_dev *kfd) + { +- int ret, count; +- +- if (!kfd->init_complete) +- return 0; +- +- mutex_unlock(&kfd->dqm->lock); +- +- ret = kfd_resume(kfd); +- if (ret) +- return ret; +- count = atomic_dec_return(&kfd_locked); +- WARN_ONCE(count != 0, "KFD reset ref. error"); + return 0; + } + +-bool kfd_is_locked(void) +-{ +- return (atomic_read(&kfd_locked) > 0); +-} +- + void kgd2kfd_suspend(struct kfd_dev *kfd) + { + if (!kfd->init_complete) + return; + + /* For first KFD device suspend all the KFD processes */ +- if (atomic_inc_return(&kfd_locked) == 1) ++ if (atomic_inc_return(&kfd_device_suspended) == 1) + kfd_suspend_all_processes(); + + kfd->dqm->ops.stop(kfd->dqm); +@@ -656,7 +582,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd) + if (ret) + return ret; + +- count = atomic_dec_return(&kfd_locked); ++ count = atomic_dec_return(&kfd_device_suspended); + WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); + if (count == 0) + ret = kfd_resume_all_processes(); +@@ -704,19 +630,19 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) + + spin_lock(&kfd->interrupt_lock); + +- if (kfd->interrupts_active +- && interrupt_is_wanted(kfd, ih_ring_entry, +- patched_ihre, &is_patched) ++ if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, ++ patched_ihre, &is_patched) + && enqueue_ih_ring_entry(kfd, +- is_patched ? patched_ihre : ih_ring_entry)) ++ is_patched ? patched_ihre : ih_ring_entry)) + queue_work(kfd->ih_wq, &kfd->interrupt_work); + + spin_unlock(&kfd->interrupt_lock); + } + +-int kgd2kfd_quiesce_mm(struct mm_struct *mm) ++int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm) + { + struct kfd_process *p; ++ struct kfd_process_device *pdd; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed +@@ -725,17 +651,26 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) +- return -ESRCH; ++ return -ENODEV; + +- r = kfd_process_evict_queues(p); ++ if (kfd) { ++ r = -ENODEV; ++ pdd = kfd_get_process_device_data(kfd, p); ++ if (pdd) ++ r = kfd->dqm->ops.evict_process_queues(kfd->dqm, ++ &pdd->qpd); ++ } else { ++ r = kfd_process_evict_queues(p); ++ } + + kfd_unref_process(p); + return r; + } + +-int kgd2kfd_resume_mm(struct mm_struct *mm) ++int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm) + { + struct kfd_process *p; ++ struct kfd_process_device *pdd; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed +@@ -744,9 +679,17 @@ int kgd2kfd_resume_mm(struct mm_struct *mm) + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) +- return -ESRCH; ++ return -ENODEV; + +- r = kfd_process_restore_queues(p); ++ if (kfd) { ++ r = -ENODEV; ++ pdd = kfd_get_process_device_data(kfd, p); ++ if (pdd) ++ r = kfd->dqm->ops.restore_process_queues(kfd->dqm, ++ &pdd->qpd); ++ } else { ++ r = kfd_process_restore_queues(p); ++ } + + kfd_unref_process(p); + return r; +@@ -981,26 +924,3 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) + kfree(mem_obj); + return 0; + } +- +-#if defined(CONFIG_DEBUG_FS) +- +-/* This function will send a package to HIQ to hang the HWS +- * which will trigger a GPU reset and bring the HWS back to normal state +- */ +-int kfd_debugfs_hang_hws(struct kfd_dev *dev) +-{ +- int r = 0; +- +- if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { +- pr_err("HWS is not enabled"); +- return -EINVAL; +- } +- +- r = pm_debugfs_hang_hws(&dev->dqm->packets); +- if (!r) +- r = 
dqm_debugfs_execute_queues(dev->dqm); +- +- return r; +-} +- +-#endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index ae6f7d8..8c04f7a2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -21,11 +21,10 @@ + * + */ + +-#include <linux/ratelimit.h> +-#include <linux/printk.h> + #include <linux/slab.h> + #include <linux/list.h> + #include <linux/types.h> ++#include <linux/printk.h> + #include <linux/bitops.h> + #include <linux/sched.h> + #include "kfd_priv.h" +@@ -61,8 +60,6 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + static void deallocate_sdma_queue(struct device_queue_manager *dqm, + unsigned int sdma_queue_id); + +-static void kfd_process_hw_exception(struct work_struct *work); +- + static inline + enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) + { +@@ -109,7 +106,7 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) + { + return dqm->dev->device_info->num_sdma_engines +- * dqm->dev->device_info->num_sdma_queues_per_engine; ++ * KFD_SDMA_QUEUES_PER_ENGINE; + } + + void program_sh_mem_settings(struct device_queue_manager *dqm, +@@ -200,7 +197,7 @@ static int allocate_vmid(struct device_queue_manager *dqm, + dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, + qpd->vmid, + qpd->page_table_base); +- /* invalidate the VM context after pasid and vmid mapping is set up */ ++ /*invalidate the VM context after pasid and vmid mapping is set up*/ + kfd_flush_tlb(qpd_to_pdd(qpd)); + + return 0; +@@ -209,19 +206,16 @@ static int allocate_vmid(struct device_queue_manager *dqm, + static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, + struct qcm_process_device *qpd) + { +- const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; +- int ret; ++ uint32_t len; + + if (!qpd->ib_kaddr) + return -ENOMEM; + +- ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); +- if (ret) +- return ret; ++ len = qpd->dqm->packets.pmf->release_mem(qpd->ib_base, ++ (uint32_t *)qpd->ib_kaddr); + + return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, +- qpd->ib_base, (uint32_t *)qpd->ib_kaddr, +- pmf->release_mem_size / sizeof(uint32_t)); ++ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); + } + + static void deallocate_vmid(struct device_queue_manager *dqm, +@@ -290,6 +284,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, + if (retval) { + if (list_empty(&qpd->queues_list)) + deallocate_vmid(dqm, qpd, q); ++ + goto out_unlock; + } + +@@ -359,10 +354,10 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + int retval; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); +- if (!mqd_mgr) ++ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); ++ if (!mqd) + return -ENOMEM; + + retval = allocate_hqd(dqm, q); +@@ -373,7 +368,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + if (retval) + goto out_deallocate_hqd; + +- retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, ++ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval) + goto out_deallocate_doorbell; +@@ -387,15 +382,15 @@ static int 
create_compute_queue_nocpsch(struct device_queue_manager *dqm, + if (!q->properties.is_active) + return 0; + +- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, +- &q->properties, q->process->mm); ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties, ++ q->process->mm); + if (retval) + goto out_uninit_mqd; + + return 0; + + out_uninit_mqd: +- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + out_deallocate_doorbell: + deallocate_doorbell(qpd, q); + out_deallocate_hqd: +@@ -412,11 +407,11 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, + struct queue *q) + { + int retval; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) ++ if (!mqd) + return -ENOMEM; + + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { +@@ -433,14 +428,14 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, + + deallocate_doorbell(qpd, q); + +- retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, ++ retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, + KFD_UNMAP_LATENCY_MS, + q->pipe, q->queue); + if (retval == -ETIME) + qpd->reset_wavefronts = true; + +- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + + list_del(&q->list); + if (list_empty(&qpd->queues_list)) { +@@ -480,19 +475,21 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + static int update_queue(struct device_queue_manager *dqm, struct queue *q) + { + int retval; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + struct kfd_process_device *pdd; ++ + bool prev_active = false; + + mutex_lock(&dqm->lock); ++ + pdd = kfd_get_process_device_data(q->device, q->process); + if (!pdd) { + retval = -ENODEV; + goto out_unlock; + } +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) { ++ if (!mqd) { + retval = -ENOMEM; + goto out_unlock; + } +@@ -500,7 +497,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + * Eviction state logic: we only mark active queues as evicted + * to avoid the overhead of restoring inactive queues later + */ +- if (pdd->qpd.evicted) ++ if (pdd->qpd.evicted > 0) + q->properties.is_evicted = (q->properties.queue_size > 0 && + q->properties.queue_percent > 0 && + q->properties.queue_address != 0); +@@ -519,7 +516,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + } else if (prev_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { +- retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, ++ retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) { +@@ -528,7 +525,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + } + } + +- retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); ++ retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + + /* + * check active state vs. 
the previous state and modify +@@ -546,7 +543,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + else if (q->properties.is_active && + (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || + q->properties.type == KFD_QUEUE_TYPE_SDMA)) +- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, + &q->properties, q->process->mm); + + out_unlock: +@@ -557,29 +554,29 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + static struct mqd_manager *get_mqd_manager( + struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) + { +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; + + pr_debug("mqd type %d\n", type); + +- mqd_mgr = dqm->mqd_mgrs[type]; +- if (!mqd_mgr) { +- mqd_mgr = mqd_manager_init(type, dqm->dev); +- if (!mqd_mgr) ++ mqd = dqm->mqds[type]; ++ if (!mqd) { ++ mqd = mqd_manager_init(type, dqm->dev); ++ if (!mqd) + pr_err("mqd manager is NULL"); +- dqm->mqd_mgrs[type] = mqd_mgr; ++ dqm->mqds[type] = mqd; + } + +- return mqd_mgr; ++ return mqd; + } + + static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + struct queue *q; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + struct kfd_process_device *pdd; + int retval = 0; + +@@ -595,16 +592,16 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_active) + continue; +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) { /* should not be here */ ++ if (!mqd) { /* should not be here */ + pr_err("Cannot evict queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = true; + q->properties.is_active = false; +- retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, ++ retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, + KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); + if (retval) +@@ -654,9 +651,9 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + struct queue *q; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + struct kfd_process_device *pdd; +- uint64_t pd_base; ++ uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); +@@ -676,7 +673,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; +- pr_debug("Updated PD address to 0x%llx\n", pd_base); ++ pr_debug("Updated PD address to 0x%08x\n", pd_base); + + if (!list_empty(&qpd->queues_list)) { + dqm->dev->kfd2kgd->set_vm_context_page_table_base( +@@ -690,16 +687,16 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, + list_for_each_entry(q, &qpd->queues_list, list) { + if (!q->properties.is_evicted) + continue; +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) { /* should not be here */ ++ if (!mqd) { /* should not be here */ + pr_err("Cannot restore queue, mqd mgr is NULL\n"); + retval = -ENOMEM; + goto out; + } + q->properties.is_evicted = false; + q->properties.is_active = true; +- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, ++ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, + q->queue, 
&q->properties, + q->process->mm); + if (retval) +@@ -717,7 +714,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, + { + struct queue *q; + struct kfd_process_device *pdd; +- uint64_t pd_base; ++ uint32_t pd_base; + int retval = 0; + + pdd = qpd_to_pdd(qpd); +@@ -737,7 +734,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; +- pr_debug("Updated PD address to 0x%llx\n", pd_base); ++ pr_debug("Updated PD address to 0x%08x\n", pd_base); + + /* activate all active queues on the qpd */ + list_for_each_entry(q, &qpd->queues_list, list) { +@@ -760,9 +757,9 @@ static int register_process(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) + { + struct device_process_node *n; +- struct kfd_process_device *pdd; +- uint64_t pd_base; + int retval; ++ struct kfd_process_device *pdd; ++ uint32_t pd_base; + + n = kzalloc(sizeof(*n), GFP_KERNEL); + if (!n) +@@ -779,7 +776,7 @@ static int register_process(struct device_queue_manager *dqm, + + /* Update PD Base in QPD */ + qpd->page_table_base = pd_base; +- pr_debug("Updated PD address to 0x%llx\n", pd_base); ++ pr_debug("Updated PD address to 0x%08x\n", pd_base); + + retval = dqm->asic_ops.update_qpd(dqm, qpd); + +@@ -880,7 +877,7 @@ static void uninitialize(struct device_queue_manager *dqm) + + kfree(dqm->allocated_queues); + for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) +- kfree(dqm->mqd_mgrs[i]); ++ kfree(dqm->mqds[i]); + mutex_destroy(&dqm->lock); + kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); + } +@@ -888,7 +885,7 @@ static void uninitialize(struct device_queue_manager *dqm) + static int start_nocpsch(struct device_queue_manager *dqm) + { + init_interrupts(dqm); +- return pm_init(&dqm->packets, dqm); ++ return pm_init(&dqm->packets, dqm, dqm->dev->mec_fw_version); + } + + static int stop_nocpsch(struct device_queue_manager *dqm) +@@ -924,11 +921,11 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) + { +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + int retval; + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); +- if (!mqd_mgr) ++ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); ++ if (!mqd) + return -ENOMEM; + + retval = allocate_sdma_queue(dqm, &q->sdma_id); +@@ -947,20 +944,19 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); +- retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, ++ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval) + goto out_deallocate_doorbell; + +- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties, +- NULL); ++ retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); + if (retval) + goto out_uninit_mqd; + + return 0; + + out_uninit_mqd: +- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + out_deallocate_doorbell: + deallocate_doorbell(qpd, q); + out_deallocate_sdma_queue: +@@ -1025,8 +1021,6 @@ static int initialize_cpsch(struct device_queue_manager *dqm) + dqm->active_runlist = false; + dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + +- INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); +- + return 0; + } + +@@ -1036,7 +1030,7 @@ static int start_cpsch(struct device_queue_manager *dqm) + + 
retval = 0; + +- retval = pm_init(&dqm->packets, dqm); ++ retval = pm_init(&dqm->packets, dqm, dqm->dev->mec_fw_version); + if (retval) + goto fail_packet_manager_init; + +@@ -1059,8 +1053,6 @@ static int start_cpsch(struct device_queue_manager *dqm) + init_interrupts(dqm); + + mutex_lock(&dqm->lock); +- /* clear hang status when driver try to start the hw scheduler */ +- dqm->is_hws_hang = false; + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); + mutex_unlock(&dqm->lock); + +@@ -1075,7 +1067,9 @@ static int start_cpsch(struct device_queue_manager *dqm) + static int stop_cpsch(struct device_queue_manager *dqm) + { + mutex_lock(&dqm->lock); ++ + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); ++ + mutex_unlock(&dqm->lock); + + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); +@@ -1136,7 +1130,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) + { + int retval; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + + retval = 0; + +@@ -1163,10 +1157,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + if (retval) + goto out_deallocate_sdma_queue; + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); + +- if (!mqd_mgr) { ++ if (!mqd) { + retval = -ENOMEM; + goto out_deallocate_doorbell; + } +@@ -1183,7 +1177,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + + q->properties.tba_addr = qpd->tba_addr; + q->properties.tma_addr = qpd->tma_addr; +- retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, ++ retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval) + goto out_deallocate_doorbell; +@@ -1230,13 +1224,6 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + while (*fence_addr != fence_value) { + if (time_after(jiffies, end_jiffies)) { + pr_err("qcm fence wait loop timeout expired\n"); +- /* In HWS case, this is used to halt the driver thread +- * in order not to mess up CP states before doing +- * scandumps for FW debugging. 
+- */ +- while (halt_if_hws_hang) +- schedule(); +- + return -ETIME; + } + schedule(); +@@ -1281,8 +1268,6 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, + { + int retval = 0; + +- if (dqm->is_hws_hang) +- return -EIO; + if (!dqm->active_runlist) + return retval; + +@@ -1321,13 +1306,9 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, + { + int retval; + +- if (dqm->is_hws_hang) +- return -EIO; + retval = unmap_queues_cpsch(dqm, filter, filter_param); + if (retval) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); +- dqm->is_hws_hang = true; +- schedule_work(&dqm->hw_exception_work); + return retval; + } + +@@ -1339,7 +1320,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + struct queue *q) + { + int retval; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + bool preempt_all_queues; + + preempt_all_queues = false; +@@ -1359,9 +1340,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + + } + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) { ++ if (!mqd) { + retval = -ENOMEM; + goto failed; + } +@@ -1382,7 +1363,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, + if (retval == -ETIME) + qpd->reset_wavefronts = true; + +- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + + /* + * Unconditionally decrement this counter, regardless of the queue's +@@ -1531,7 +1512,7 @@ static int get_wave_state(struct device_queue_manager *dqm, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) + { +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + int r; + + mutex_lock(&dqm->lock); +@@ -1542,19 +1523,19 @@ static int get_wave_state(struct device_queue_manager *dqm, + goto dqm_unlock; + } + +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); +- if (!mqd_mgr) { ++ mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); ++ if (!mqd) { + r = -ENOMEM; + goto dqm_unlock; + } + +- if (!mqd_mgr->get_wave_state) { ++ if (!mqd->get_wave_state) { + r = -EINVAL; + goto dqm_unlock; + } + +- r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, +- ctl_stack_used_size, save_area_used_size); ++ r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size, ++ save_area_used_size); + + dqm_unlock: + mutex_unlock(&dqm->lock); +@@ -1567,7 +1548,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + int retval; + struct queue *q, *next; + struct kernel_queue *kq, *kq_next; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + struct device_process_node *cur, *next_dpn; + enum kfd_unmap_queues_filter filter = + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; +@@ -1609,7 +1590,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + } + + retval = execute_queues_cpsch(dqm, filter, 0); +- if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { ++ if (retval || qpd->reset_wavefronts) { + pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); + dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); + qpd->reset_wavefronts = false; +@@ -1617,15 +1598,15 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, + + /* lastly, free mqd resources */ + list_for_each_entry_safe(q, next, &qpd->queues_list, list) { +- mqd_mgr = dqm->ops.get_mqd_manager(dqm, ++ mqd = dqm->ops.get_mqd_manager(dqm, + 
get_mqd_type_from_queue_type(q->properties.type)); +- if (!mqd_mgr) { ++ if (!mqd) { + retval = -ENOMEM; + goto out; + } + list_del(&q->list); + qpd->queue_count--; +- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); ++ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + } + + out: +@@ -1644,13 +1625,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + return NULL; + + switch (dev->device_info->asic_family) { +- /* HWS is not available on Hawaii. */ + case CHIP_HAWAII: +- /* HWS depends on CWSR for timely dequeue. CWSR is not +- * available on Tonga. +- * +- * FIXME: This argument also applies to Kaveri. +- */ + case CHIP_TONGA: + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; + break; +@@ -1729,9 +1704,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) + break; + + case CHIP_VEGA10: +- case CHIP_VEGA20: + case CHIP_RAVEN: +- device_queue_manager_init_v9(&dqm->asic_ops); ++ device_queue_manager_init_v9_vega10(&dqm->asic_ops); + break; + default: + WARN(1, "Unexpected ASIC family %u", +@@ -1770,13 +1744,6 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, + return ret; + } + +-static void kfd_process_hw_exception(struct work_struct *work) +-{ +- struct device_queue_manager *dqm = container_of(work, +- struct device_queue_manager, hw_exception_work); +- dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); +-} +- + #if defined(CONFIG_DEBUG_FS) + + static void seq_reg_dump(struct seq_file *m, +@@ -1841,9 +1808,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) + } + + for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { +- for (queue = 0; +- queue < dqm->dev->device_info->num_sdma_queues_per_engine; +- queue++) { ++ for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) { + r = dqm->dev->kfd2kgd->hqd_sdma_dump( + dqm->dev->kgd, pipe, queue, &dump, &n_regs); + if (r) +@@ -1860,16 +1825,4 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) + return r; + } + +-int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) +-{ +- int r = 0; +- +- mutex_lock(&dqm->lock); +- dqm->active_runlist = true; +- r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); +- mutex_unlock(&dqm->lock); +- +- return r; +-} +- + #endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +index 1c4ef00..978458a 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +@@ -31,6 +31,7 @@ + + #define KFD_UNMAP_LATENCY_MS (4000) + #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000) ++#define KFD_SDMA_QUEUES_PER_ENGINE (2) + + struct device_process_node { + struct qcm_process_device *qpd; +@@ -174,7 +175,7 @@ struct device_queue_manager { + struct device_queue_manager_ops ops; + struct device_queue_manager_asic_ops asic_ops; + +- struct mqd_manager *mqd_mgrs[KFD_MQD_TYPE_MAX]; ++ struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; + struct packet_manager packets; + struct kfd_dev *dev; + struct mutex lock; +@@ -194,10 +195,6 @@ struct device_queue_manager { + struct kfd_mem_obj *fence_mem; + bool active_runlist; + int sched_policy; +- +- /* hw exception */ +- bool is_hws_hang; +- struct work_struct hw_exception_work; + }; + + void device_queue_manager_init_cik( +@@ -208,7 +205,7 @@ void device_queue_manager_init_vi( + struct device_queue_manager_asic_ops *asic_ops); + void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops); 
+-void device_queue_manager_init_v9( ++void device_queue_manager_init_v9_vega10( + struct device_queue_manager_asic_ops *asic_ops); + void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +@@ -217,11 +214,18 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); + unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); + ++int process_evict_queues(struct device_queue_manager *dqm, ++ struct qcm_process_device *qpd); ++int process_restore_queues(struct device_queue_manager *dqm, ++ struct qcm_process_device *qpd); ++ ++ + static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) + { + return (pdd->lds_base >> 16) & 0xFF; + } + ++/* This function is only useful for GFXv7 and v8 */ + static inline unsigned int + get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) + { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +index 4175153..6198bf2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016-2018 Advanced Micro Devices, Inc. ++ * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -32,7 +32,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, + static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +-void device_queue_manager_init_v9( ++void device_queue_manager_init_v9_vega10( + struct device_queue_manager_asic_ops *asic_ops) + { + asic_ops->update_qpd = update_qpd_v9; +@@ -60,7 +60,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; +- if (noretry && ++ if (vega10_noretry && + !dqm->dev->device_info->needs_iommu_device) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +index fd60a11..030b014 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +@@ -33,30 +33,26 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); ++static int update_qpd_vi(struct device_queue_manager *dqm, ++ struct qcm_process_device *qpd); ++static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, ++ struct qcm_process_device *qpd); ++ ++/* ++ * Tonga device queue manager functions ++ */ + static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); +-static int update_qpd_vi(struct device_queue_manager *dqm, +- struct qcm_process_device *qpd); + static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +-static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, +- struct 
qcm_process_device *qpd); + static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); + +-void device_queue_manager_init_vi( +- struct device_queue_manager_asic_ops *asic_ops) +-{ +- asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; +- asic_ops->update_qpd = update_qpd_vi; +- asic_ops->init_sdma_vm = init_sdma_vm; +-} +- + void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops) + { +@@ -65,6 +61,15 @@ void device_queue_manager_init_vi_tonga( + asic_ops->init_sdma_vm = init_sdma_vm_tonga; + } + ++ ++void device_queue_manager_init_vi( ++ struct device_queue_manager_asic_ops *asic_ops) ++{ ++ asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; ++ asic_ops->update_qpd = update_qpd_vi; ++ asic_ops->init_sdma_vm = init_sdma_vm; ++} ++ + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) + { + /* In 64-bit mode, we can only control the top 3 bits of the LDS, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +index ebe79bf..fc41689 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +@@ -115,7 +115,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) + pr_debug("doorbell aperture size == 0x%08lX\n", + kfd->shared_resources.doorbell_aperture_size); + +- pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); ++ pr_debug("doorbell kernel address == 0x%p\n", kfd->doorbell_kernel_ptr); + + return 0; + } +@@ -188,9 +188,9 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + *doorbell_off = kfd->doorbell_id_offset + inx; + + pr_debug("Get kernel queue doorbell\n" +- " doorbell offset == 0x%08X\n" +- " doorbell index == 0x%x\n", +- *doorbell_off, inx); ++ " doorbell offset == 0x%08X\n" ++ " kernel address == 0x%p\n", ++ *doorbell_off, (kfd->doorbell_kernel_ptr + inx)); + + return kfd->doorbell_kernel_ptr + inx; + } +@@ -199,8 +199,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) + { + unsigned int inx; + +- inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) +- * sizeof(u32) / kfd->device_info->doorbell_size; ++ inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + + mutex_lock(&kfd->doorbell_mutex); + __clear_bit(inx, kfd->doorbell_available_index); +@@ -211,7 +210,7 @@ void write_kernel_doorbell(void __iomem *db, u32 value) + { + if (db) { + writel(value, db); +- pr_debug("Writing %d to doorbell address %p\n", value, db); ++ pr_debug("Writing %d to doorbell address 0x%p\n", value, db); + } + } + +@@ -221,10 +220,14 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); +- pr_debug("writing %llu to doorbell address %p\n", value, db); ++ pr_debug("writing %llu to doorbell address 0x%p\n", value, db); + } + } + ++/* ++ * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 ++ * to doorbells with the process's doorbell page ++ */ + unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, + struct kfd_process *process, + unsigned int doorbell_id) +@@ -236,8 +239,7 @@ unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, + * units regardless of the ASIC-dependent doorbell size. 
+ */ + return kfd->doorbell_id_offset + +- process->doorbell_index +- * kfd_doorbell_process_slice(kfd) / sizeof(u32) + ++ process->doorbell_index * (kfd_doorbell_process_slice(kfd)/sizeof(u32)) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 1dc1584..a92ca78 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -51,8 +51,8 @@ struct kfd_event_waiter { + */ + struct kfd_signal_page { + uint64_t *kernel_address; ++ uint64_t handle; + uint64_t __user *user_address; +- bool need_to_free_pages; + }; + + +@@ -80,7 +80,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) + KFD_SIGNAL_EVENT_LIMIT * 8); + + page->kernel_address = backing_store; +- page->need_to_free_pages = true; + pr_debug("Allocated new event signal page at %p, for process %p\n", + page, p); + +@@ -100,17 +99,9 @@ static int allocate_event_notification_slot(struct kfd_process *p, + p->signal_page = allocate_signal_page(p); + if (!p->signal_page) + return -ENOMEM; +- /* Oldest user mode expects 256 event slots */ +- p->signal_mapped_size = 256*8; + } + +- /* +- * Compatibility with old user mode: Only use signal slots +- * user mode has mapped, may be less than +- * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase +- * of the event limit without breaking user mode. +- */ +- id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, ++ id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT, + GFP_KERNEL); + if (id < 0) + return id; +@@ -121,6 +112,29 @@ static int allocate_event_notification_slot(struct kfd_process *p, + return 0; + } + ++static struct kfd_signal_page *allocate_signal_page_dgpu( ++ struct kfd_process *p, uint64_t *kernel_address, uint64_t handle) ++{ ++ struct kfd_signal_page *my_page; ++ ++ my_page = kzalloc(sizeof(*my_page), GFP_KERNEL); ++ if (!my_page) ++ return NULL; ++ ++ /* Initialize all events to unsignaled */ ++ memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT, ++ KFD_SIGNAL_EVENT_LIMIT * 8); ++ ++ my_page->kernel_address = kernel_address; ++ my_page->handle = handle; ++ my_page->user_address = NULL; ++ ++ pr_debug("Allocated new event signal page at %p, for process %p\n", ++ my_page, p); ++ ++ return my_page; ++} ++ + /* + * Assumes that p->event_mutex is held and of course that p is not going + * away (current or locked). 
+@@ -184,8 +198,7 @@ static int create_signal_event(struct file *devkfd, + { + int ret; + +- if (p->signal_mapped_size && +- p->signal_event_count == p->signal_mapped_size / 8) { ++ if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { + if (!p->signal_event_limit_reached) { + pr_warn("Signal event wasn't created because limit was reached\n"); + p->signal_event_limit_reached = true; +@@ -271,9 +284,9 @@ static void shutdown_signal_page(struct kfd_process *p) + struct kfd_signal_page *page = p->signal_page; + + if (page) { +- if (page->need_to_free_pages) ++ if (page->user_address) + free_pages((unsigned long)page->kernel_address, +- get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); ++ get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + kfree(page); + } + } +@@ -295,34 +308,11 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev) + return ev->type == KFD_EVENT_TYPE_SIGNAL; + } + +-int kfd_event_page_set(struct kfd_process *p, void *kernel_address, +- uint64_t size) +-{ +- struct kfd_signal_page *page; +- +- if (p->signal_page) +- return -EBUSY; +- +- page = kzalloc(sizeof(*page), GFP_KERNEL); +- if (!page) +- return -ENOMEM; +- +- /* Initialize all events to unsignaled */ +- memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT, +- KFD_SIGNAL_EVENT_LIMIT * 8); +- +- page->kernel_address = kernel_address; +- +- p->signal_page = page; +- p->signal_mapped_size = size; +- +- return 0; +-} +- + int kfd_event_create(struct file *devkfd, struct kfd_process *p, + uint32_t event_type, bool auto_reset, uint32_t node_id, + uint32_t *event_id, uint32_t *event_trigger_data, +- uint64_t *event_page_offset, uint32_t *event_slot_index) ++ uint64_t *event_page_offset, uint32_t *event_slot_index, ++ void *kern_addr) + { + int ret = 0; + struct kfd_event *ev = kzalloc(sizeof(*ev), GFP_KERNEL); +@@ -336,10 +326,19 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, + + init_waitqueue_head(&ev->wq); + +- *event_page_offset = 0; +- + mutex_lock(&p->event_mutex); + ++ if (kern_addr && !p->signal_page) { ++ p->signal_page = allocate_signal_page_dgpu(p, kern_addr, ++ *event_page_offset); ++ if (!p->signal_page) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ } ++ ++ *event_page_offset = 0; ++ + switch (event_type) { + case KFD_EVENT_TYPE_SIGNAL: + case KFD_EVENT_TYPE_DEBUG: +@@ -362,6 +361,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, + kfree(ev); + } + ++out: + mutex_unlock(&p->event_mutex); + + return ret; +@@ -390,11 +390,7 @@ static void set_event(struct kfd_event *ev) + { + struct kfd_event_waiter *waiter; + +- /* Auto reset if the list is non-empty and we're waking +- * someone. waitqueue_active is safe here because we're +- * protected by the p->event_mutex, which is also held when +- * updating the wait queues in kfd_wait_on_events. +- */ ++ /* Auto reset if the list is non-empty and we're waking someone. 
*/ + ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); + + list_for_each_entry(waiter, &ev->wq.head, wait.entry) +@@ -781,12 +777,12 @@ int kfd_wait_on_events(struct kfd_process *p, + + int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + { ++ + unsigned long pfn; + struct kfd_signal_page *page; +- int ret; + +- /* check required size doesn't exceed the allocated size */ +- if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) < ++ /* check required size is logical */ ++ if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) != + get_order(vma->vm_end - vma->vm_start)) { + pr_err("Event page mmap requested illegal size\n"); + return -EINVAL; +@@ -816,12 +812,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) + page->user_address = (uint64_t __user *)vma->vm_start; + + /* mapping the page to user process */ +- ret = remap_pfn_range(vma, vma->vm_start, pfn, ++ return remap_pfn_range(vma, vma->vm_start, pfn, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +- if (!ret) +- p->signal_mapped_size = vma->vm_end - vma->vm_start; +- +- return ret; + } + + /* +@@ -1012,30 +1004,3 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, + mutex_unlock(&p->event_mutex); + kfd_unref_process(p); + } +- +-void kfd_signal_reset_event(struct kfd_dev *dev) +-{ +- struct kfd_hsa_hw_exception_data hw_exception_data; +- struct kfd_process *p; +- struct kfd_event *ev; +- unsigned int temp; +- uint32_t id, idx; +- +- /* Whole gpu reset caused by GPU hang , and memory is lost */ +- memset(&hw_exception_data, 0, sizeof(hw_exception_data)); +- hw_exception_data.gpu_id = dev->id; +- hw_exception_data.memory_lost = 1; +- +- idx = srcu_read_lock(&kfd_processes_srcu); +- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { +- mutex_lock(&p->event_mutex); +- id = KFD_FIRST_NONSIGNAL_EVENT_ID; +- idr_for_each_entry_continue(&p->event_idr, ev, id) +- if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { +- ev->hw_exception_data = hw_exception_data; +- set_event(ev); +- } +- mutex_unlock(&p->event_mutex); +- } +- srcu_read_unlock(&kfd_processes_srcu, idx); +-} +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h +index c7ac6c7..abca5bf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h +@@ -66,7 +66,6 @@ struct kfd_event { + /* type specific data */ + union { + struct kfd_hsa_memory_exception_data memory_exception_data; +- struct kfd_hsa_hw_exception_data hw_exception_data; + }; + }; + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index 8f123a2..2c00711 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -289,6 +289,7 @@ + + #define MAKE_LDS_APP_BASE_VI() \ + (((uint64_t)(0x1UL) << 61) + 0x0) ++ + #define MAKE_LDS_APP_LIMIT(base) \ + (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) + +@@ -312,7 +313,17 @@ + #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) + #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) + +-static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) ++int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, ++ uint64_t base, uint64_t limit) ++{ ++ if (base < SVM_USER_BASE) { ++ pr_err("Set dgpu vm base 0x%llx failed.\n", base); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) + { + /* + * node id couldn't be 0 - the three 
MSB bits of +@@ -321,42 +332,19 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) + pdd->lds_base = MAKE_LDS_APP_BASE_VI(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + +- if (!pdd->dev->device_info->needs_iommu_device) { +- /* dGPUs: SVM aperture starting at 0 +- * with small reserved space for kernel. +- * Set them to CANONICAL addresses. +- */ +- pdd->gpuvm_base = SVM_USER_BASE; +- pdd->gpuvm_limit = +- pdd->dev->shared_resources.gpuvm_size - 1; +- } else { +- /* set them to non CANONICAL addresses, and no SVM is +- * allocated. +- */ +- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); +- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, +- pdd->dev->shared_resources.gpuvm_size); +- } ++ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); ++ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( ++ pdd->gpuvm_base, pdd->dev->shared_resources.gpuvm_size); + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + } + +-static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) ++void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) + { + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + +- /* Raven needs SVM to support graphic handle, etc. Leave the small +- * reserved space before SVM on Raven as well, even though we don't +- * have to. +- * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they +- * are used in Thunk to reserve SVM. +- */ +- pdd->gpuvm_base = SVM_USER_BASE; +- pdd->gpuvm_limit = +- pdd->dev->shared_resources.gpuvm_size - 1; +- + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + } +@@ -377,10 +365,10 @@ int kfd_init_apertures(struct kfd_process *process) + pdd = kfd_create_process_device_data(dev, process); + if (!pdd) { + pr_err("Failed to create process device data\n"); +- return -ENOMEM; ++ return -1; + } + /* +- * For 64 bit process apertures will be statically reserved in ++ * For 64 bit process aperture will be statically reserved in + * the x86_64 non canonical process address space + * amdkfd doesn't currently support apertures for 32 bit process + */ +@@ -400,20 +388,21 @@ int kfd_init_apertures(struct kfd_process *process) + kfd_init_apertures_vi(pdd, id); + break; + case CHIP_VEGA10: +- case CHIP_VEGA20: + case CHIP_RAVEN: + kfd_init_apertures_v9(pdd, id); + break; + default: +- WARN(1, "Unexpected ASIC family %u", +- dev->device_info->asic_family); +- return -EINVAL; ++ pr_err("Unknown chip in kfd_init_apertures\n"); ++ return -1; + } + + if (!dev->device_info->needs_iommu_device) { +- /* dGPUs: the reserved space for kernel +- * before SVM ++ /* dGPUs: SVM aperture starting at 0 ++ * with small reserved space for kernel + */ ++ pdd->gpuvm_base = SVM_USER_BASE; ++ pdd->gpuvm_limit = ++ dev->shared_resources.gpuvm_size - 1; + pdd->qpd.cwsr_base = SVM_CWSR_BASE; + pdd->qpd.ib_base = SVM_IB_BASE; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +index f836897..009d6f4 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016-2018 Advanced Micro Devices, Inc. ++ * Copyright 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -25,43 +25,70 @@ + #include "soc15_int.h" + + ++static uint32_t kfd_get_pasid_from_vmid(struct kfd_dev *dev, uint8_t vmid) ++{ ++ uint32_t pasid = 0; ++ const struct kfd2kgd_calls *f2g = dev->kfd2kgd; ++ ++ if (f2g->get_atc_vmid_pasid_mapping_valid(dev->kgd, vmid)) ++ pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); ++ ++ return pasid; ++} ++ + static bool event_interrupt_isr_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry, + uint32_t *patched_ihre, + bool *patched_flag) + { + uint16_t source_id, client_id, pasid, vmid; +- const uint32_t *data = ih_ring_entry; ++ bool result = false; + +- /* Only handle interrupts from KFD VMIDs */ ++ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); ++ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); ++ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); +- if (vmid < dev->vm_info.first_vmid_kfd || +- vmid > dev->vm_info.last_vmid_kfd) +- return 0; + +- /* If there is no valid PASID, it's likely a firmware bug */ +- pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); +- if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) +- return 0; ++ if (pasid) { ++ const uint32_t *data = ih_ring_entry; + +- source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); +- client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); ++ pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", ++ client_id, source_id, pasid); ++ pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", ++ data[0], data[1], data[2], data[3], ++ data[4], data[5], data[6], data[7]); ++ } ++ ++ if ((vmid >= dev->vm_info.first_vmid_kfd && ++ vmid <= dev->vm_info.last_vmid_kfd) && ++ (source_id == SOC15_INTSRC_CP_END_OF_PIPE || ++ source_id == SOC15_INTSRC_SDMA_TRAP || ++ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || ++ source_id == SOC15_INTSRC_CP_BAD_OPCODE || ++ client_id == SOC15_IH_CLIENTID_VMC || ++ client_id == SOC15_IH_CLIENTID_UTCL2)) { ++ ++ /* ++ * KFD want to handle this INT, but MEC firmware did ++ * not send pasid. Try to get it from vmid mapping ++ * and patch the ih entry. It's a temp workaround. ++ */ ++ WARN_ONCE((!pasid), "Fix me.\n"); ++ if (!pasid) { ++ uint32_t temp = le32_to_cpu(ih_ring_entry[3]); ++ ++ pasid = kfd_get_pasid_from_vmid(dev, vmid); ++ memcpy(patched_ihre, ih_ring_entry, ++ dev->device_info->ih_ring_entry_size); ++ patched_ihre[3] = cpu_to_le32(temp | pasid); ++ *patched_flag = true; ++ } ++ result = pasid ? true : false; ++ } ++ ++ /* Do not process in ISR, just request it to be forwarded to WQ. */ ++ return result; + +- pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", +- client_id, source_id, pasid); +- pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", +- data[0], data[1], data[2], data[3], +- data[4], data[5], data[6], data[7]); +- +- /* Interrupt types we care about: various signals and faults. +- * They will be forwarded to a work queue (see below). 
+- */ +- return source_id == SOC15_INTSRC_CP_END_OF_PIPE || +- source_id == SOC15_INTSRC_SDMA_TRAP || +- source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || +- source_id == SOC15_INTSRC_CP_BAD_OPCODE || +- client_id == SOC15_IH_CLIENTID_VMC || +- client_id == SOC15_IH_CLIENTID_UTCL2; + } + + static void event_interrupt_wq_v9(struct kfd_dev *dev, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +index 7a61f38..5b798f9 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +@@ -75,8 +75,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd) + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { +- dev_err(kfd_device, +- "error required iommu flags ats %i, pri %i, pasid %i\n", ++ dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c +index a53d954..97806ed 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c +@@ -140,7 +140,7 @@ static int kfd_import_dmabuf_create_kfd_bo(struct kfd_dev *dev, + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem, +- va_addr, size, 0, 0, ++ va_addr, size, + ipc_obj); + if (idr_handle < 0) { + r = -EFAULT; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +index e78445d..8cf9d44 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +@@ -59,7 +59,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + switch (type) { + case KFD_QUEUE_TYPE_DIQ: + case KFD_QUEUE_TYPE_HIQ: +- kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, ++ kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm, + KFD_MQD_TYPE_HIQ); + break; + default: +@@ -67,7 +67,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + return false; + } + +- if (!kq->mqd_mgr) ++ if (!kq->mqd) + return false; + + prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); +@@ -131,7 +131,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + kq->queue->device = dev; + kq->queue->process = kfd_get_process(current); + +- retval = kq->mqd_mgr->init_mqd(kq->mqd_mgr, &kq->queue->mqd, ++ retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, + &kq->queue->mqd_mem_obj, + &kq->queue->gart_mqd_addr, + &kq->queue->properties); +@@ -143,9 +143,9 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + pr_debug("Assigning hiq to hqd\n"); + kq->queue->pipe = KFD_CIK_HIQ_PIPE; + kq->queue->queue = KFD_CIK_HIQ_QUEUE; +- kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd, +- kq->queue->pipe, kq->queue->queue, +- &kq->queue->properties, NULL); ++ kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, ++ kq->queue->queue, &kq->queue->properties, ++ NULL); + } else { + /* allocate fence for DIQ */ + +@@ -183,7 +183,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + static void uninitialize(struct kernel_queue *kq) + { + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) +- kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, ++ kq->mqd->destroy_mqd(kq->mqd, + kq->queue->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, + KFD_UNMAP_LATENCY_MS, +@@ -192,8 +192,7 @@ static 
void uninitialize(struct kernel_queue *kq) + else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) + kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); + +- kq->mqd_mgr->uninit_mqd(kq->mqd_mgr, kq->queue->mqd, +- kq->queue->mqd_mem_obj); ++ kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj); + + kfd_gtt_sa_free(kq->dev, kq->rptr_mem); + kfd_gtt_sa_free(kq->dev, kq->wptr_mem); +@@ -316,13 +315,7 @@ static void submit_packet(struct kernel_queue *kq) + + static void rollback_packet(struct kernel_queue *kq) + { +- if (kq->dev->device_info->doorbell_size == 8) { +- kq->pending_wptr64 = *kq->wptr64_kernel; +- kq->pending_wptr = *kq->wptr_kernel % +- (kq->queue->properties.queue_size / 4); +- } else { +- kq->pending_wptr = *kq->wptr_kernel; +- } ++ kq->pending_wptr = *kq->queue->properties.write_ptr; + } + + struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, +@@ -356,7 +349,6 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + break; + + case CHIP_VEGA10: +- case CHIP_VEGA20: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +index 384d7a3..82c94a6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +@@ -80,7 +80,7 @@ struct kernel_queue { + + /* data */ + struct kfd_dev *dev; +- struct mqd_manager *mqd_mgr; ++ struct mqd_manager *mqd; + struct queue *queue; + uint64_t pending_wptr64; + uint32_t pending_wptr; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +index 19e54ac..2808422 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +@@ -22,6 +22,8 @@ + */ + + #include "kfd_kernel_queue.h" ++#include "kfd_pm4_headers.h" ++#include "kfd_pm4_opcodes.h" + + static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +@@ -51,3 +53,120 @@ static void submit_packet_cik(struct kernel_queue *kq) + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); + } ++ ++static int pm_map_process_cik(struct packet_manager *pm, uint32_t *buffer, ++ struct qcm_process_device *qpd) ++{ ++ struct pm4_map_process *packet; ++ ++ packet = (struct pm4_map_process *)buffer; ++ ++ memset(buffer, 0, sizeof(struct pm4_map_process)); ++ ++ packet->header.u32all = pm_build_pm4_header(IT_MAP_PROCESS, ++ sizeof(struct pm4_map_process)); ++ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; ++ packet->bitfields2.process_quantum = 1; ++ packet->bitfields2.pasid = qpd->pqm->process->pasid; ++ packet->bitfields3.page_table_base = qpd->page_table_base; ++ packet->bitfields10.gds_size = qpd->gds_size; ++ packet->bitfields10.num_gws = qpd->num_gws; ++ packet->bitfields10.num_oac = qpd->num_oac; ++ packet->bitfields10.num_queues = (qpd->is_debug) ? 
0 : qpd->queue_count; ++ ++ packet->sh_mem_config = qpd->sh_mem_config; ++ packet->sh_mem_bases = qpd->sh_mem_bases; ++ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; ++ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; ++ ++ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); ++ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); ++ ++ return 0; ++} ++ ++static int pm_map_process_scratch_cik(struct packet_manager *pm, ++ uint32_t *buffer, struct qcm_process_device *qpd) ++{ ++ struct pm4_map_process_scratch_kv *packet; ++ ++ packet = (struct pm4_map_process_scratch_kv *)buffer; ++ ++ memset(buffer, 0, sizeof(struct pm4_map_process_scratch_kv)); ++ ++ packet->header.u32all = pm_build_pm4_header(IT_MAP_PROCESS, ++ sizeof(struct pm4_map_process_scratch_kv)); ++ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; ++ packet->bitfields2.process_quantum = 1; ++ packet->bitfields2.pasid = qpd->pqm->process->pasid; ++ packet->bitfields3.page_table_base = qpd->page_table_base; ++ packet->bitfields14.gds_size = qpd->gds_size; ++ packet->bitfields14.num_gws = qpd->num_gws; ++ packet->bitfields14.num_oac = qpd->num_oac; ++ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; ++ ++ packet->sh_mem_config = qpd->sh_mem_config; ++ packet->sh_mem_bases = qpd->sh_mem_bases; ++ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; ++ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; ++ ++ packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; ++ ++ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); ++ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); ++ ++ return 0; ++} ++ ++static uint32_t pm_get_map_process_packet_size_cik(void) ++{ ++ return sizeof(struct pm4_map_process); ++} ++static uint32_t pm_get_map_process_scratch_packet_size_cik(void) ++{ ++ return sizeof(struct pm4_map_process_scratch_kv); ++} ++ ++ ++static struct packet_manager_funcs kfd_cik_pm_funcs = { ++ .map_process = pm_map_process_cik, ++ .runlist = pm_runlist_vi, ++ .set_resources = pm_set_resources_vi, ++ .map_queues = pm_map_queues_vi, ++ .unmap_queues = pm_unmap_queues_vi, ++ .query_status = pm_query_status_vi, ++ .release_mem = pm_release_mem_vi, ++ .get_map_process_packet_size = pm_get_map_process_packet_size_cik, ++ .get_runlist_packet_size = pm_get_runlist_packet_size_vi, ++ .get_set_resources_packet_size = pm_get_set_resources_packet_size_vi, ++ .get_map_queues_packet_size = pm_get_map_queues_packet_size_vi, ++ .get_unmap_queues_packet_size = pm_get_unmap_queues_packet_size_vi, ++ .get_query_status_packet_size = pm_get_query_status_packet_size_vi, ++ .get_release_mem_packet_size = pm_get_release_mem_packet_size_vi, ++}; ++ ++static struct packet_manager_funcs kfd_cik_scratch_pm_funcs = { ++ .map_process = pm_map_process_scratch_cik, ++ .runlist = pm_runlist_vi, ++ .set_resources = pm_set_resources_vi, ++ .map_queues = pm_map_queues_vi, ++ .unmap_queues = pm_unmap_queues_vi, ++ .query_status = pm_query_status_vi, ++ .release_mem = pm_release_mem_vi, ++ .get_map_process_packet_size = ++ pm_get_map_process_scratch_packet_size_cik, ++ .get_runlist_packet_size = pm_get_runlist_packet_size_vi, ++ .get_set_resources_packet_size = pm_get_set_resources_packet_size_vi, ++ .get_map_queues_packet_size = pm_get_map_queues_packet_size_vi, ++ .get_unmap_queues_packet_size = pm_get_unmap_queues_packet_size_vi, ++ .get_query_status_packet_size = pm_get_query_status_packet_size_vi, ++ .get_release_mem_packet_size = pm_get_release_mem_packet_size_vi, ++}; ++ ++void 
kfd_pm_func_init_cik(struct packet_manager *pm, uint16_t fw_ver) ++{ ++ if (fw_ver >= KFD_SCRATCH_KV_FW_VER) ++ pm->pmf = &kfd_cik_scratch_pm_funcs; ++ else ++ pm->pmf = &kfd_cik_pm_funcs; ++} +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +index 33830b1..5fe4f60 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016-2018 Advanced Micro Devices, Inc. ++ * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -44,7 +44,7 @@ static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); +- if (retval) ++ if (retval != 0) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; +@@ -71,7 +71,8 @@ static int pm_map_process_v9(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) + { + struct pm4_mes_map_process *packet; +- uint64_t vm_page_table_base_addr = qpd->page_table_base; ++ uint64_t vm_page_table_base_addr = ++ (uint64_t)(qpd->page_table_base) << 12; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); +@@ -125,6 +126,7 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + ++ + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +@@ -293,7 +295,7 @@ static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, + } + + +-static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) ++static uint32_t pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) + { + struct pm4_mec_release_mem *packet; + +@@ -318,22 +320,58 @@ static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) + + packet->data_lo = 0; + +- return 0; ++ return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int); ++} ++ ++static uint32_t pm_get_map_process_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mes_map_process); ++} ++ ++static uint32_t pm_get_runlist_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mes_runlist); ++} ++ ++static uint32_t pm_get_map_queues_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mes_map_queues); ++} ++ ++static uint32_t pm_get_unmap_queues_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mes_unmap_queues); ++} ++ ++static uint32_t pm_get_query_status_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mes_query_status); ++} ++ ++static uint32_t pm_get_release_mem_packet_size_v9(void) ++{ ++ return sizeof(struct pm4_mec_release_mem); + } + +-const struct packet_manager_funcs kfd_v9_pm_funcs = { +- .map_process = pm_map_process_v9, +- .runlist = pm_runlist_v9, +- .set_resources = pm_set_resources_vi, +- .map_queues = pm_map_queues_v9, +- .unmap_queues = pm_unmap_queues_v9, +- .query_status = pm_query_status_v9, +- .release_mem = pm_release_mem_v9, +- .map_process_size = sizeof(struct pm4_mes_map_process), +- .runlist_size = sizeof(struct pm4_mes_runlist), +- .set_resources_size = sizeof(struct pm4_mes_set_resources), +- .map_queues_size = sizeof(struct pm4_mes_map_queues), +- .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), +- .query_status_size = sizeof(struct pm4_mes_query_status), +- 
.release_mem_size = sizeof(struct pm4_mec_release_mem) ++static struct packet_manager_funcs kfd_v9_pm_funcs = { ++ .map_process = pm_map_process_v9, ++ .runlist = pm_runlist_v9, ++ .set_resources = pm_set_resources_vi, ++ .map_queues = pm_map_queues_v9, ++ .unmap_queues = pm_unmap_queues_v9, ++ .query_status = pm_query_status_v9, ++ .release_mem = pm_release_mem_v9, ++ .get_map_process_packet_size = pm_get_map_process_packet_size_v9, ++ .get_runlist_packet_size = pm_get_runlist_packet_size_v9, ++ .get_set_resources_packet_size = pm_get_set_resources_packet_size_vi, ++ .get_map_queues_packet_size = pm_get_map_queues_packet_size_v9, ++ .get_unmap_queues_packet_size = pm_get_unmap_queues_packet_size_v9, ++ .get_query_status_packet_size = pm_get_query_status_packet_size_v9, ++ .get_release_mem_packet_size = pm_get_release_mem_packet_size_v9, + }; ++ ++void kfd_pm_func_init_v9(struct packet_manager *pm, uint16_t fw_ver) ++{ ++ pm->pmf = &kfd_v9_pm_funcs; ++} ++ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +index bf20c6d..9022ecb 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +@@ -67,25 +67,12 @@ static void submit_packet_vi(struct kernel_queue *kq) + kq->pending_wptr); + } + +-unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +-{ +- union PM4_MES_TYPE_3_HEADER header; +- +- header.u32All = 0; +- header.opcode = opcode; +- header.count = packet_size / 4 - 2; +- header.type = PM4_TYPE_3; +- +- return header.u32All; +-} +- +-static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, +- struct qcm_process_device *qpd) ++static int pm_map_process_vi(struct packet_manager *pm, ++ uint32_t *buffer, struct qcm_process_device *qpd) + { + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; +- + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, +@@ -112,16 +99,27 @@ static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, ++ ++unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) ++{ ++ union PM4_MES_TYPE_3_HEADER header; ++ ++ header.u32All = 0; ++ header.opcode = opcode; ++ header.count = packet_size / 4 - 2; ++ header.type = PM4_TYPE_3; ++ ++ return header.u32All; ++} ++ ++int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) + { + struct pm4_mes_runlist *packet; ++ + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + +- if (WARN_ON(!ib)) +- return -EFAULT; +- + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number +@@ -134,6 +132,7 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + ++ + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); +@@ -151,35 +150,7 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, +- struct scheduling_resources *res) +-{ +- struct pm4_mes_set_resources *packet; +- +- packet = (struct pm4_mes_set_resources 
*)buffer; +- memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); +- +- packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, +- sizeof(struct pm4_mes_set_resources)); +- +- packet->bitfields2.queue_type = +- queue_type__mes_set_resources__hsa_interface_queue_hiq; +- packet->bitfields2.vmid_mask = res->vmid_mask; +- packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; +- packet->bitfields7.oac_mask = res->oac_mask; +- packet->bitfields8.gds_heap_base = res->gds_heap_base; +- packet->bitfields8.gds_heap_size = res->gds_heap_size; +- +- packet->gws_mask_lo = lower_32_bits(res->gws_mask); +- packet->gws_mask_hi = upper_32_bits(res->gws_mask); +- +- packet->queue_mask_lo = lower_32_bits(res->queue_mask); +- packet->queue_mask_hi = upper_32_bits(res->queue_mask); +- +- return 0; +-} +- +-static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, ++int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) + { + struct pm4_mes_map_queues *packet; +@@ -238,7 +209,35 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, ++int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, ++ struct scheduling_resources *res) ++{ ++ struct pm4_mes_set_resources *packet; ++ ++ packet = (struct pm4_mes_set_resources *)buffer; ++ memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); ++ ++ packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, ++ sizeof(struct pm4_mes_set_resources)); ++ ++ packet->bitfields2.queue_type = ++ queue_type__mes_set_resources__hsa_interface_queue_hiq; ++ packet->bitfields2.vmid_mask = res->vmid_mask; ++ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; ++ packet->bitfields7.oac_mask = res->oac_mask; ++ packet->bitfields8.gds_heap_base = res->gds_heap_base; ++ packet->bitfields8.gds_heap_size = res->gds_heap_size; ++ ++ packet->gws_mask_lo = lower_32_bits(res->gws_mask); ++ packet->gws_mask_hi = upper_32_bits(res->gws_mask); ++ ++ packet->queue_mask_lo = lower_32_bits(res->queue_mask); ++ packet->queue_mask_hi = upper_32_bits(res->queue_mask); ++ ++ return 0; ++} ++ ++int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, +@@ -303,7 +302,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + + } + +-static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, ++int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) + { + struct pm4_mes_query_status *packet; +@@ -311,6 +310,7 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + ++ + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + +@@ -328,15 +328,16 @@ static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + return 0; + } + +-static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) ++ ++uint32_t pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) + { + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; +- memset(buffer, 0, sizeof(*packet)); ++ memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + + packet->header.u32All = 
pm_build_pm4_header(IT_RELEASE_MEM, +- sizeof(*packet)); ++ sizeof(struct pm4_mec_release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; +@@ -354,22 +355,63 @@ static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) + + packet->data_lo = 0; + +- return 0; ++ return sizeof(struct pm4_mec_release_mem) / sizeof(unsigned int); ++} ++ ++uint32_t pm_get_map_process_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_map_process); ++} ++ ++uint32_t pm_get_runlist_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_runlist); ++} ++ ++uint32_t pm_get_set_resources_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_set_resources); ++} ++ ++uint32_t pm_get_map_queues_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_map_queues); ++} ++ ++uint32_t pm_get_unmap_queues_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_unmap_queues); ++} ++ ++uint32_t pm_get_query_status_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mes_query_status); + } + +-const struct packet_manager_funcs kfd_vi_pm_funcs = { +- .map_process = pm_map_process_vi, +- .runlist = pm_runlist_vi, +- .set_resources = pm_set_resources_vi, +- .map_queues = pm_map_queues_vi, +- .unmap_queues = pm_unmap_queues_vi, +- .query_status = pm_query_status_vi, +- .release_mem = pm_release_mem_vi, +- .map_process_size = sizeof(struct pm4_mes_map_process), +- .runlist_size = sizeof(struct pm4_mes_runlist), +- .set_resources_size = sizeof(struct pm4_mes_set_resources), +- .map_queues_size = sizeof(struct pm4_mes_map_queues), +- .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), +- .query_status_size = sizeof(struct pm4_mes_query_status), +- .release_mem_size = sizeof(struct pm4_mec_release_mem) ++uint32_t pm_get_release_mem_packet_size_vi(void) ++{ ++ return sizeof(struct pm4_mec_release_mem); ++} ++ ++ ++static struct packet_manager_funcs kfd_vi_pm_funcs = { ++ .map_process = pm_map_process_vi, ++ .runlist = pm_runlist_vi, ++ .set_resources = pm_set_resources_vi, ++ .map_queues = pm_map_queues_vi, ++ .unmap_queues = pm_unmap_queues_vi, ++ .query_status = pm_query_status_vi, ++ .release_mem = pm_release_mem_vi, ++ .get_map_process_packet_size = pm_get_map_process_packet_size_vi, ++ .get_runlist_packet_size = pm_get_runlist_packet_size_vi, ++ .get_set_resources_packet_size = pm_get_set_resources_packet_size_vi, ++ .get_map_queues_packet_size = pm_get_map_queues_packet_size_vi, ++ .get_unmap_queues_packet_size = pm_get_unmap_queues_packet_size_vi, ++ .get_query_status_packet_size = pm_get_query_status_packet_size_vi, ++ .get_release_mem_packet_size = pm_get_release_mem_packet_size_vi, + }; ++ ++void kfd_pm_func_init_vi(struct packet_manager *pm, uint16_t fw_ver) ++{ ++ pm->pmf = &kfd_vi_pm_funcs; ++} +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +index 261657f..34d44ff 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c +@@ -63,7 +63,7 @@ MODULE_PARM_DESC(hws_max_conc_proc, + + int cwsr_enable = 1; + module_param(cwsr_enable, int, 0444); +-MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = off, 1 = on (default))"); ++MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + + int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; + module_param(max_num_of_queues_per_device, int, 0444); +@@ -75,6 +75,8 @@ module_param(send_sigterm, int, 0444); + 
MODULE_PARM_DESC(send_sigterm, + "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); + ++static int amdkfd_init_completed; ++ + int debug_largebar; + module_param(debug_largebar, int, 0444); + MODULE_PARM_DESC(debug_largebar, +@@ -85,23 +87,16 @@ module_param(ignore_crat, int, 0444); + MODULE_PARM_DESC(ignore_crat, + "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); + +-int noretry = 1; +-module_param(noretry, int, 0644); ++int vega10_noretry = 1; ++module_param_named(noretry, vega10_noretry, int, 0644); + MODULE_PARM_DESC(noretry, +- "Set sh_mem_config.retry_disable on GFXv9+ dGPUs (0 = retry enabled, 1 = retry disabled (default))"); ++ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled, 1 = retry disabled (default))"); + + int priv_cp_queues; + module_param(priv_cp_queues, int, 0644); + MODULE_PARM_DESC(priv_cp_queues, + "Enable privileged mode for CP queues (0 = off (default), 1 = on)"); + +-int halt_if_hws_hang; +-module_param(halt_if_hws_hang, int, 0644); +-MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); +- +- +-static int amdkfd_init_completed; +- + int kgd2kfd_init(unsigned int interface_version, + const struct kgd2kfd_calls **g2f) + { +@@ -154,7 +149,7 @@ static int __init kfd_module_init(void) + + err = kfd_ipc_init(); + if (err < 0) +- goto err_ipc; ++ goto err_topology; + + err = kfd_process_create_wq(); + if (err < 0) +@@ -171,8 +166,6 @@ static int __init kfd_module_init(void) + return 0; + + err_create_wq: +-err_ipc: +- kfd_topology_shutdown(); + err_topology: + kfd_chardev_exit(); + err_ioctl: +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +index d39e81c..8279b74 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +@@ -81,7 +81,6 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, + case CHIP_POLARIS11: + return mqd_manager_init_vi_tonga(type, dev); + case CHIP_VEGA10: +- case CHIP_VEGA20: + case CHIP_RAVEN: + return mqd_manager_init_v9(type, dev); + default: +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +index 336ea9c..dcaeda8 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +@@ -94,8 +94,6 @@ struct mqd_manager { + u32 *ctl_stack_used_size, + u32 *save_area_used_size); + +- bool (*check_queue_active)(struct queue *q); +- + #if defined(CONFIG_DEBUG_FS) + int (*debugfs_show_mqd)(struct seq_file *m, void *data); + #endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +index 2441834..bd44a23 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +@@ -42,31 +42,6 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd) + return (struct cik_sdma_rlc_registers *)mqd; + } + +-static bool check_sdma_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- struct cik_sdma_rlc_registers *m = get_sdma_mqd(q->mqd); +- +- rptr = m->sdma_rlc_rb_rptr; +- wptr = m->sdma_rlc_rb_wptr; +- pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); +- +- return (rptr != wptr); +-} +- +-static bool check_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- struct cik_mqd *m = get_mqd(q->mqd); +- +- rptr = m->cp_hqd_pq_rptr; +- wptr = m->cp_hqd_pq_wptr; +- +- pr_debug("rptr=%d, 
wptr=%d\n", rptr, wptr); +- +- return (rptr != wptr); +-} +- + static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) + { +@@ -516,7 +491,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -528,7 +502,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -540,7 +513,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +- mqd->check_queue_active = check_sdma_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; + #endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +index dcd24c4..f4e8efc 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016-2018 Advanced Micro Devices, Inc. ++ * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -41,49 +41,6 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) + return (struct v9_sdma_mqd *)mqd; + } + +-static bool check_sdma_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- uint32_t rptr_hi, wptr_hi; +- struct v9_sdma_mqd *m = get_sdma_mqd(q->mqd); +- +- rptr = m->sdmax_rlcx_rb_rptr; +- wptr = m->sdmax_rlcx_rb_wptr; +- rptr_hi = m->sdmax_rlcx_rb_rptr_hi; +- wptr_hi = m->sdmax_rlcx_rb_wptr_hi; +- pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); +- pr_debug("rptr_hi=%d, wptr_hi=%d\n", rptr_hi, wptr_hi); +- +- return (rptr != wptr || rptr_hi != wptr_hi); +-} +- +-static bool check_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- uint32_t cntl_stack_offset, cntl_stack_size; +- struct v9_mqd *m = get_mqd(q->mqd); +- +- rptr = m->cp_hqd_pq_rptr; +- wptr = m->cp_hqd_pq_wptr_lo % q->properties.queue_size; +- cntl_stack_offset = m->cp_hqd_cntl_stack_offset; +- cntl_stack_size = m->cp_hqd_cntl_stack_size; +- +- pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); +- pr_debug("m->cp_hqd_cntl_stack_offset=0x%08x\n", cntl_stack_offset); +- pr_debug("m->cp_hqd_cntl_stack_size=0x%08x\n", cntl_stack_size); +- +- if ((rptr == 0 && wptr == 0) || +- cntl_stack_offset == 0xffffffff || +- cntl_stack_size > 0x5000) +- return false; +- +- /* Process is idle if both conditions are meet: +- * queue's rptr equals to wptr +- * control stack is empty, cntl_stack_offset = cntl_stack_size +- */ +- return (rptr != wptr || cntl_stack_offset != cntl_stack_size); +-} +- + static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) + { +@@ -158,7 +115,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &((*mqd_mem_obj)->gtt_mem), + &((*mqd_mem_obj)->gpu_addr), +- (void *)&((*mqd_mem_obj)->cpu_ptr), true); ++ (void *)&((*mqd_mem_obj)->cpu_ptr)); + } else + 
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), + mqd_mem_obj); +@@ -202,7 +159,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + +- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { ++ if (mm->dev->cwsr_enabled) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = +@@ -260,9 +217,8 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + +- m->cp_hqd_ib_control = +- 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | +- 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; ++ m->cp_hqd_ib_control = 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | ++ 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size +@@ -287,13 +243,13 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; +- m->cp_hqd_pq_doorbell_control |= 1 << +- CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; ++ m->cp_hqd_pq_doorbell_control |= ++ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (priv_cp_queues) + m->cp_hqd_pq_control |= + 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT; +- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) ++ if (mm->dev->cwsr_enabled) + m->cp_hqd_ctx_save_control = 0; + + update_cu_mask(mm, mqd, q); +@@ -532,7 +488,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -544,7 +499,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -556,7 +510,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +- mqd->check_queue_active = check_sdma_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; + #endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +index 246fe6c..eff7580 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +@@ -44,45 +44,6 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd) + return (struct vi_sdma_mqd *)mqd; + } + +-static bool check_sdma_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- struct vi_sdma_mqd *m = get_sdma_mqd(q->mqd); +- +- rptr = m->sdmax_rlcx_rb_rptr; +- wptr = m->sdmax_rlcx_rb_wptr; +- pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); +- +- return (rptr != wptr); +-} +- +-static bool check_queue_active(struct queue *q) +-{ +- uint32_t rptr, wptr; +- uint32_t cntl_stack_offset, cntl_stack_size; +- struct vi_mqd *m = get_mqd(q->mqd); +- +- rptr = m->cp_hqd_pq_rptr; +- wptr = m->cp_hqd_pq_wptr; +- cntl_stack_offset = m->cp_hqd_cntl_stack_offset; +- cntl_stack_size = 
m->cp_hqd_cntl_stack_size; +- +- pr_debug("rptr=%d, wptr=%d\n", rptr, wptr); +- pr_debug("m->cp_hqd_cntl_stack_offset=0x%08x\n", cntl_stack_offset); +- pr_debug("m->cp_hqd_cntl_stack_size=0x%08x\n", cntl_stack_size); +- +- if ((rptr == 0 && wptr == 0) || +- cntl_stack_offset == 0xffffffff || +- cntl_stack_size > 0x5000) +- return false; +- +- /* Process is idle if both conditions are meet: +- * queue's rptr equals to wptr +- * control stack is empty, cntl_stack_offset = cntl_stack_size +- */ +- return (rptr != wptr || cntl_stack_offset != cntl_stack_size); +-} +- + static void update_cu_mask(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) + { +@@ -198,7 +159,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + +- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { ++ if (mm->dev->cwsr_enabled) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = +@@ -293,7 +254,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, + if (priv_cp_queues) + m->cp_hqd_pq_control |= + 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT; +- if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) ++ if (mm->dev->cwsr_enabled) + m->cp_hqd_ctx_save_control = + atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT | + mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT; +@@ -537,7 +498,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -549,7 +509,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +- mqd->check_queue_active = check_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; + #endif +@@ -561,7 +520,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +- mqd->check_queue_active = check_sdma_queue_active; + #if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; + #endif +@@ -586,3 +544,4 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + mqd->update_mqd = update_mqd_tonga; + return mqd; + } ++ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +index c6080ed3..98c89d2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +@@ -26,6 +26,7 @@ + #include "kfd_device_queue_manager.h" + #include "kfd_kernel_queue.h" + #include "kfd_priv.h" ++#include "kfd_pm4_opcodes.h" + + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, + unsigned int buffer_size_bytes) +@@ -44,7 +45,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + unsigned int process_count, queue_count, compute_queue_count; + unsigned int map_queue_size; + unsigned int max_proc_per_quantum = 1; +- struct kfd_dev *dev = pm->dqm->dev; ++ ++ struct kfd_dev *dev = pm->dqm->dev; + + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; +@@ -55,20 +57,21 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + * 
hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ ++ + *over_subscription = false; + + if (dev->max_proc_per_quantum > 1) + max_proc_per_quantum = dev->max_proc_per_quantum; + + if ((process_count > max_proc_per_quantum) || +- compute_queue_count > get_queues_num(pm->dqm)) { ++ compute_queue_count > get_queues_num(pm->dqm)) { + *over_subscription = true; + pr_debug("Over subscribed runlist\n"); + } + +- map_queue_size = pm->pmf->map_queues_size; ++ map_queue_size = pm->pmf->get_map_queues_packet_size(); + /* calculate run list ib allocation size */ +- *rlib_size = process_count * pm->pmf->map_process_size + ++ *rlib_size = process_count * pm->pmf->get_map_process_packet_size() + + queue_count * map_queue_size; + + /* +@@ -76,7 +79,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, + * when over subscription + */ + if (*over_subscription) +- *rlib_size += pm->pmf->runlist_size; ++ *rlib_size += pm->pmf->get_runlist_packet_size(); + + pr_debug("runlist ib size %d\n", *rlib_size); + } +@@ -157,7 +160,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + return retval; + + proccesses_mapped++; +- inc_wptr(&rl_wptr, pm->pmf->map_process_size, ++ inc_wptr(&rl_wptr, pm->pmf->get_map_process_packet_size(), + alloc_size_bytes); + + list_for_each_entry(kq, &qpd->priv_queue_list, list) { +@@ -175,7 +178,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + return retval; + + inc_wptr(&rl_wptr, +- pm->pmf->map_queues_size, ++ pm->pmf->get_map_queues_packet_size(), + alloc_size_bytes); + } + +@@ -190,12 +193,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + &rl_buffer[rl_wptr], + q, + qpd->is_debug); +- + if (retval) + return retval; + + inc_wptr(&rl_wptr, +- pm->pmf->map_queues_size, ++ pm->pmf->get_map_queues_packet_size(), + alloc_size_bytes); + } + } +@@ -215,38 +217,37 @@ static int pm_create_runlist_ib(struct packet_manager *pm, + return retval; + } + +-int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) ++int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm, ++ uint16_t fw_ver) + { +- switch (dqm->dev->device_info->asic_family) { ++ pm->dqm = dqm; ++ mutex_init(&pm->lock); ++ pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); ++ if (!pm->priv_queue) { ++ mutex_destroy(&pm->lock); ++ return -ENOMEM; ++ } ++ pm->allocated = false; ++ ++ switch (pm->dqm->dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: +- /* PM4 packet structures on CIK are the same as on VI */ ++ kfd_pm_func_init_cik(pm, fw_ver); ++ break; + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: +- pm->pmf = &kfd_vi_pm_funcs; ++ kfd_pm_func_init_vi(pm, fw_ver); + break; + case CHIP_VEGA10: +- case CHIP_VEGA20: + case CHIP_RAVEN: +- pm->pmf = &kfd_v9_pm_funcs; ++ kfd_pm_func_init_v9(pm, fw_ver); + break; + default: +- WARN(1, "Unexpected ASIC family %u", +- dqm->dev->device_info->asic_family); +- return -EINVAL; +- } +- +- pm->dqm = dqm; +- mutex_init(&pm->lock); +- pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); +- if (!pm->priv_queue) { +- mutex_destroy(&pm->lock); +- return -ENOMEM; ++ BUG(); + } +- pm->allocated = false; + + return 0; + } +@@ -263,7 +264,7 @@ int pm_send_set_resources(struct packet_manager *pm, + uint32_t *buffer, size; + int retval = 0; + +- size = pm->pmf->set_resources_size; ++ size = pm->pmf->get_set_resources_packet_size(); + mutex_lock(&pm->lock); + 
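Aside (illustrative sketch, not part of the patch): both sides of the pm_calc_rlib_size() hunk above size the runlist IB the same way; the rollback only changes where the packet sizes come from (getter functions instead of struct fields). A standalone sketch of that computation, with made-up packet sizes standing in for the real PM4 struct sizes and all queues treated as compute queues for brevity:

	#include <stdbool.h>
	#include <stdint.h>

	/* Shapes follow pm_calc_rlib_size() in the hunk above; the three
	 * sizes below are stand-ins, not the real PM4 packet sizes. */
	static uint32_t rlib_size(unsigned int process_count,
				  unsigned int queue_count,
				  unsigned int max_proc_per_quantum,
				  unsigned int available_queues,
				  bool *over_subscription)
	{
		const uint32_t map_process_size = 64; /* stand-in */
		const uint32_t map_queues_size = 56;  /* stand-in */
		const uint32_t runlist_size = 20;     /* stand-in */
		uint32_t size;

		/* Over-subscribed when one scheduling quantum cannot hold
		 * all processes, or there are more queues than the HW
		 * scheduler exposes. */
		*over_subscription = process_count > max_proc_per_quantum ||
				     queue_count > available_queues;

		/* One map-process packet per process, one map-queues
		 * packet per queue. */
		size = process_count * map_process_size +
		       queue_count * map_queues_size;

		/* Room for a chained runlist packet at the end, in case
		 * the scheduler has to chain runs when over-subscribed. */
		if (*over_subscription)
			size += runlist_size;

		return size;
	}

Called with, say, 8 processes, 40 queues, a quantum of 4 and 24 HW queues, it reports over-subscription and reserves the extra chained-runlist slot.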
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), +@@ -300,7 +301,8 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + + pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + +- packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); ++ packet_size_dwords = pm->pmf->get_runlist_packet_size() / ++ sizeof(uint32_t); + mutex_lock(&pm->lock); + + retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, +@@ -309,7 +311,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) + goto fail_acquire_packet_buffer; + + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, +- rl_ib_size / sizeof(uint32_t), false); ++ rl_ib_size / sizeof(uint32_t), false); + if (retval) + goto fail_create_runlist; + +@@ -337,7 +339,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + if (WARN_ON(!fence_address)) + return -EFAULT; + +- size = pm->pmf->query_status_size; ++ size = pm->pmf->get_query_status_packet_size(); + mutex_lock(&pm->lock); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); +@@ -366,7 +368,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + uint32_t *buffer, size; + int retval = 0; + +- size = pm->pmf->unmap_queues_size; ++ size = pm->pmf->get_unmap_queues_packet_size(); + mutex_lock(&pm->lock); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); +@@ -398,51 +400,17 @@ void pm_release_ib(struct packet_manager *pm) + mutex_unlock(&pm->lock); + } + +-#if defined(CONFIG_DEBUG_FS) +- + int pm_debugfs_runlist(struct seq_file *m, void *data) + { + struct packet_manager *pm = data; + +- mutex_lock(&pm->lock); +- + if (!pm->allocated) { + seq_puts(m, " No active runlist\n"); +- goto out; ++ return 0; + } + + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); + +-out: +- mutex_unlock(&pm->lock); + return 0; + } +- +-int pm_debugfs_hang_hws(struct packet_manager *pm) +-{ +- uint32_t *buffer, size; +- int r = 0; +- +- size = pm->pmf->query_status_size; +- mutex_lock(&pm->lock); +- pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, +- size / sizeof(uint32_t), (unsigned int **)&buffer); +- if (!buffer) { +- pr_err("Failed to allocate buffer on kernel queue\n"); +- r = -ENOMEM; +- goto out; +- } +- memset(buffer, 0x55, size); +- pm->priv_queue->ops.submit_packet(pm->priv_queue); +- +- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.", +- buffer[0], buffer[1], buffer[2], buffer[3], +- buffer[4], buffer[5], buffer[6]); +-out: +- mutex_unlock(&pm->lock); +- return r; +-} +- +- +-#endif +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +index 87344cc..fae8e8c 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_peerdirect.c +@@ -49,9 +49,9 @@ + #include <linux/slab.h> + #include <linux/scatterlist.h> + #include <linux/module.h> +-#include <drm/amd_rdma.h> + + #include "kfd_priv.h" ++#include "amd_rdma.h" + + + +@@ -137,6 +137,7 @@ static void (*pfn_ib_unregister_peer_memory_client)(void *reg_handle); + + static const struct amd_rdma_interface *rdma_interface; + ++static invalidate_peer_memory ib_invalidate_callback; + static void *ib_reg_handle; + + struct amd_mem_context { +@@ -168,6 +169,9 @@ static void free_callback(void *client_priv) + + 
pr_debug("mem_context->core_context 0x%p\n", mem_context->core_context); + ++ /* Call back IB stack asking to invalidate memory */ ++ (*ib_invalidate_callback) (ib_reg_handle, mem_context->core_context); ++ + /* amdkfd will free resources when we return from this callback. + * Set flag to inform that there is nothing to do on "put_pages", etc. + */ +@@ -474,7 +478,7 @@ void kfd_init_peer_direct(void) + strcpy(amd_mem_client.version, AMD_PEER_BRIDGE_DRIVER_VERSION); + + ib_reg_handle = pfn_ib_register_peer_memory_client(&amd_mem_client, +- NULL); ++ &ib_invalidate_callback); + + if (!ib_reg_handle) { + pr_err("Cannot register peer memory client\n"); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 7869a9d..b2ef0f5 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -30,15 +30,16 @@ + #include <linux/atomic.h> + #include <linux/workqueue.h> + #include <linux/spinlock.h> +-#include <linux/kfd_ioctl.h> + #include <linux/idr.h> ++#include <linux/kfd_ioctl.h> ++#include <linux/pid.h> ++#include <linux/interval_tree.h> + #include <linux/seq_file.h> + #include <linux/kref.h> + #include <linux/kfifo.h> +-#include <linux/pid.h> +-#include <linux/interval_tree.h> + #include <kgd_kfd_interface.h> + ++#include "amd_rdma.h" + #include "amd_shared.h" + + #define KFD_SYSFS_FILE_MODE 0444 +@@ -49,7 +50,8 @@ + /* Use upper bits of mmap offset to store KFD driver specific information. + * BITS[63:62] - Encode MMAP type + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to +- * BITS[45:0] - MMAP offset value ++ * BITS[45:40] - Reserved. Not Used. ++ * BITS[39:0] - MMAP offset value. Used by TTM. + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. 
Hence, these + * defines are w.r.t to PAGE_SIZE +@@ -68,7 +70,7 @@ + #define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ + >> KFD_MMAP_GPU_ID_SHIFT) + +-#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) ++#define KFD_MMAP_OFFSET_VALUE_MASK (0xFFFFFFFFFFULL >> PAGE_SHIFT) + #define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) + + /* +@@ -81,6 +83,7 @@ + #define KFD_CIK_HIQ_PIPE 4 + #define KFD_CIK_HIQ_QUEUE 0 + ++ + /* Macro for allocating structures */ + #define kfd_alloc_struct(ptr_to_struct) \ + ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) +@@ -113,14 +116,14 @@ extern int max_num_of_queues_per_device; + /* Kernel module parameter to specify the scheduling policy */ + extern int sched_policy; + ++extern int cwsr_enable; ++ + /* + * Kernel module parameter to specify the maximum process + * number per HW scheduler + */ + extern int hws_max_conc_proc; + +-extern int cwsr_enable; +- + /* + * Kernel module parameter to specify whether to send sigterm to HSA process on + * unhandled exception +@@ -142,18 +145,13 @@ extern int ignore_crat; + /* + * Set sh_mem_config.retry_disable on Vega10 + */ +-extern int noretry; ++extern int vega10_noretry; + + /* + * Enable privileged mode for all CP queues including user queues + */ + extern int priv_cp_queues; + +-/* +- * Halt if HWS hang is detected +- */ +-extern int halt_if_hws_hang; +- + /** + * enum kfd_sched_policy + * +@@ -210,7 +208,6 @@ struct kfd_device_info { + bool needs_pci_atomics; + /* obtain from adev->sdma.num_instances */ + unsigned int num_sdma_engines; +- unsigned int num_sdma_queues_per_engine; + }; + + struct kfd_mem_obj { +@@ -294,8 +291,6 @@ struct kfd_dev { + bool cwsr_enabled; + const void *cwsr_isa; + unsigned int cwsr_isa_size; +- +- bool pci_atomic_requested; + }; + + struct kfd_ipc_obj; +@@ -306,41 +301,6 @@ struct kfd_bo { + struct kfd_dev *dev; + struct list_head cb_data_head; + struct kfd_ipc_obj *kfd_ipc_obj; +- /* page-aligned VA address */ +- uint64_t cpuva; +- unsigned int mem_type; +-}; +- +-struct cma_system_bo { +- struct kgd_mem *mem; +- struct sg_table *sg; +- struct kfd_dev *dev; +- struct list_head list; +-}; +- +-/* Similar to iov_iter */ +-struct cma_iter { +- /* points to current entry of range array */ +- struct kfd_memory_range *array; +- /* total number of entries in the initial array */ +- unsigned long nr_segs; +- /* total amount of data pointed by kfd array*/ +- unsigned long total; +- /* offset into the entry pointed by cma_iter.array */ +- unsigned long offset; +- struct kfd_process *p; +- struct mm_struct *mm; +- struct task_struct *task; +- /* current kfd_bo associated with cma_iter.array.va_addr */ +- struct kfd_bo *cur_bo; +- /* offset w.r.t cur_bo */ +- unsigned long bo_offset; +- /* If cur_bo is a userptr BO, then a shadow system BO is created +- * using its underlying pages. cma_bo holds this BO. cma_list is a +- * list cma_bos created in one session +- */ +- struct cma_system_bo *cma_bo; +- struct list_head cma_list; + }; + + /* KGD2KFD callbacks */ +@@ -444,11 +404,7 @@ enum KFD_QUEUE_PRIORITY { + * @is_interop: Defines if this is a interop queue. Interop queue means that + * the queue can access both graphics and compute resources. + * +- * @is_evicted: Defines if the queue is evicted. Only active queues +- * are evicted, rendering them inactive. +- * +- * @is_active: Defines if the queue is active or not. @is_active and +- * @is_evicted are protected by the DQM lock. 
++ * @is_active: Defines if the queue is active or not. + * + * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid + * of the queue. +@@ -470,7 +426,7 @@ struct queue_properties { + void __iomem *doorbell_ptr; + uint32_t doorbell_off; + bool is_interop; +- bool is_evicted; ++ bool is_evicted; /* true -> queue is evicted */ + bool is_active; + /* Not relevant for user mode queues in cp scheduling */ + unsigned int vmid; +@@ -589,6 +545,7 @@ struct qcm_process_device { + struct list_head priv_queue_list; + + unsigned int queue_count; ++ /* a data field only meaningful for non-HWS case */ + unsigned int vmid; + bool is_debug; + unsigned int evicted; /* eviction counter, 0=active */ +@@ -602,11 +559,11 @@ struct qcm_process_device { + * All the memory management data should be here too + */ + uint64_t gds_context_area; +- uint64_t page_table_base; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; ++ uint32_t page_table_base; + uint32_t gds_size; + uint32_t num_gws; + uint32_t num_oac; +@@ -619,11 +576,11 @@ struct qcm_process_device { + uint64_t tma_addr; + + /* IB memory */ +- uint64_t ib_base; ++ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */ + void *ib_kaddr; + + /*doorbell resources per process per device*/ +- unsigned long *doorbell_bitmap; ++ unsigned long *doorbell_bitmap; + }; + + /* KFD Memory Eviction */ +@@ -635,10 +592,11 @@ struct qcm_process_device { + /* Approx. time before evicting the process again */ + #define PROCESS_ACTIVE_TIME_MS 10 + +-int kgd2kfd_quiesce_mm(struct mm_struct *mm); +-int kgd2kfd_resume_mm(struct mm_struct *mm); + int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); ++int kfd_process_evict_queues(struct kfd_process *p); ++int kfd_process_restore_queues(struct kfd_process *p); ++ + + /* 8 byte handle containing GPU ID in the most significant 4 bytes and + * idr_handle in the least significant 4 bytes +@@ -754,14 +712,13 @@ struct kfd_process { + struct idr event_idr; + /* Event page */ + struct kfd_signal_page *signal_page; +- size_t signal_mapped_size; + size_t signal_event_count; + bool signal_event_limit_reached; + + struct rb_root_cached bo_interval_tree; + + /* Information used for memory eviction */ +- void *kgd_process_info; ++ void *process_info; + /* Eviction fence that is attached to all the BOs of this process. 
The + * fence will be triggered during eviction and new one will be created + * during restore +@@ -804,32 +761,29 @@ struct amdkfd_ioctl_desc { + int kfd_process_create_wq(void); + void kfd_process_destroy_wq(void); + struct kfd_process *kfd_create_process(struct file *filep); +-struct kfd_process *kfd_get_process(const struct task_struct *); ++struct kfd_process *kfd_get_process(const struct task_struct *task); + struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); + struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); + void kfd_unref_process(struct kfd_process *p); +-int kfd_process_evict_queues(struct kfd_process *p); +-int kfd_process_restore_queues(struct kfd_process *p); + void kfd_suspend_all_processes(void); + int kfd_resume_all_processes(void); + + int kfd_process_device_init_vm(struct kfd_process_device *pdd, + struct file *drm_file); + struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, +- struct kfd_process *p); ++ struct kfd_process *p); + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); + struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, + struct kfd_process *p); + +-int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, +- struct vm_area_struct *vma); ++int kfd_reserved_mem_mmap(struct kfd_process *process, ++ struct vm_area_struct *vma); + + /* KFD process API for creating and translating handles */ + int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd, + void *mem, uint64_t start, +- uint64_t length, uint64_t cpuva, +- unsigned int mem_type, ++ uint64_t length, + struct kfd_ipc_obj *ipc_obj); + void *kfd_process_device_translate_handle(struct kfd_process_device *p, + int handle); +@@ -864,7 +818,7 @@ void kfd_pasid_free(unsigned int pasid); + size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); + int kfd_doorbell_init(struct kfd_dev *kfd); + void kfd_doorbell_fini(struct kfd_dev *kfd); +-int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, ++int kfd_doorbell_mmap(struct kfd_dev *kfd, struct kfd_process *process, + struct vm_area_struct *vma); + void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + unsigned int *doorbell_off); +@@ -921,6 +875,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd); + + /* amdkfd Apertures */ + int kfd_init_apertures(struct kfd_process *process); ++int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, ++ uint64_t base, uint64_t limit); + + /* Queue Context Management */ + int init_queue(struct queue **q, const struct queue_properties *properties); +@@ -975,6 +931,8 @@ int pqm_get_wave_state(struct process_queue_manager *pqm, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); ++int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm); ++int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm); + + int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + unsigned int fence_value, +@@ -985,6 +943,8 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + #define KFD_FENCE_COMPLETED (100) + #define KFD_FENCE_INIT (10) + ++struct packet_manager_func; ++ + struct packet_manager { + struct device_queue_manager *dqm; + struct kernel_queue *priv_queue; +@@ -993,11 +953,11 @@ struct packet_manager { + struct kfd_mem_obj *ib_buffer_obj; + unsigned int ib_size_bytes; + +- const struct packet_manager_funcs *pmf; ++ struct packet_manager_funcs *pmf; + }; + + struct packet_manager_funcs { 
+- /* Support ASIC-specific packet formats for PM4 packets */
++ /* Support different firmware versions for PM4 packets */
+ int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd);
+ int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+@@ -1013,22 +973,20 @@ struct packet_manager_funcs {
+ unsigned int sdma_engine);
+ int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value);
+- int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+-
+- /* Packet sizes */
+- int map_process_size;
+- int runlist_size;
+- int set_resources_size;
+- int map_queues_size;
+- int unmap_queues_size;
+- int query_status_size;
+- int release_mem_size;
+-};
++ uint32_t (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+-extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+-extern const struct packet_manager_funcs kfd_v9_pm_funcs;
++ uint32_t (*get_map_process_packet_size)(void);
++ uint32_t (*get_runlist_packet_size)(void);
++ uint32_t (*get_set_resources_packet_size)(void);
++ uint32_t (*get_map_queues_packet_size)(void);
++ uint32_t (*get_unmap_queues_packet_size)(void);
++ uint32_t (*get_query_status_packet_size)(void);
++ uint32_t (*get_release_mem_packet_size)(void);
+
+-int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
++};
++
++int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm,
++ uint16_t fw_ver);
+ void pm_uninit(struct packet_manager *pm);
+ int pm_send_set_resources(struct packet_manager *pm,
+ struct scheduling_resources *res);
+@@ -1043,10 +1001,37 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
+
+ void pm_release_ib(struct packet_manager *pm);
+
+-/* Following PM funcs can be shared among VI and AI */
++/* Following PM funcs can be shared among CIK and VI */
+ unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
++int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
++ uint64_t ib, size_t ib_size_in_dwords, bool chain);
++int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
++ struct queue *q, bool is_static);
+ int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
++int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
++ enum kfd_queue_type type,
++ enum kfd_unmap_queues_filter filter,
++ uint32_t filter_param, bool reset,
++ unsigned int sdma_engine);
++int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
++ uint64_t fence_address, uint32_t fence_value);
++uint32_t pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer);
++
++uint32_t pm_get_map_process_packet_size_vi(void);
++uint32_t pm_get_runlist_packet_size_vi(void);
++uint32_t pm_get_set_resources_packet_size_vi(void);
++uint32_t pm_get_map_queues_packet_size_vi(void);
++uint32_t pm_get_unmap_queues_packet_size_vi(void);
++uint32_t pm_get_query_status_packet_size_vi(void);
++uint32_t pm_get_release_mem_packet_size_vi(void);
++
++
++void kfd_pm_func_init_vi(struct packet_manager *pm, uint16_t fw_ver);
++void kfd_pm_func_init_cik(struct packet_manager *pm, uint16_t fw_ver);
++
++void kfd_pm_func_init_v9(struct packet_manager *pm, uint16_t fw_ver);
++
+
+ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
+
+@@ -1071,24 +1056,21 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
+ void kfd_signal_hw_exception_event(unsigned int pasid);
+ int kfd_set_event(struct kfd_process *p, uint32_t event_id);
+ int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
+-int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
+- uint64_t size);
+ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+ uint32_t event_type, bool auto_reset, uint32_t node_id,
+ uint32_t *event_id, uint32_t *event_trigger_data,
+- uint64_t *event_page_offset, uint32_t *event_slot_index);
++ uint64_t *event_page_offset, uint32_t *event_slot_index,
++ void *kern_addr);
+ int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
+
+ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+ struct kfd_vm_fault_info *info);
+
+-void kfd_signal_reset_event(struct kfd_dev *dev);
+-
+ void kfd_flush_tlb(struct kfd_process_device *pdd);
+
+ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+
+-bool kfd_is_locked(void);
++#define KFD_SCRATCH_KV_FW_VER 413
+
+ /* PeerDirect support */
+ void kfd_init_peer_direct(void);
+@@ -1109,10 +1091,6 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data);
+ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
+ int pm_debugfs_runlist(struct seq_file *m, void *data);
+
+-int kfd_debugfs_hang_hws(struct kfd_dev *dev);
+-int pm_debugfs_hang_hws(struct packet_manager *pm);
+-int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);
+-
+ #else
+
+ static inline void kfd_debugfs_init(void) {}
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index da67302..c627b63 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -30,7 +30,6 @@
+ #include <linux/notifier.h>
+ #include <linux/compat.h>
+ #include <linux/mman.h>
+-#include <linux/file.h>
+ #include <asm/page.h>
+ #include "kfd_ipc.h"
+
+@@ -61,6 +60,9 @@ static struct workqueue_struct *kfd_process_wq;
+ */
+ static struct workqueue_struct *kfd_restore_wq;
+
++#define MIN_IDR_ID 1
++#define MAX_IDR_ID 0 /*0 - for unlimited*/
++
+ static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref);
+ static void kfd_process_ref_release(struct kref *ref);
+@@ -78,12 +80,7 @@ int kfd_process_create_wq(void)
+ if (!kfd_restore_wq)
+ kfd_restore_wq = alloc_ordered_workqueue("kfd_restore_wq", 0);
+
+- if (!kfd_process_wq || !kfd_restore_wq) {
+- kfd_process_destroy_wq();
+- return -ENOMEM;
+- }
+-
+- return 0;
++ return kfd_process_wq && kfd_restore_wq ? 0 : -ENOMEM;
+ }
+
+ void kfd_process_destroy_wq(void)
+@@ -121,11 +118,9 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
+ struct kgd_mem *mem = NULL;
+ int handle;
+ int err;
+- unsigned int mem_type;
+
+ err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
+- pdd->vm, NULL, &mem, NULL,
+- flags);
++ pdd->vm, &mem, NULL, flags);
+ if (err)
+ goto err_alloc_mem;
+
+@@ -139,18 +134,13 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
+ goto sync_memory_failed;
+ }
+
+- mem_type = flags & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
+- KFD_IOC_ALLOC_MEM_FLAGS_GTT |
+- KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
+- KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL);
+-
+ /* Create an obj handle so kfd_process_device_remove_obj_handle
+ * will take care of the bo removal when the process finishes.
+ * We do not need to take p->mutex, because the process is just
+ * created and the ioctls have not had the chance to run.
+ */
+ handle = kfd_process_device_create_obj_handle(
+- pdd, mem, gpu_va, size, 0, mem_type, NULL);
++ pdd, mem, gpu_va, size, NULL);
+
+ if (handle < 0) {
+ err = handle;
+@@ -185,16 +175,14 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
+ /* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ * process for IB usage The memory reserved is for KFD to submit
+ * IB to AMDGPU from kernel. If the memory is reserved
+- * successfully, ib_kaddr will have the CPU/kernel
+- * address. Check ib_kaddr before accessing the memory.
++ * successfully, ib_kaddr_assigned will have the CPU/kernel
++ * address. Check ib_kaddr_assigned before accessing the memory.
+ */
+ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+ {
+ struct qcm_process_device *qpd = &pdd->qpd;
+- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+- ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+- ALLOC_MEM_FLAGS_WRITABLE |
+- ALLOC_MEM_FLAGS_EXECUTABLE;
++ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+ void *kaddr;
+ int ret;
+
+@@ -215,6 +203,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+ struct kfd_process *kfd_create_process(struct file *filep)
+ {
+ struct kfd_process *process;
++
+ struct task_struct *thread = current;
+
+ if (!thread->mm)
+@@ -255,8 +244,6 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
+ return ERR_PTR(-EINVAL);
+
+ process = find_process(thread, false);
+- if (!process)
+- return ERR_PTR(-EINVAL);
+
+ return process;
+ }
+@@ -352,9 +339,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
+
+ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+ per_device_list) {
+- pr_debug("Releasing pdd (topology id %d) for process (pasid %d)\n",
+- pdd->dev->id, p->pasid);
+-
++ /* Destroy the GPUVM VM context */
+ if (pdd->drm_file)
+ fput(pdd->drm_file);
+ else if (pdd->vm)
+@@ -407,6 +392,9 @@ static void kfd_process_ref_release(struct kref *ref)
+ {
+ struct kfd_process *p = container_of(ref, struct kfd_process, ref);
+
++ if (WARN_ON(!kfd_process_wq))
++ return;
++
+ INIT_WORK(&p->release_work, kfd_process_wq_release);
+ queue_work(kfd_process_wq, &p->release_work);
+ }
+@@ -487,19 +475,17 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+ if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
+ continue;
+
+- offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
+- << PAGE_SHIFT;
+- qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
+- KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+- MAP_SHARED, offset);
++ offset = (dev->id | KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
++ qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
++ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
++ MAP_SHARED, offset);
+
+ if (IS_ERR_VALUE(qpd->tba_addr)) {
+- int err = qpd->tba_addr;
+-
+- pr_err("Failure to set tba address. error %d.\n", err);
++ pr_err("Failure to set tba address. error -%d.\n",
++ (int)qpd->tba_addr);
+ qpd->tba_addr = 0;
+ qpd->cwsr_kaddr = NULL;
+- return err;
++ return -ENOMEM;
+ }
+
+ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+@@ -516,8 +502,9 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
+ {
+ struct kfd_dev *dev = pdd->dev;
+ struct qcm_process_device *qpd = &pdd->qpd;
+- uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+- ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_EXECUTABLE;
++ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE | ALLOC_MEM_FLAGS_READONLY |
++ ALLOC_MEM_FLAGS_EXECUTE_ACCESS;
+ void *kaddr;
+ int ret;
+
+@@ -675,12 +662,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+ if (!pdd)
+ return NULL;
+
+- if (init_doorbell_bitmap(&pdd->qpd, dev)) {
+- pr_err("Failed to init doorbell for process\n");
+- kfree(pdd);
+- return NULL;
+- }
+-
+ pdd->dev = dev;
+ INIT_LIST_HEAD(&pdd->qpd.queues_list);
+ INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
+@@ -694,8 +675,19 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
+
+ /* Init idr used for memory handle translation */
+ idr_init(&pdd->alloc_idr);
++ if (init_doorbell_bitmap(&pdd->qpd, dev)) {
++ pr_err("Failed to init doorbell for process\n");
++ goto err_create_pdd;
++ }
+
+ return pdd;
++
++err_create_pdd:
++ kfree(pdd->qpd.doorbell_bitmap);
++ idr_destroy(&pdd->alloc_idr);
++ list_del(&pdd->per_device_list);
++ kfree(pdd);
++ return NULL;
+ }
+
+ /**
+@@ -720,18 +712,17 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+ int ret;
+
+ if (pdd->vm)
+- return drm_file ? -EBUSY : 0;
++ return 0;
+
+ p = pdd->process;
+ dev = pdd->dev;
+
+ if (drm_file)
+ ret = dev->kfd2kgd->acquire_process_vm(
+- dev->kgd, drm_file, p->pasid,
+- &pdd->vm, &p->kgd_process_info, &p->ef);
++ dev->kgd, drm_file, &pdd->vm, &p->process_info, &p->ef);
+ else
+ ret = dev->kfd2kgd->create_process_vm(
+- dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef);
++ dev->kgd, &pdd->vm, &p->process_info, &p->ef);
+ if (ret) {
+ pr_err("Failed to create process VM object\n");
+ return ret;
+@@ -815,8 +806,7 @@ bool kfd_has_process_device_data(struct kfd_process *p)
+ */
+ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+ void *mem, uint64_t start,
+- uint64_t length, uint64_t cpuva,
+- unsigned int mem_type,
++ uint64_t length,
+ struct kfd_ipc_obj *ipc_obj)
+ {
+ int handle;
+@@ -837,12 +827,15 @@ int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
+ buf_obj->mem = mem;
+ buf_obj->dev = pdd->dev;
+ buf_obj->kfd_ipc_obj = ipc_obj;
+- buf_obj->cpuva = cpuva;
+- buf_obj->mem_type = mem_type;
+
+ INIT_LIST_HEAD(&buf_obj->cb_data_head);
+
+- handle = idr_alloc(&pdd->alloc_idr, buf_obj, 0, 0, GFP_KERNEL);
++ idr_preload(GFP_KERNEL);
++
++ handle = idr_alloc(&pdd->alloc_idr, buf_obj, MIN_IDR_ID, MAX_IDR_ID,
++ GFP_NOWAIT);
++
++ idr_preload_end();
+
+ if (handle < 0)
+ kfree(buf_obj);
+@@ -945,6 +938,42 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
+ return ret_p;
+ }
+
++void kfd_suspend_all_processes(void)
++{
++ struct kfd_process *p;
++ unsigned int temp;
++ int idx = srcu_read_lock(&kfd_processes_srcu);
++
++ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
++ cancel_delayed_work_sync(&p->eviction_work);
++ cancel_delayed_work_sync(&p->restore_work);
++
++ if (kfd_process_evict_queues(p))
++ pr_err("Failed to suspend process %d\n", p->pasid);
++ dma_fence_signal(p->ef);
++ dma_fence_put(p->ef);
++ p->ef = NULL;
++ }
++ srcu_read_unlock(&kfd_processes_srcu, idx);
++}
++
++int kfd_resume_all_processes(void)
++{
++ struct kfd_process *p;
++ unsigned int temp;
++ int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
++
++ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
++ if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
++ pr_err("Restore process %d failed during resume\n",
++ p->pasid);
++ ret = -EFAULT;
++ }
++ }
++ srcu_read_unlock(&kfd_processes_srcu, idx);
++ return ret;
++}
++
+ /* This increments the process->ref counter. */
+ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
+ {
+@@ -1036,14 +1065,15 @@ static void evict_process_worker(struct work_struct *work)
+ "Eviction fence mismatch\n");
+
+ /* Narrow window of overlap between restore and evict work
+- * item is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
+- * unreserves KFD BOs, it is possible to evicted again. But
+- * restore has few more steps of finish. So lets wait for any
+- * previous restore work to complete
++ * item is possible. Once
++ * amdgpu_amdkfd_gpuvm_restore_process_bos unreserves KFD BOs,
++ * it is possible to be evicted again. But restore has a few more
++ * steps to finish. So let's wait for any previous restore work
++ * to complete
+ */
+ flush_delayed_work(&p->restore_work);
+
+- pr_info("Started evicting pasid %d\n", p->pasid);
++ pr_info("Started evicting process of pasid %d\n", p->pasid);
+ ret = kfd_process_evict_queues(p);
+ if (!ret) {
+ dma_fence_signal(p->ef);
+@@ -1052,9 +1082,10 @@ static void evict_process_worker(struct work_struct *work)
+ queue_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
+
+- pr_info("Finished evicting pasid %d\n", p->pasid);
++ pr_info("Finished evicting process of pasid %d\n", p->pasid);
+ } else
+- pr_err("Failed to evict queues of pasid %d\n", p->pasid);
++ pr_err("Failed to quiesce user queues. Cannot evict pasid %d\n",
++ p->pasid);
+ }
+
+ static void restore_process_worker(struct work_struct *work)
+@@ -1080,7 +1111,7 @@ static void restore_process_worker(struct work_struct *work)
+ struct kfd_process_device,
+ per_device_list);
+
+- pr_info("Started restoring pasid %d\n", p->pasid);
++ pr_info("Started restoring process of pasid %d\n", p->pasid);
+
+ /* Setting last_restore_timestamp before successful restoration.
+ * Otherwise this would have to be set by KGD (restore_process_bos)
+@@ -1093,11 +1124,10 @@
+ */
+
+ p->last_restore_timestamp = get_jiffies_64();
+- ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
+- &p->ef);
++ ret = pdd->dev->kfd2kgd->restore_process_bos(p->process_info, &p->ef);
+ if (ret) {
+- pr_info("Failed to restore BOs of pasid %d, retry after %d ms\n",
+- p->pasid, PROCESS_BACK_OFF_TIME_MS);
++ pr_info("Restore failed, try again after %d ms\n",
++ PROCESS_BACK_OFF_TIME_MS);
+ ret = queue_delayed_work(kfd_restore_wq, &p->restore_work,
+ msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
+ WARN(!ret, "reschedule restore work failed\n");
+@@ -1105,54 +1135,21 @@ static void restore_process_worker(struct work_struct *work)
+ }
+
+ ret = kfd_process_restore_queues(p);
+- if (!ret)
+- pr_info("Finished restoring pasid %d\n", p->pasid);
+- else
+- pr_err("Failed to restore queues of pasid %d\n", p->pasid);
+-}
+-
+-void kfd_suspend_all_processes(void)
+-{
+- struct kfd_process *p;
+- unsigned int temp;
+- int idx = srcu_read_lock(&kfd_processes_srcu);
+-
+- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+- cancel_delayed_work_sync(&p->eviction_work);
+- cancel_delayed_work_sync(&p->restore_work);
+-
+- if (kfd_process_evict_queues(p))
+- pr_err("Failed to suspend process %d\n", p->pasid);
+- dma_fence_signal(p->ef);
+- dma_fence_put(p->ef);
+- p->ef = NULL;
+- }
+- srcu_read_unlock(&kfd_processes_srcu, idx);
+-}
+-
+-int kfd_resume_all_processes(void)
+-{
+- struct kfd_process *p;
+- unsigned int temp;
+- int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
++ if (ret)
++ pr_err("Failed to resume user queues\n");
+
+- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+- if (!queue_delayed_work(kfd_restore_wq, &p->restore_work, 0)) {
+- pr_err("Restore process %d failed during resume\n",
+- p->pasid);
+- ret = -EFAULT;
+- }
+- }
+- srcu_read_unlock(&kfd_processes_srcu, idx);
+- return ret;
++ pr_info("Finished restoring process of pasid %d\n", p->pasid);
+ }
+
+-int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
+- struct vm_area_struct *vma)
++int kfd_reserved_mem_mmap(struct kfd_process *process,
++ struct vm_area_struct *vma)
+ {
++ struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
+ struct kfd_process_device *pdd;
+ struct qcm_process_device *qpd;
+
++ if (!dev)
++ return -EINVAL;
+ if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+ pr_err("Incorrect CWSR mapping size.\n");
+ return -EINVAL;
+ }
+@@ -1178,6 +1175,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
+ }
+
++
+ void kfd_flush_tlb(struct kfd_process_device *pdd)
+ {
+ struct kfd_dev *dev = pdd->dev;
+@@ -1212,7 +1210,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
+ r = pqm_debugfs_mqds(m, &p->pqm);
+ mutex_unlock(&p->mutex);
+
+- if (r)
++ if (r != 0)
+ break;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+index 8933323..52882e0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+@@ -188,7 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
+ case KFD_QUEUE_TYPE_SDMA:
+ if (dev->dqm->sdma_queue_count
+ >= get_num_sdma_queues(dev->dqm)) {
+- pr_debug("Over-subscription is not allowed for SDMA.\n");
++ pr_debug("Over-subscription is not allowed for SDMA\n");
+ retval = -EPERM;
+ goto err_create_queue;
+ }
+@@ -206,7 +206,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
+ case KFD_QUEUE_TYPE_COMPUTE:
+ /* check if there is over subscription */
+ if ((dev->dqm->sched_policy ==
+- KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
++ KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
+ ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
+ (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
+ pr_debug("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+@@ -241,8 +241,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
+ }
+
+ if (retval != 0) {
+- pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+- pqm->process->pasid, type, retval);
++ pr_err("DQM create queue failed\n");
+ goto err_create_queue;
+ }
+
+@@ -318,16 +317,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
+
+ if (pqn->q) {
+ dqm = pqn->q->device->dqm;
++ kfree(pqn->q->properties.cu_mask);
++ pqn->q->properties.cu_mask = NULL;
+ retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
+ if (retval) {
+- pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+- pqm->process->pasid,
+- pqn->q->properties.queue_id, retval);
+- if (retval != -ETIME)
+- goto err_destroy_queue;
++ pr_debug("Destroy queue failed, returned %d\n", retval);
++ goto err_destroy_queue;
+ }
+- kfree(pqn->q->properties.cu_mask);
+- pqn->q->properties.cu_mask = NULL;
+ uninit_queue(pqn->q);
+ }
+
+@@ -439,7 +435,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
+ struct process_queue_node *pqn;
+ struct queue *q;
+ enum KFD_MQD_TYPE mqd_type;
+- struct mqd_manager *mqd_mgr;
++ struct mqd_manager *mqd_manager;
+ int r = 0;
+
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+@@ -462,11 +458,11 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
+ q->properties.type, q->device->id);
+ continue;
+ }
+- mqd_mgr = q->device->dqm->ops.get_mqd_manager(
++ mqd_manager = q->device->dqm->ops.get_mqd_manager(
+ q->device->dqm, mqd_type);
+ } else if (pqn->kq) {
+ q = pqn->kq->queue;
+- mqd_mgr = pqn->kq->mqd_mgr;
++ mqd_manager = pqn->kq->mqd;
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_DIQ:
+ seq_printf(m, " DIQ on device %x\n",
+@@ -486,7 +482,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
+ continue;
+ }
+
+- r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
++ r = mqd_manager->debugfs_show_mqd(m, q->mqd);
+ if (r != 0)
+ break;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+index 6dcd621..a5315d4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+@@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q)
+ pr_debug("Queue Address: 0x%llX\n", q->queue_address);
+ pr_debug("Queue Id: %u\n", q->queue_id);
+ pr_debug("Queue Process Vmid: %u\n", q->vmid);
+- pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+- pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
++ pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
++ pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+ pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
+ pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
+ }
+@@ -53,8 +53,8 @@ void print_queue(struct queue *q)
+ pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
+ pr_debug("Queue Id: %u\n", q->properties.queue_id);
+ pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
+- pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+- pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
++ pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
++ pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+ pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
+ pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
+ pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
+index 3454514..985855f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_rdma.c
+@@ -25,7 +25,7 @@
+ #include <linux/pid.h>
+ #include <linux/err.h>
+ #include <linux/slab.h>
+-#include <drm/amd_rdma.h>
++#include "amd_rdma.h"
+ #include "kfd_priv.h"
+
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+index 7702156..320c8d3 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -196,7 +196,6 @@ struct kfd_topology_device *kfd_create_topology_device(
+ return dev;
+ }
+
+-
+ #define sysfs_show_gen_prop(buffer, fmt, ...) \
+ snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__)
+ #define sysfs_show_32bit_prop(buffer, name, value) \
+@@ -740,7 +739,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
+ }
+
+ /* All hardware blocks have the same number of attributes. */
+- num_attrs = ARRAY_SIZE(perf_attr_iommu);
++ num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
+ list_for_each_entry(perf, &dev->perf_props, list) {
+ perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr)
+ * num_attrs + sizeof(struct attribute_group),
+@@ -891,8 +890,7 @@ static void kfd_debug_print_topology(void)
+ up_read(&topology_lock);
+ }
+
+-/* Helper function for intializing platform_xx members of
+- * kfd_system_properties. Uses OEM info from the last CPU/APU node.
++/* Helper function for initializing platform_xx members of kfd_system_properties
+ */
+ static void kfd_update_system_properties(void)
+ {
+@@ -1015,12 +1013,13 @@ int kfd_topology_init(void)
+ */
+ #ifdef CONFIG_ACPI
+ ret = kfd_create_crat_image_acpi(&crat_image, &image_size);
+- if (!ret) {
++ if (ret == 0) {
+ ret = kfd_parse_crat_table(crat_image,
+ &temp_topology_device_list,
+ proximity_domain);
+ if (ret ||
+- kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
++ kfd_is_acpi_crat_invalid(&temp_topology_device_list)) {
++
+ kfd_release_topology_device_list(
+ &temp_topology_device_list);
+ kfd_destroy_crat_image(crat_image);
+@@ -1030,8 +1029,8 @@ int kfd_topology_init(void)
+ #endif
+ if (!crat_image) {
+ ret = kfd_create_crat_image_virtual(&crat_image, &image_size,
+- COMPUTE_UNIT_CPU, NULL,
+- proximity_domain);
++ COMPUTE_UNIT_CPU, NULL,
++ proximity_domain);
+ cpu_only_node = 1;
+ if (ret) {
+ pr_err("Error creating VCRAT table for CPU\n");
+@@ -1039,8 +1038,8 @@ int kfd_topology_init(void)
+ }
+
+ ret = kfd_parse_crat_table(crat_image,
+- &temp_topology_device_list,
+- proximity_domain);
++ &temp_topology_device_list,
++ proximity_domain);
+ if (ret) {
+ pr_err("Error parsing VCRAT table for CPU\n");
+ goto err;
+@@ -1052,12 +1051,12 @@ int kfd_topology_init(void)
+
+ down_write(&topology_lock);
+ kfd_topology_update_device_list(&temp_topology_device_list,
+- &topology_device_list);
++ &topology_device_list);
+ atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
+ ret = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+- if (!ret) {
++ if (ret == 0) {
+ sys_props.generation_count++;
+ kfd_update_system_properties();
+ kfd_debug_print_topology();
+@@ -1145,6 +1144,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
+ break;
+ }
+ up_write(&topology_lock);
++
+ return out_dev;
+ }
+
+@@ -1182,40 +1182,17 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)
+
+ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
+ {
+- struct kfd_iolink_properties *link, *cpu_link;
+- struct kfd_topology_device *cpu_dev;
+- uint32_t cap;
+- uint32_t cpu_flag = CRAT_IOLINK_FLAGS_ENABLED;
+- uint32_t flag = CRAT_IOLINK_FLAGS_ENABLED;
++ struct kfd_iolink_properties *link;
+
+ if (!dev || !dev->gpu)
+ return;
+
+- pcie_capability_read_dword(dev->gpu->pdev,
+- PCI_EXP_DEVCAP2, &cap);
+-
+- if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+- PCI_EXP_DEVCAP2_ATOMIC_COMP64)))
+- cpu_flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+- CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+-
+- if (!dev->gpu->pci_atomic_requested ||
+- dev->gpu->device_info->asic_family == CHIP_HAWAII)
+- flag |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
++ /* GPU only creates direct links so apply flags setting to all */
++ if (dev->gpu->device_info->asic_family == CHIP_HAWAII)
++ list_for_each_entry(link, &dev->io_link_props, list)
++ link->flags = CRAT_IOLINK_FLAGS_ENABLED |
++ CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |
+ CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;
+-
+- /* GPU only creates direct links so apply flags setting to all */
+- list_for_each_entry(link, &dev->io_link_props, list) {
+- link->flags = flag;
+- cpu_dev = kfd_topology_device_by_proximity_domain(
+- link->node_to);
+- if (cpu_dev) {
+- list_for_each_entry(cpu_link,
+- &cpu_dev->io_link_props, list)
+- if (cpu_link->node_to == link->node_from)
+- cpu_link->flags = cpu_flag;
+- }
+- }
+ }
+
+ int kfd_topology_add_device(struct kfd_dev *gpu)
+@@ -1235,7 +1212,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+
+ pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+
+- proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
++ proximity_domain = atomic_inc_return(&
++ topology_crat_proximity_domain);
+
+ /* Check to see if this gpu device exists in the topology_device_list.
+ * If so, assign the gpu to that device,
+@@ -1246,16 +1224,15 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ dev = kfd_assign_gpu(gpu);
+ if (!dev) {
+ res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+- COMPUTE_UNIT_GPU, gpu,
+- proximity_domain);
++ COMPUTE_UNIT_GPU,
++ gpu, proximity_domain);
+ if (res) {
+ pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+ return res;
+ }
+ res = kfd_parse_crat_table(crat_image,
+- &temp_topology_device_list,
+- proximity_domain);
++ &temp_topology_device_list, proximity_domain);
+ if (res) {
+ pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+ gpu_id);
+@@ -1272,13 +1249,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ res = kfd_topology_update_sysfs();
+ up_write(&topology_lock);
+
+- if (!res)
++ if (res == 0)
+ sys_props.generation_count++;
+ else
+ pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+ gpu_id, res);
+ dev = kfd_assign_gpu(gpu);
+- if (WARN_ON(!dev)) {
++ if (!dev) {
++ pr_err("Could not assign GPU\n");
+ res = -ENODEV;
+ goto err;
+ }
+@@ -1331,22 +1309,20 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ case CHIP_VEGA10:
+- case CHIP_VEGA20:
+ case CHIP_RAVEN:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
+ default:
+- WARN(1, "Unexpected ASIC family %u",
+- dev->gpu->device_info->asic_family);
++ BUG();
+ }
+
+ /* Fix errors in CZ CRAT.
+- * simd_count: Carrizo CRAT reports wrong simd_count, probably
+- * because it doesn't consider masked out CUs
+- * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd
+- * capability flag: Carrizo CRAT doesn't report IOMMU flags
++ * simd_count: Carrizo CRAT reports wrong simd_count, probably because
++ * it doesn't consider masked out CUs
++ * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd.
++ * capability flag: Carrizo CRAT doesn't report IOMMU flags.
+ */
+ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ dev->node_props.simd_count =
+@@ -1386,7 +1362,7 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
+
+ up_write(&topology_lock);
+
+- if (!res)
++ if (res == 0)
+ kfd_notify_gpu_change(gpu_id, 0);
+
+ return res;
+@@ -1427,7 +1403,7 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
+ {
+ int first_cpu_of_numa_node;
+
+- if (!cpumask || cpumask == cpu_none_mask)
++ if (!cpumask || (cpumask == cpu_none_mask))
+ return -1;
+ first_cpu_of_numa_node = cpumask_first(cpumask);
+ if (first_cpu_of_numa_node >= nr_cpu_ids)
+@@ -1470,7 +1446,7 @@ int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = dqm_debugfs_hqds(m, dev->gpu->dqm);
+- if (r)
++ if (r != 0)
+ break;
+ }
+
+@@ -1495,7 +1471,7 @@ int kfd_debugfs_rls_by_device(struct seq_file *m, void *data)
+
+ seq_printf(m, "Node %u, gpu_id %x:\n", i++, dev->gpu->id);
+ r = pm_debugfs_runlist(m, &dev->gpu->dqm->packets);
+- if (r)
++ if (r != 0)
+ break;
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+index 2b36baf..f4d29c4 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+@@ -46,6 +46,9 @@
+ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
+ #define HSA_CAP_DOORBELL_TYPE_1_0 0x1
+ #define HSA_CAP_DOORBELL_TYPE_2_0 0x2
++#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
++#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
++#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
+ #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
+
+ struct kfd_node_properties {
+@@ -166,9 +169,9 @@ struct kfd_topology_device {
+ struct attribute attr_gpuid;
+ struct attribute attr_name;
+ struct attribute attr_props;
+- uint8_t oem_id[CRAT_OEMID_LENGTH];
+- uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
+- uint32_t oem_revision;
++ uint8_t oem_id[CRAT_OEMID_LENGTH];
++ uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH];
++ uint32_t oem_revision;
+ };
+
+ struct kfd_system_properties {
+@@ -187,8 +190,4 @@ struct kfd_topology_device *kfd_create_topology_device(
+ struct list_head *device_list);
+ void kfd_release_topology_device_list(struct list_head *device_list);
+
+-extern bool amd_iommu_pc_supported(void);
+-extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+-extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+-
+ #endif /* __KFD_TOPOLOGY_H__ */
+diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+index 0bc0b25..e00d03d 100644
+--- a/drivers/gpu/drm/amd/amdkfd/soc15_int.h
++++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright 2016-2018 Advanced Micro Devices, Inc.
++ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+@@ -22,8 +22,45 @@
+
+ #ifndef HSA_SOC15_INT_H_INCLUDED
+ #define HSA_SOC15_INT_H_INCLUDED
++/*
++ * vega10+ IH clients
++ */
++enum soc15_ih_client_id {
++ SOC15_IH_CLIENTID_IH = 0x00,
++ SOC15_IH_CLIENTID_ACP = 0x01,
++ SOC15_IH_CLIENTID_ATHUB = 0x02,
++ SOC15_IH_CLIENTID_BIF = 0x03,
++ SOC15_IH_CLIENTID_DCE = 0x04,
++ SOC15_IH_CLIENTID_ISP = 0x05,
++ SOC15_IH_CLIENTID_PCIE0 = 0x06,
++ SOC15_IH_CLIENTID_RLC = 0x07,
++ SOC15_IH_CLIENTID_SDMA0 = 0x08,
++ SOC15_IH_CLIENTID_SDMA1 = 0x09,
++ SOC15_IH_CLIENTID_SE0SH = 0x0a,
++ SOC15_IH_CLIENTID_SE1SH = 0x0b,
++ SOC15_IH_CLIENTID_SE2SH = 0x0c,
++ SOC15_IH_CLIENTID_SE3SH = 0x0d,
++ SOC15_IH_CLIENTID_SYSHUB = 0x0e,
++ SOC15_IH_CLIENTID_THM = 0x0f,
++ SOC15_IH_CLIENTID_UVD = 0x10,
++ SOC15_IH_CLIENTID_VCE0 = 0x11,
++ SOC15_IH_CLIENTID_VMC = 0x12,
++ SOC15_IH_CLIENTID_XDMA = 0x13,
++ SOC15_IH_CLIENTID_GRBM_CP = 0x14,
++ SOC15_IH_CLIENTID_ATS = 0x15,
++ SOC15_IH_CLIENTID_ROM_SMUIO = 0x16,
++ SOC15_IH_CLIENTID_DF = 0x17,
++ SOC15_IH_CLIENTID_VCE1 = 0x18,
++ SOC15_IH_CLIENTID_PWR = 0x19,
++ SOC15_IH_CLIENTID_UTCL2 = 0x1b,
++ SOC15_IH_CLIENTID_EA = 0x1c,
++ SOC15_IH_CLIENTID_UTCL2LOG = 0x1d,
++ SOC15_IH_CLIENTID_MP0 = 0x1e,
++ SOC15_IH_CLIENTID_MP1 = 0x1f,
++
++ SOC15_IH_CLIENTID_MAX
++};
+
+-#include "soc15_ih_clientid.h"
+
+ #define SOC15_INTSRC_CP_END_OF_PIPE 181
+ #define SOC15_INTSRC_CP_BAD_OPCODE 183
+-- 
+2.7.4
+
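Note on the packet-manager rollback in the kfd_priv.h hunks above: upstream keeps a shared const packet_manager_funcs table (kfd_vi_pm_funcs / kfd_v9_pm_funcs) with fixed *_size fields, while the rolled-back code installs a mutable ops table per ASIC and queries packet sizes through get_*_packet_size() callbacks chosen by CP firmware version (pm_init() regains a fw_ver argument; see kfd_pm_func_init_vi/cik/v9 and KFD_SCRATCH_KV_FW_VER). The following is a minimal, stand-alone C sketch of that dispatch shape, not the kernel code itself: the struct layout and the kfd_pm_func_init_vi() name mirror the patch, but the trimmed-down single callback, the stub bodies, the byte sizes, and the use of the 413 threshold for VI are assumptions made for illustration.

#include <stdint.h>
#include <stdio.h>

struct packet_manager;

/* Mutable ops table, as in the rolled-back header: packet sizes are
 * returned by callbacks rather than stored as const integers. */
struct packet_manager_funcs {
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer);
	uint32_t (*get_map_process_packet_size)(void);
};

struct packet_manager {
	struct packet_manager_funcs *pmf; /* non-const, per the rollback */
};

/* Hypothetical VI size callbacks; the real sizes differ. */
static uint32_t pm_get_map_process_packet_size_vi_legacy(void)  { return 60; }
static uint32_t pm_get_map_process_packet_size_vi_scratch(void) { return 72; }

static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer)
{
	(void)pm;
	(void)buffer; /* a real backend would build a PM4 MAP_PROCESS packet */
	return 0;
}

static struct packet_manager_funcs vi_pm_funcs = {
	.map_process = pm_map_process_vi,
};

/* Mirrors the kfd_pm_func_init_vi(pm, fw_ver) shape from the patch:
 * select size callbacks by firmware version, then install the table.
 * The 413 threshold is borrowed from KFD_SCRATCH_KV_FW_VER purely as
 * an example of a version-dependent packet layout. */
static void kfd_pm_func_init_vi(struct packet_manager *pm, uint16_t fw_ver)
{
	vi_pm_funcs.get_map_process_packet_size =
		(fw_ver >= 413) ? pm_get_map_process_packet_size_vi_scratch
				: pm_get_map_process_packet_size_vi_legacy;
	pm->pmf = &vi_pm_funcs;
}

int main(void)
{
	struct packet_manager pm;
	uint32_t buf[32];

	kfd_pm_func_init_vi(&pm, 420);
	pm.pmf->map_process(&pm, buf);
	printf("MAP_PROCESS packet size: %u bytes\n",
	       pm.pmf->get_map_process_packet_size());
	return 0;
}

The design trade-off the rollback makes is visible even in this toy: callers size their runlist IBs by calling the getters after init, which copes with firmware revisions that change packet layouts, at the cost of a writable ops table instead of upstream's const vtable with compile-time sizes.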