Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1623-drm-amdgpu-Add-Vega10-support-for-KFD.patch')
 meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1623-drm-amdgpu-Add-Vega10-support-for-KFD.patch | 1544 ------------------
 1 file changed, 0 insertions(+), 1544 deletions(-)
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1623-drm-amdgpu-Add-Vega10-support-for-KFD.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1623-drm-amdgpu-Add-Vega10-support-for-KFD.patch
deleted file mode 100644
index 3f836132..00000000
--- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1623-drm-amdgpu-Add-Vega10-support-for-KFD.patch
+++ /dev/null
@@ -1,1544 +0,0 @@
-From 9256a2b22c170e1dcd239a31628abbbf846f21f2 Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Tue, 14 Mar 2017 23:36:55 -0400
-Subject: [PATCH 1623/4131] drm/amdgpu: Add Vega10 support for KFD
-
-Change-Id: I37da6493d070c7e490e18a478ffeb42c1b158a82
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
----
- drivers/gpu/drm/amd/amdgpu/Makefile | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 48 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 22 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 4 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1124 +++++++++++++++++++++
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 47 +-
- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 +
- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 16 +
- 9 files changed, 1232 insertions(+), 47 deletions(-)
- create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
-index 7ff4af3..81423ee 100755
---- a/drivers/gpu/drm/amd/amdgpu/Makefile
-+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
-@@ -110,6 +110,7 @@ amdgpu-y += \
- amdgpu_amdkfd.o \
- amdgpu_amdkfd_gfx_v7.o \
- amdgpu_amdkfd_gfx_v8.o \
-+ amdgpu_amdkfd_gfx_v9.o \
- amdgpu_amdkfd_gpuvm.o
-
- # add cgs
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-index a30b616..3a1776b 100755
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-@@ -28,7 +28,6 @@
- #include "amdgpu.h"
- #include "amdgpu_gfx.h"
- #include <linux/module.h>
--#include <linux/mmu_context.h>
-
- #define AMDKFD_SKIP_UNCOMPILED_CODE 1
-
-@@ -82,6 +81,9 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
- case CHIP_POLARIS11:
- kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
- break;
-+ case CHIP_VEGA10:
-+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
-+ break;
- default:
- return false;
- }
-@@ -142,6 +144,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
- &gpu_resources.doorbell_physical_address,
- &gpu_resources.doorbell_aperture_size,
- &gpu_resources.doorbell_start_offset);
-+ if (adev->asic_type >= CHIP_VEGA10) {
-+ /* On SOC15 the BIF is involved in routing
-+ * doorbells using the low 12 bits of the
-+ * address. Communicate the assignments to
-+ * KFD. KFD uses two doorbell pages per
-+ * process in the case of 64-bit doorbells,
-+ * so each doorbell assignment can be used twice.
-+ */
-+ gpu_resources.sdma_doorbell[0][0] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
-+ gpu_resources.sdma_doorbell[0][1] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
-+ gpu_resources.sdma_doorbell[1][0] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
-+ gpu_resources.sdma_doorbell[1][1] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
-+ /* Doorbells 0x0f0-0x0ff and 0x2f0-0x2ff are reserved for
-+ * SDMA, IH and VCN. So don't use them for the CP.
-+ */
-+ gpu_resources.reserved_doorbell_mask = 0x1f0;
-+ gpu_resources.reserved_doorbell_val = 0x0f0;
-+ }
-
- kgd2kfd->device_init(adev->kfd, &gpu_resources);
- }
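
The +0x200 spacing above follows directly from the routing rule the comment
describes. A minimal sketch of that arithmetic, illustrative only
(bif_routing_bits is a hypothetical helper, not a driver function):

    #include <stdint.h>

    /* Each 64-bit doorbell occupies 8 bytes, so doorbell indices repeat
     * their low 12 address bits every 0x1000 / 8 = 0x200 entries. An
     * index D and D + 0x200 therefore land on the same BIF routing slot,
     * which is why one SDMA doorbell assignment can serve both of a
     * process's doorbell pages.
     */
    static unsigned int bif_routing_bits(unsigned int doorbell_index)
    {
            return (doorbell_index * (unsigned int)sizeof(uint64_t)) & 0xfff;
    }
    /* bif_routing_bits(D) == bif_routing_bits(D + 0x200) for any D */
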
-@@ -610,28 +634,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- return r;
- }
-
--bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr,
-- uint32_t *wptr_val)
--{
-- bool wptr_valid = false;
--
-- if (mm && wptr) {
-- if (mm == current->mm) {
-- /* Running in the correct user process context */
-- wptr_valid = !get_user(*wptr_val, wptr);
-- } else if (current->mm == NULL) {
-- /* A kernel thread can temporarily use a user
-- * process context for AIO
-- */
-- use_mm(mm);
-- wptr_valid = !get_user(*wptr_val, wptr);
-- unuse_mm(mm);
-- }
-- }
--
-- return wptr_valid;
--}
--
- bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev,
- u32 vmid)
- {
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-index 262a4fe..23e2f12 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-@@ -28,6 +28,7 @@
- #include <linux/types.h>
- #include <linux/mm.h>
- #include <linux/workqueue.h>
-+#include <linux/mmu_context.h>
- #include <kgd_kfd_interface.h>
- #include "amdgpu.h"
-
-@@ -59,10 +60,10 @@ struct kgd_mem {
- struct delayed_work work; /* for restore evicted mem */
- struct mm_struct *mm; /* for restore */
-
-- uint32_t pte_flags;
--
-+ uint64_t pte_flags;
-
- /* flags bitfield */
-+ bool coherent : 1;
- bool no_substitute : 1;
- bool aql_queue : 1;
- bool busy : 1;
-@@ -141,6 +142,7 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
- int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info);
- struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
- struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
- int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
- uint64_t src_offset, struct kgd_mem *dst_mem,
- uint64_t dest_offset, uint64_t size, struct fence **f,
-@@ -168,8 +170,20 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
- size_t buffer_size, uint32_t *metadata_size,
- uint32_t *flags);
-
--bool read_user_wptr(struct mm_struct *mm, uint32_t __user *wptr,
-- uint32_t *wptr_val);
-+#define read_user_wptr(mmptr, wptr, dst) \
-+ ({ \
-+ bool valid = false; \
-+ if ((mmptr) && (wptr)) { \
-+ if ((mmptr) == current->mm) { \
-+ valid = !get_user((dst), (wptr)); \
-+ } else if (current->mm == NULL) { \
-+ use_mm(mmptr); \
-+ valid = !get_user((dst), (wptr)); \
-+ unuse_mm(mmptr); \
-+ } \
-+ } \
-+ valid; \
-+ })
-
- /* GPUVM API */
- int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
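
Turning read_user_wptr() from a function into a macro lets the destination
take any integer type, which is what allows the GFX9 SDMA path below to fetch
a 64-bit write pointer while the GFX7/8 callers keep passing 32-bit values.
A hedged usage sketch (write_hw_wptr and fallback_val are hypothetical
placeholders, not driver symbols):

    uint64_t wptr_val;

    /* The macro evaluates to true only when the pointer could be fetched
     * from the queue owner's address space: directly if we are already in
     * that process's context, or via use_mm()/unuse_mm() when called from
     * a kernel worker thread that has no mm of its own.
     */
    if (read_user_wptr(mm, (uint64_t __user *)wptr, wptr_val))
            write_hw_wptr(wptr_val);     /* hypothetical register update */
    else
            write_hw_wptr(fallback_val); /* e.g. the saved ring RPTR */
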
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-index ae860ec..ac9d6ec 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-@@ -398,7 +398,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- /* Copy userspace write pointer value to register.
- * Doorbell logic is active and will monitor subsequent changes.
- */
-- if (read_user_wptr(mm, wptr, &wptr_val))
-+ if (read_user_wptr(mm, wptr, wptr_val))
- WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
-
- /* Write CP_HQD_ACTIVE last. */
-@@ -485,7 +485,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdma_rlc_doorbell);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
-
-- if (read_user_wptr(mm, wptr, &data))
-+ if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
- else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-index fbea6a6..2c6a199 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-@@ -403,7 +403,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- /* Copy userspace write pointer value to register.
- * Doorbell logic is active and will monitor subsequent changes.
- */
-- if (read_user_wptr(mm, wptr, &wptr_val))
-+ if (read_user_wptr(mm, wptr, wptr_val))
- WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
-
- /* Write CP_HQD_ACTIVE last. */
-@@ -489,7 +489,7 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdmax_rlcx_doorbell);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-
-- if (read_user_wptr(mm, wptr, &data))
-+ if (read_user_wptr(mm, wptr, data))
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
- else
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-new file mode 100644
-index 0000000..e9b2db9
---- /dev/null
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-@@ -0,0 +1,1124 @@
-+/*
-+ * Copyright 2014 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
-+#include <linux/module.h>
-+#include <linux/fdtable.h>
-+#include <linux/uaccess.h>
-+#include <linux/firmware.h>
-+#include <drm/drmP.h>
-+#include "amdgpu.h"
-+#include "amdgpu_amdkfd.h"
-+#include "amdgpu_ucode.h"
-+#include "amdgpu_amdkfd_gfx_v8.h"
-+#include "vega10/soc15ip.h"
-+#include "vega10/GC/gc_9_0_offset.h"
-+#include "vega10/GC/gc_9_0_sh_mask.h"
-+#include "vega10/vega10_enum.h"
-+#include "vega10/SDMA0/sdma0_4_0_offset.h"
-+#include "vega10/SDMA0/sdma0_4_0_sh_mask.h"
-+#include "vega10/SDMA1/sdma1_4_0_offset.h"
-+#include "vega10/SDMA1/sdma1_4_0_sh_mask.h"
-+#include "vega10/ATHUB/athub_1_0_offset.h"
-+#include "vega10/ATHUB/athub_1_0_sh_mask.h"
-+#include "vega10/OSSSYS/osssys_4_0_offset.h"
-+#include "vega10/OSSSYS/osssys_4_0_sh_mask.h"
-+#include "soc15_common.h"
-+#include "v9_structs.h"
-+#include "soc15.h"
-+
-+/* HACK: MMHUB and GC both have VM-related registers with the same
-+ * names but different offsets. Define the MMHUB register we need here
-+ * with a prefix. A proper solution would be to move the functions
-+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
-+ * respectively.
-+ */
-+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
-+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
-+
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
-+
-+#define V9_PIPE_PER_MEC (4)
-+#define V9_QUEUES_PER_PIPE_MEC (8)
-+
-+enum hqd_dequeue_request_type {
-+ NO_ACTION = 0,
-+ DRAIN_PIPE,
-+ RESET_WAVES,
-+ SAVE_WAVES
-+};
-+
-+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
-+ mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
-+ mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
-+ mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
-+ mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
-+};
-+
-+
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem);
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem);
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem);
-+
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-+
-+/*
-+ * Register access functions
-+ */
-+
-+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t sh_mem_config,
-+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-+ uint32_t sh_mem_bases);
-+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-+ unsigned int vmid);
-+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
-+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm);
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm);
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-+ uint32_t pipe_id, uint32_t queue_id);
-+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-+static int kgd_hqd_destroy(struct kgd_dev *kgd,
-+ enum kfd_preempt_type reset_type,
-+ unsigned int utimeout, uint32_t pipe_id,
-+ uint32_t queue_id);
-+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-+ unsigned int utimeout);
-+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
-+static int kgd_address_watch_disable(struct kgd_dev *kgd);
-+static int kgd_address_watch_execute(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ uint32_t cntl_val,
-+ uint32_t addr_hi,
-+ uint32_t addr_lo);
-+static int kgd_wave_control_execute(struct kgd_dev *kgd,
-+ uint32_t gfx_index_val,
-+ uint32_t sq_cmd);
-+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ unsigned int reg_offset);
-+
-+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-+ uint8_t vmid);
-+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-+ uint8_t vmid);
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests);
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid);
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype);
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base);
-+
-+/* Because REG_GET_FIELD() is used, this function belongs in the
-+ * ASIC-specific file.
-+ */
-+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
-+ struct tile_config *config)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+
-+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
-+#if 0
-+/* TODO: confirm the two REG_GET_FIELD uses; they should be OK as is,
-+ * but the MC_ARB_RAMCFG register doesn't exist on Vega10. The initial
-+ * amdgpu changes commented out the related code; do the same here for
-+ * now, but this still needs to be synced with Ken et al.
-+ */
-+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFBANK);
-+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFRANKS);
-+#endif
-+
-+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
-+ config->num_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
-+ config->macro_tile_config_ptr =
-+ adev->gfx.config.macrotile_mode_array;
-+ config->num_macro_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
-+
-+ return 0;
-+}
-+
-+static const struct kfd2kgd_calls kfd2kgd = {
-+ .init_gtt_mem_allocation = alloc_gtt_mem,
-+ .free_gtt_mem = free_gtt_mem,
-+ .get_local_mem_info = get_local_mem_info,
-+ .get_gpu_clock_counter = get_gpu_clock_counter,
-+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
-+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
-+ .create_process_gpumem = create_process_gpumem,
-+ .destroy_process_gpumem = destroy_process_gpumem,
-+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
-+ .open_graphic_handle = open_graphic_handle,
-+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
-+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-+ .init_pipeline = kgd_init_pipeline,
-+ .init_interrupts = kgd_init_interrupts,
-+ .hqd_load = kgd_hqd_load,
-+ .hqd_sdma_load = kgd_hqd_sdma_load,
-+ .hqd_dump = kgd_hqd_dump,
-+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
-+ .hqd_is_occupied = kgd_hqd_is_occupied,
-+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-+ .hqd_destroy = kgd_hqd_destroy,
-+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-+ .address_watch_disable = kgd_address_watch_disable,
-+ .address_watch_execute = kgd_address_watch_execute,
-+ .wave_control_execute = kgd_wave_control_execute,
-+ .address_watch_get_offset = kgd_address_watch_get_offset,
-+ .get_atc_vmid_pasid_mapping_pasid =
-+ get_atc_vmid_pasid_mapping_pasid,
-+ .get_atc_vmid_pasid_mapping_valid =
-+ get_atc_vmid_pasid_mapping_valid,
-+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
-+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
-+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
-+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
-+ .get_fw_version = get_fw_version,
-+ .set_num_of_requests = set_num_of_requests,
-+ .get_cu_info = get_cu_info,
-+ .alloc_memory_of_scratch = alloc_memory_of_scratch,
-+ .write_config_static_mem = write_config_static_mem,
-+ .mmap_bo = amdgpu_amdkfd_gpuvm_mmap_bo,
-+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
-+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
-+ .pin_get_sg_table_bo = amdgpu_amdkfd_gpuvm_pin_get_sg_table,
-+ .unpin_put_sg_table_bo = amdgpu_amdkfd_gpuvm_unpin_put_sg_table,
-+ .get_dmabuf_info = amdgpu_amdkfd_get_dmabuf_info,
-+ .import_dmabuf = amdgpu_amdkfd_gpuvm_import_dmabuf,
-+ .export_dmabuf = amdgpu_amdkfd_gpuvm_export_dmabuf,
-+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
-+ .submit_ib = amdgpu_amdkfd_submit_ib,
-+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
-+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
-+ .copy_mem_to_mem = amdgpu_amdkfd_copy_mem_to_mem
-+};
-+
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions()
-+{
-+ return (struct kfd2kgd_calls *)&kfd2kgd;
-+}
-+
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
-+/* Destroys the GPU allocation and frees the kgd_mem structure */
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem)
-+{
-+
-+}
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
-+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-+{
-+ return (struct amdgpu_device *)kgd;
-+}
-+
-+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
-+ uint32_t queue, uint32_t vmid)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ mutex_lock(&adev->srbm_mutex);
-+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
-+}
-+
-+static void unlock_srbm(struct kgd_dev *kgd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ soc15_grbm_select(adev, 0, 0, 0, 0);
-+ mutex_unlock(&adev->srbm_mutex);
-+}
-+
-+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t queue_id)
-+{
-+ /* Pipe 0 is used by graphics (see get_queue_mask below), so bump
-+ * the KFD pipe index by one before splitting it into an MEC/pipe
-+ * pair.
-+ */
-+ uint32_t mec = (++pipe_id / V9_PIPE_PER_MEC) + 1;
-+ uint32_t pipe = (pipe_id % V9_PIPE_PER_MEC);
-+
-+ lock_srbm(kgd, mec, pipe, queue_id, 0);
-+}
-+
-+static uint32_t get_queue_mask(uint32_t pipe_id, uint32_t queue_id)
-+{
-+ /* assumes that pipe0 is used by graphics and that the correct
-+ * MEC is selected by acquire_queue already
-+ */
-+ unsigned int bit = ((pipe_id+1) * V9_QUEUES_PER_PIPE_MEC +
-+ queue_id) & 31;
-+
-+ return ((uint32_t)1) << bit;
-+}
-+
-+static void release_queue(struct kgd_dev *kgd)
-+{
-+ unlock_srbm(kgd);
-+}
-+
-+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t sh_mem_config,
-+ uint32_t sh_mem_ape1_base,
-+ uint32_t sh_mem_ape1_limit,
-+ uint32_t sh_mem_bases)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ lock_srbm(kgd, 0, 0, 0, vmid);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
-+ /* APE1 no longer exists on GFX9 */
-+
-+ unlock_srbm(kgd);
-+}
-+
-+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-+ unsigned int vmid)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ /*
-+ * We have to assume that there is no outstanding mapping.
-+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
-+ * a mapping is in progress or because a mapping finished
-+ * and the SW cleared it.
-+ * So the protocol is to always wait & clear.
-+ */
-+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
-+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+
-+ /*
-+ * We need to do this twice, once for GFX and once for MMHUB.
-+ * For the ATC, add 16 to the VMID for MMHUB; the IH uses
-+ * different registers entirely.
-+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
-+ */
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
-+ pasid_mapping);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(
-+ ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
-+ (1U << vmid)))
-+ cpu_relax();
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
-+ 1U << vmid);
-+
-+ /* Mapping vmid to pasid also for IH block */
-+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
-+ pasid_mapping);
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
-+ pasid_mapping);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(
-+ ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
-+ (1U << (vmid + 16))))
-+ cpu_relax();
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
-+ 1U << (vmid + 16));
-+
-+ /* Mapping vmid to pasid also for IH block */
-+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
-+ pasid_mapping);
-+ return 0;
-+}
-+
-+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t hpd_size, uint64_t hpd_gpu_addr)
-+{
-+ return 0;
-+}
-+
-+/* TODO: the RING0 form of this field is obsolete; it seems to date
-+ * back to SI but still works.
-+ */
-+
-+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t mec;
-+ uint32_t pipe;
-+
-+ mec = (++pipe_id / V9_PIPE_PER_MEC) + 1;
-+ pipe = (pipe_id % V9_PIPE_PER_MEC);
-+
-+ lock_srbm(kgd, mec, pipe, 0, 0);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
-+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
-+
-+ unlock_srbm(kgd);
-+
-+ return 0;
-+}
-+
-+static uint32_t get_sdma_base_addr(unsigned int engine_id,
-+ unsigned int queue_id)
-+{
-+ static const uint32_t base[2] = {
-+ SOC15_REG_OFFSET(SDMA0, 0,
-+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-+ SOC15_REG_OFFSET(SDMA1, 0,
-+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
-+ };
-+ uint32_t retval;
-+
-+ BUG_ON(engine_id > 1);
-+
-+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-+ mmSDMA0_RLC0_RB_CNTL);
-+
-+ pr_debug("sdma base address: 0x%x\n", retval);
-+
-+ return retval;
-+}
-+
-+static inline struct v9_mqd *get_mqd(void *mqd)
-+{
-+ return (struct v9_mqd *)mqd;
-+}
-+
-+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
-+{
-+ return (struct v9_sdma_mqd *)mqd;
-+}
-+
-+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_mqd *m;
-+ uint32_t *mqd_hqd;
-+ uint32_t reg, hqd_base;
-+
-+ m = get_mqd(mqd);
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
-+ mqd_hqd = &m->cp_mqd_base_addr_lo;
-+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
-+
-+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID);
-+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-+ WREG32(reg, mqd_hqd[reg - hqd_base]);
-+
-+ if (wptr) {
-+ /* Don't read wptr with get_user because the user
-+ * context may not be accessible (if this function
-+ * runs in a work queue). Instead trigger a one-shot
-+ * polling read from memory in the CP. This assumes
-+ * that wptr is GPU-accessible in the queue's VMID via
-+ * ATC or SVM. WPTR==RPTR before starting the poll so
-+ * the CP starts fetching new commands from the right
-+ * place.
-+ *
-+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
-+ * tricky. Assume that the queue didn't overflow. The
-+ * number of valid bits in the 32-bit RPTR depends on
-+ * the queue size. The remaining bits are taken from
-+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
-+ * queue size.
-+ */
-+ uint32_t queue_size =
-+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
-+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
-+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
-+
-+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
-+ guessed_wptr += queue_size;
-+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
-+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
-+ lower_32_bits(guessed_wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
-+ upper_32_bits(guessed_wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
-+ lower_32_bits((uint64_t)wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
-+ upper_32_bits((uint64_t)wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-+ get_queue_mask(pipe_id, queue_id));
-+ }
-+
-+ /* Start the EOP fetcher */
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
-+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
-+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
-+
-+ /* Write CP_HQD_ACTIVE last. */
-+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
-+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE); reg++)
-+ WREG32(reg, mqd_hqd[reg - hqd_base]);
-+
-+ release_queue(kgd);
-+
-+ return 0;
-+}
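
To make the write-pointer reconstruction above concrete, here is the same
arithmetic traced with assumed example values; this is a sketch that mirrors
the code, not additional driver logic:

    /* Assume QUEUE_SIZE encodes a 0x400-dword ring (2 << 9), with
     * cp_hqd_pq_rptr = 0x02f0, cp_hqd_pq_wptr_lo = 0x12f8 and
     * cp_hqd_pq_wptr_hi = 0.
     */
    uint32_t queue_size = 2 << 9;                       /* 0x400 */
    uint64_t guessed_wptr = 0x02f0 & (queue_size - 1);  /* 0x2f0 */

    /* 0x12f8 & 0x3ff = 0x2f8, which is not below 0x2f0, so no wrap
     * adjustment is added.
     */
    guessed_wptr += 0x12f8 & ~(uint64_t)(queue_size - 1);  /* += 0x1000 */
    guessed_wptr += (uint64_t)0 << 32;                     /* wptr_hi */
    /* guessed_wptr == 0x12f0: the RPTR's ring offset, placed in the same
     * queue-size window as the saved 64-bit WPTR.
     */
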
-+
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t i = 0, reg;
-+#define HQD_N_REGS 56
-+#define DUMP_REG(addr) do { \
-+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
-+ break; \
-+ (*dump)[i][0] = (addr) << 2; \
-+ (*dump)[i++][1] = RREG32(addr); \
-+ } while (0)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
-+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-+ DUMP_REG(reg);
-+
-+ release_queue(kgd);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
-+ uint32_t temp, timeout = 2000;
-+ uint32_t data;
-+ uint64_t data64;
-+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
-+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
-+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
-+
-+ while (true) {
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (timeout == 0)
-+ return -ETIME;
-+ msleep(10);
-+ timeout -= 10;
-+ }
-+ data = RREG32(sdmax_gfx_context_cntl);
-+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-+ RESUME_CTX, 0);
-+ WREG32(sdmax_gfx_context_cntl, data);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
-+ m->sdmax_rlcx_doorbell_offset);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, m->sdmax_rlcx_doorbell);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, m->sdmax_rlcx_rb_rptr_hi);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
-+ if (read_user_wptr(mm, wptr64, data64)) {
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ lower_32_bits(data64));
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
-+ upper_32_bits(data64));
-+ } else {
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ m->sdmax_rlcx_rb_rptr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
-+ m->sdmax_rlcx_rb_rptr_hi);
-+ }
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, m->sdmax_rlcx_rb_base_hi);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, m->sdmax_rlcx_rb_rptr_addr_lo);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, m->sdmax_rlcx_rb_rptr_addr_hi);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, m->sdmax_rlcx_rb_cntl);
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t sdma_base_addr = get_sdma_base_addr(engine_id, queue_id);
-+ uint32_t i = 0, reg;
-+#undef HQD_N_REGS
-+#define HQD_N_REGS (19+6+7+10)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
-+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
-+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-+ uint32_t pipe_id, uint32_t queue_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t act;
-+ bool retval = false;
-+ uint32_t low, high;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
-+ if (act) {
-+ low = lower_32_bits(queue_address >> 8);
-+ high = upper_32_bits(queue_address >> 8);
-+
-+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
-+ retval = true;
-+ }
-+ release_queue(kgd);
-+ return retval;
-+}
-+
-+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr;
-+ uint32_t sdma_rlc_rb_cntl;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+
-+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
-+
-+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
-+ return true;
-+
-+ return false;
-+}
-+
-+static int kgd_hqd_destroy(struct kgd_dev *kgd,
-+ enum kfd_preempt_type reset_type,
-+ unsigned int utimeout, uint32_t pipe_id,
-+ uint32_t queue_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ enum hqd_dequeue_request_type type;
-+ unsigned long end_jiffies;
-+ uint32_t temp;
-+#if 0
-+ unsigned long flags;
-+ int retry;
-+#endif
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ switch (reset_type) {
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
-+ type = DRAIN_PIPE;
-+ break;
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
-+ type = RESET_WAVES;
-+ break;
-+ default:
-+ type = DRAIN_PIPE;
-+ break;
-+ }
-+
-+#if 0 /* Is this still needed? */
-+ /* Workaround: If IQ timer is active and the wait time is close to or
-+ * equal to 0, dequeueing is not safe. Wait until either the wait time
-+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
-+ * cleared before continuing. Also, ensure wait times are set to at
-+ * least 0x3.
-+ */
-+ local_irq_save(flags);
-+ preempt_disable();
-+ retry = 5000; /* wait for 500 usecs at maximum */
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_IQ_TIMER);
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
-+ pr_debug("HW is processing IQ\n");
-+ goto loop;
-+ }
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
-+ == 3) /* SEM-rearm is safe */
-+ break;
-+ /* Wait time 3 is safe for CP, but our MMIO read/write
-+ * time is close to 1 microsecond, so check for 10 to
-+ * leave more buffer room
-+ */
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
-+ >= 10)
-+ break;
-+ pr_debug("IQ timer is active\n");
-+ } else
-+ break;
-+loop:
-+ if (!retry) {
-+ pr_err("CP HQD IQ timer status time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ retry = 1000;
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
-+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
-+ break;
-+ pr_debug("Dequeue request is pending\n");
-+
-+ if (!retry) {
-+ pr_err("CP HQD dequeue request time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ local_irq_restore(flags);
-+ preempt_enable();
-+#endif
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
-+
-+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
-+ while (true) {
-+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
-+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
-+ break;
-+ if (time_after(jiffies, end_jiffies)) {
-+ pr_err("cp queue preemption time out.\n");
-+ release_queue(kgd);
-+ return -ETIME;
-+ }
-+ usleep_range(500, 1000);
-+ }
-+
-+ release_queue(kgd);
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-+ unsigned int utimeout)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr;
-+ uint32_t temp;
-+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
-+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
-+
-+ while (true) {
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (time_after(jiffies, end_jiffies))
-+ return -ETIME;
-+ usleep_range(500, 1000);
-+ }
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
-+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
-+
-+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
-+ m->sdmax_rlcx_rb_rptr_hi =
-+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
-+
-+ return 0;
-+}
-+
-+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-+ uint8_t vmid)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-+ + vmid);
-+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+}
-+
-+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-+ uint8_t vmid)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-+ + vmid);
-+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-+}
-+
-+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ uint32_t req = (1 << vmid) |
-+ (1 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* light */
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
-+
-+ /* Use lightweight invalidation.
-+ *
-+ * TODO 1: agree on the right set of invalidation registers for
-+ * KFD use. Use the last one for now. Invalidate both GC and
-+ * MMHUB.
-+ *
-+ * TODO 2: support range-based invalidation; requires a kfd2kgd
-+ * interface change.
-+ */
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), 0xffffffff);
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), 0x0000001f);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), req);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
-+ (1 << vmid)))
-+ cpu_relax();
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
-+ (1 << vmid)))
-+ cpu_relax();
-+}
-+
-+static int kgd_address_watch_disable(struct kgd_dev *kgd)
-+{
-+ WARN_ONCE(1, "Not implemented");
-+
-+#if 0 /* TODO: Update to SOC15 register */
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+ unsigned int i;
-+
-+ cntl.u32All = 0;
-+
-+ cntl.bitfields.valid = 0;
-+ cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-+ cntl.bitfields.atc = 1;
-+
-+ /* Turning off this address until we set all the registers */
-+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
-+ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+#endif
-+
-+ return 0;
-+}
-+
-+static int kgd_address_watch_execute(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ uint32_t cntl_val,
-+ uint32_t addr_hi,
-+ uint32_t addr_lo)
-+{
-+ WARN_ONCE(1, "Not implemented");
-+
-+#if 0 /* TODO: Update to SOC15 register */
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+
-+ cntl.u32All = cntl_val;
-+
-+ /* Turning off this watch point until we set all the registers */
-+ cntl.bitfields.valid = 0;
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI],
-+ addr_hi);
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO],
-+ addr_lo);
-+
-+ /* Enable the watch point */
-+ cntl.bitfields.valid = 1;
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+#endif
-+
-+ return 0;
-+}
-+
-+static int kgd_wave_control_execute(struct kgd_dev *kgd,
-+ uint32_t gfx_index_val,
-+ uint32_t sq_cmd)
-+{
-+ WARN_ONCE(1, "Not implemented");
-+
-+#if 0 /* TODO: Update to SOC15 register */
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t data = 0;
-+
-+ mutex_lock(&adev->grbm_idx_mutex);
-+
-+ WREG32(mmGRBM_GFX_INDEX, gfx_index_val);
-+ WREG32(mmSQ_CMD, sq_cmd);
-+
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ INSTANCE_BROADCAST_WRITES, 1);
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ SH_BROADCAST_WRITES, 1);
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ SE_BROADCAST_WRITES, 1);
-+
-+ WREG32(mmGRBM_GFX_INDEX, data);
-+ mutex_unlock(&adev->grbm_idx_mutex);
-+#endif
-+
-+ return 0;
-+}
-+
-+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ unsigned int reg_offset)
-+{
-+ return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
-+}
-+
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype)
-+{
-+ /* No longer needed on GFXv9. These values are now hard-coded,
-+ * except for the MTYPE which comes from the page table.
-+ */
-+
-+ return 0;
-+}
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid)
-+{
-+ /* No longer needed on GFXv9. The scratch base address is
-+ * passed to the shader by the CP. It's the user mode driver's
-+ * responsibility.
-+ */
-+
-+ return 0;
-+}
-+
-+/* FIXME: Does this need to be ASIC-specific code? */
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ const union amdgpu_firmware_header *hdr;
-+
-+ switch (type) {
-+ case KGD_ENGINE_PFP:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.pfp_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_ME:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.me_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_CE:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.ce_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_MEC1:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.mec_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_MEC2:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.mec2_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_RLC:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->gfx.rlc_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_SDMA1:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->sdma.instance[0].fw->data;
-+ break;
-+
-+ case KGD_ENGINE_SDMA2:
-+ hdr = (const union amdgpu_firmware_header *)
-+ adev->sdma.instance[1].fw->data;
-+ break;
-+
-+ default:
-+ return 0;
-+ }
-+
-+ if (hdr == NULL)
-+ return 0;
-+
-+ /* Only 12 bits in use */
-+ return hdr->common.ucode_version;
-+}
-+
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests)
-+{
-+ pr_debug("in %s this is a stub\n", __func__);
-+}
-+
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
-+ AMDGPU_PTE_VALID;
-+
-+ /* TODO: Don't use hardcoded VMIDs */
-+ if (vmid < 8 || vmid > 15) {
-+ pr_err("trying to set page table base for wrong VMID %u\n",
-+ vmid);
-+ return;
-+ }
-+
-+ /* TODO: take advantage of per-process address space size. For
-+ * now, all processes share the same address space size, like
-+ * on GFX8 and older.
-+ */
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), adev->vm_manager.max_pfn - 1);
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), adev->vm_manager.max_pfn - 1);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
-+}
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 5853b59..29dcc29 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -38,8 +38,6 @@
- #include "oss/oss_3_0_d.h"
- #include "gmc/gmc_8_1_sh_mask.h"
- #include "gmc/gmc_8_1_d.h"
--#include "vi_structs.h"
--#include "vid.h"
-
- /* Special VM and GART address alignment needed for VI pre-Fiji due to
- * a HW bug. */
-@@ -496,7 +494,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
- uint64_t size, void *vm, struct kgd_mem **mem,
- uint64_t *offset, void **kptr,
- u32 domain, u64 flags, struct sg_table *sg, bool aql_queue,
-- bool readonly, bool execute, bool no_sub, bool userptr)
-+ bool readonly, bool execute, bool coherent, bool no_sub,
-+ bool userptr)
- {
- struct amdgpu_device *adev;
- int ret;
-@@ -504,7 +503,7 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
- uint64_t user_addr = 0;
- int byte_align;
- u32 alloc_domain;
-- uint32_t pte_flags;
-+ uint32_t get_pte_flags;
- struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
-
- BUG_ON(kgd == NULL);
-@@ -534,17 +533,21 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
- }
- INIT_LIST_HEAD(&(*mem)->bo_va_list);
- mutex_init(&(*mem)->lock);
--
-+ (*mem)->coherent = coherent;
- (*mem)->no_substitute = no_sub;
- (*mem)->aql_queue = aql_queue;
-
-- pte_flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_VALID;
-+ get_pte_flags = AMDGPU_VM_PAGE_READABLE;
- if (!readonly)
-- pte_flags |= AMDGPU_PTE_WRITEABLE;
-+ get_pte_flags |= AMDGPU_VM_PAGE_WRITEABLE;
- if (execute)
-- pte_flags |= AMDGPU_PTE_EXECUTABLE;
-+ get_pte_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
-+ if (coherent)
-+ get_pte_flags |= AMDGPU_VM_MTYPE_UC;
-+ else
-+ get_pte_flags |= AMDGPU_VM_MTYPE_NC;
-
-- (*mem)->pte_flags = pte_flags;
-+ (*mem)->pte_flags = amdgpu_vm_get_pte_flags(adev, get_pte_flags);
-
- alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain;
-
-@@ -971,7 +974,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
- }
-
- static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-- struct kfd_bo_va_list *entry, uint32_t pte_flags,
-+ struct kfd_bo_va_list *entry, uint64_t pte_flags,
- struct amdgpu_sync *sync)
- {
- int ret;
-@@ -1060,7 +1063,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- uint64_t *offset, void **kptr,
- uint32_t flags)
- {
-- bool aql_queue, public, readonly, execute, no_sub, userptr;
-+ bool aql_queue, public, readonly, execute, coherent, no_sub, userptr;
- u64 alloc_flag;
- uint32_t domain;
- uint64_t *temp_offset;
-@@ -1079,6 +1082,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- public = (flags & ALLOC_MEM_FLAGS_PUBLIC) ? true : false;
- readonly = (flags & ALLOC_MEM_FLAGS_READONLY) ? true : false;
- execute = (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ? true : false;
-+ coherent = (flags & ALLOC_MEM_FLAGS_COHERENT) ? true : false;
- no_sub = (flags & ALLOC_MEM_FLAGS_NO_SUBSTITUTE) ? true : false;
- userptr = (flags & ALLOC_MEM_FLAGS_USERPTR) ? true : false;
-
-@@ -1119,16 +1123,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
- va, va + size, domain_string(domain),
- BOOL_TO_STR(aql_queue));
-
-- pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s no_sub %s\n",
-+ pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s coherent %s no_sub %s\n",
- alloc_flag, BOOL_TO_STR(public),
- BOOL_TO_STR(readonly), BOOL_TO_STR(execute),
-- BOOL_TO_STR(no_sub));
-+ BOOL_TO_STR(coherent), BOOL_TO_STR(no_sub));
-
- return __alloc_memory_of_gpu(kgd, va, size, vm, mem,
- temp_offset, kptr, domain,
- alloc_flag, sg,
- aql_queue, readonly, execute,
-- no_sub, userptr);
-+ coherent, no_sub, userptr);
- }
-
- int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-@@ -1347,6 +1351,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- static u64 get_vm_pd_gpu_offset(void *vm)
- {
- struct amdgpu_vm *avm = (struct amdgpu_vm *) vm;
-+ struct amdgpu_device *adev =
-+ amdgpu_ttm_adev(avm->root.bo->tbo.bdev);
- u64 offset;
-
- BUG_ON(avm == NULL);
-@@ -1357,6 +1363,12 @@ static u64 get_vm_pd_gpu_offset(void *vm)
-
- amdgpu_bo_unreserve(avm->root.bo);
-
-+ /* On some ASICs the FB doesn't start at 0. Adjust FB offset
-+ * to an actual MC address.
-+ */
-+ if (adev->gart.gart_funcs->adjust_mc_addr)
-+ offset = adev->gart.gart_funcs->adjust_mc_addr(adev, offset);
-+
- return offset;
- }
-
-@@ -1854,8 +1866,11 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
-
- INIT_LIST_HEAD(&(*mem)->bo_va_list);
- mutex_init(&(*mem)->lock);
-- (*mem)->pte_flags = AMDGPU_PTE_READABLE | AMDGPU_PTE_VALID
-- | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE;
-+ (*mem)->pte_flags = amdgpu_vm_get_pte_flags(adev,
-+ AMDGPU_VM_PAGE_READABLE |
-+ AMDGPU_VM_PAGE_WRITEABLE |
-+ AMDGPU_VM_PAGE_EXECUTABLE |
-+ AMDGPU_VM_MTYPE_NC);
-
- (*mem)->bo = amdgpu_bo_ref(bo);
- (*mem)->va = va;
-diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
-index b33ff31..5ee3429 100644
---- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
-+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
-@@ -4397,6 +4397,19 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
-
- cu_info->number = active_cu_number;
- cu_info->ao_cu_mask = ao_cu_mask;
-+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
-+
-+ switch (adev->asic_type) {
-+ case CHIP_VEGA10: /* TODO: check if any of this changed */
-+ cu_info->max_waves_per_simd = 10;
-+ cu_info->max_scratch_slots_per_cu = 32;
-+ cu_info->wave_front_size = 64;
-+ cu_info->lds_size = 64;
-+ break;
-+ default:
-+ dev_warn(adev->dev, "CU info asic_type [0x%x] not supported\n",
-+ adev->asic_type);
-+ }
-
- return 0;
- }
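
For scale, the limits above combine into a per-CU capacity estimate. In this
sketch the value of NUM_SIMD_PER_CU (4 on GFX9) is an assumption, since the
constant's definition is outside this hunk:

    unsigned int simd_per_cu = 4;          /* NUM_SIMD_PER_CU, assumed */
    unsigned int max_waves_per_simd = 10;  /* from the CHIP_VEGA10 case */
    unsigned int wave_front_size = 64;

    unsigned int waves_per_cu = simd_per_cu * max_waves_per_simd;  /* 40 */
    unsigned int threads_per_cu = waves_per_cu * wave_front_size;  /* 2560 */
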
-diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-index 37e1edb..cff1ee6 100644
---- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-@@ -114,6 +114,21 @@ struct kgd2kfd_shared_resources {
- /* Bit n == 1 means Queue n is available for KFD */
- DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
-
-+ /* Doorbell assignments (SOC15 and later chips only). Only
-+ * specific doorbells are routed to each SDMA engine. Others
-+ * are routed to IH and VCN. They are not usable by the CP.
-+ *
-+ * Any doorbell number D that satisfies the following condition
-+ * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
-+ *
-+ * KFD currently uses 1024 doorbells (indices 0x000-0x3ff) per
-+ * process. If doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are reserved,
-+ * the mask would be set to 0x1f8 and val to 0x0f0.
-+ */
-+ unsigned int sdma_doorbell[2][2];
-+ unsigned int reserved_doorbell_mask;
-+ unsigned int reserved_doorbell_val;
-+
- /* Base address of doorbell aperture. */
- phys_addr_t doorbell_physical_address;
-
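
A minimal sketch of the reservation predicate described in the comment. The
header's example uses mask/val = 0x1f8/0x0f0, while amdgpu_amdkfd_device_init()
earlier in this patch programs 0x1f0/0x0f0 (reserving 0x0f0-0x0ff and
0x2f0-0x2ff); the predicate itself is the same either way:

    #include <stdbool.h>

    /* Doorbell d is reserved iff its masked bits match the reserved value. */
    static bool doorbell_is_reserved(unsigned int d,
                                     unsigned int mask, unsigned int val)
    {
            return (d & mask) == val;
    }

    /* With mask = 0x1f0 and val = 0x0f0:
     *   doorbell_is_reserved(0x0f3, 0x1f0, 0x0f0) -> true   (SDMA/IH/VCN)
     *   doorbell_is_reserved(0x2f8, 0x1f0, 0x0f0) -> true
     *   doorbell_is_reserved(0x1f0, 0x1f0, 0x0f0) -> false  (usable by CP)
     */
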
-@@ -155,6 +170,7 @@ struct tile_config {
- #define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
- #define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
- #define ALLOC_MEM_FLAGS_EXECUTE_ACCESS (1 << 26)
-+#define ALLOC_MEM_FLAGS_COHERENT (1 << 25)
-
- /**
- * struct kfd2kgd_calls
---
-2.7.4
-