path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch')
-rw-r--r--  meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch  8695
1 files changed, 0 insertions, 8695 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch
deleted file mode 100644
index a27db153..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1353-compilation-fix-for-amdkfd-porting.patch
+++ /dev/null
@@ -1,8695 +0,0 @@
-From 817ccd6f0987f83ddbf989602f0fbf320157f0a9 Mon Sep 17 00:00:00 2001
-From: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com>
-Date: Thu, 18 Oct 2018 12:42:04 +0530
-Subject: [PATCH 1353/4131] compilation fix for amdkfd porting
-
-Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com>
-Signed-off-by: Chaudhary Amit Kumar <chaudharyamit.kumar@amd.com>
----
- drivers/gpu/drm/amd/amdgpu/Makefile | 8 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 346 ++-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 185 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 196 ++
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 537 ++++-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 590 ++++-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.h | 62 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1227 ++++++++++
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2578 +++++++++++++++++++++
- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 -
- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 46 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 2 +
- drivers/gpu/drm/amd/amdgpu/soc15d.h | 1 +
- drivers/gpu/drm/amd/amdgpu/vid.h | 2 +
- drivers/gpu/drm/amd/amdkfd/Makefile | 2 -
- drivers/gpu/drm/amd/amdkfd/backport/backport.h | 7 -
- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 -
- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 4 -
- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 12 -
- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 28 -
- drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 10 -
- drivers/gpu/drm/amd/amdkfd/kfd_ipc.c | 8 -
- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 4 -
- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 4 -
- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 4 -
- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 2 -
- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +-
- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 52 +-
- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 22 -
- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 -
- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 -
- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 231 +-
- drivers/gpu/drm/amd/include/v9_structs.h | 48 +-
- drivers/gpu/drm/amd/include/vi_structs.h | 2 +
- drivers/pci/pci.c | 81 +
- include/drm/amd_rdma.h | 70 +
- include/linux/pci.h | 1 +
- include/uapi/linux/kfd_ioctl.h | 442 +++-
- include/uapi/linux/pci_regs.h | 3 +
- kernel/fork.c | 1 +
- 44 files changed, 6315 insertions(+), 537 deletions(-)
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/Makefile
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
- create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
- create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.h
- create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
- create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/soc15d.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/vid.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdkfd/Makefile
- mode change 100644 => 100755 drivers/gpu/drm/amd/amdkfd/kfd_priv.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/include/kgd_kfd_interface.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/include/v9_structs.h
- mode change 100644 => 100755 drivers/gpu/drm/amd/include/vi_structs.h
- mode change 100644 => 100755 drivers/pci/pci.c
- create mode 100644 include/drm/amd_rdma.h
- mode change 100644 => 100755 include/linux/pci.h
- mode change 100644 => 100755 include/uapi/linux/pci_regs.h
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
-old mode 100644
-new mode 100755
-index 57b8d5f..6b373d0
---- a/drivers/gpu/drm/amd/amdgpu/Makefile
-+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
-@@ -32,12 +32,11 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
- amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
- amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
- amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
-- amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sem.o
-+ amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sem.o amdgpu_amdkfd_fence.o
-
- # add asic specific block
- amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
- ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
-- amdgpu_amdkfd_gfx_v7.o
-
- amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
-
-@@ -109,7 +108,10 @@ amdgpu-y += \
- # add amdkfd interfaces
- amdgpu-y += \
- amdgpu_amdkfd.o \
-- amdgpu_amdkfd_gfx_v8.o
-+ amdgpu_amdkfd_gfx_v7.o \
-+ amdgpu_amdkfd_gfx_v8.o \
-+ amdgpu_amdkfd_gfx_v9.o \
-+ amdgpu_amdkfd_gpuvm.o
-
- # add cgs
- amdgpu-y += amdgpu_cgs.o
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-old mode 100644
-new mode 100755
-index fe23de8..bcf95e7
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
-@@ -184,6 +184,7 @@ struct amdgpu_cs_parser;
- struct amdgpu_job;
- struct amdgpu_irq_src;
- struct amdgpu_fpriv;
-+struct kfd_vm_fault_info;
- struct amdgpu_bo_va_mapping;
-
- enum amdgpu_cp_irq {
-@@ -403,6 +404,7 @@ struct amdgpu_gem_object {
- struct amdgpu_bo *bo;
- };
-
-+struct kgd_mem;
- #define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_gem_object, base)->bo
-
- void amdgpu_gem_object_free(struct drm_gem_object *obj);
-@@ -543,6 +545,9 @@ struct amdgpu_mc {
- u64 private_aperture_end;
- /* protects concurrent invalidation */
- spinlock_t invalidate_lock;
-+
-+ struct kfd_vm_fault_info *vm_fault_info;
-+ atomic_t vm_fault_info_updated;
- };
-
- /*
-@@ -961,6 +966,7 @@ struct amdgpu_gfx_config {
- };
-
- struct amdgpu_cu_info {
-+ uint32_t simd_per_cu;
- uint32_t max_waves_per_simd;
- uint32_t wave_front_size;
- uint32_t max_scratch_slots_per_cu;
-@@ -1649,6 +1655,7 @@ struct amdgpu_device {
- /* record hw reset is performed */
- bool has_hw_reset;
- u8 reset_magic[AMDGPU_RESET_MAGIC_NUM];
-+ spinlock_t tlb_invalidation_lock;
-
- /* record last mm index being written through WREG32*/
- unsigned long last_mm_index;
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-old mode 100644
-new mode 100755
-index 7ec1915..ec8141f
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-@@ -20,23 +20,29 @@
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-+#undef pr_fmt
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
- #include "amdgpu_amdkfd.h"
--#include "amd_shared.h"
-+#include <linux/dma-buf.h>
- #include <drm/drmP.h>
- #include "amdgpu.h"
- #include "amdgpu_gfx.h"
- #include <linux/module.h>
-
--const struct kfd2kgd_calls *kfd2kgd;
-+#define AMDKFD_SKIP_UNCOMPILED_CODE 1
-+
- const struct kgd2kfd_calls *kgd2kfd;
--bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
-+
-+unsigned int global_compute_vmid_bitmap = 0xFF00;
-
- int amdgpu_amdkfd_init(void)
- {
- int ret;
-
- #if defined(CONFIG_HSA_AMD_MODULE)
-- int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-+ int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
-
- kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
-@@ -57,56 +63,68 @@ int amdgpu_amdkfd_init(void)
- #else
- ret = -ENOENT;
- #endif
--
-+ amdgpu_amdkfd_gpuvm_init_mem_limits();
- return ret;
- }
-
--bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
-+void amdgpu_amdkfd_fini(void)
- {
-+ if (kgd2kfd) {
-+ kgd2kfd->exit();
-+ symbol_put(kgd2kfd_init);
-+ }
-+}
-+
-+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
-+{
-+ const struct kfd2kgd_calls *kfd2kgd;
-+
-+ if (!kgd2kfd)
-+ return;
-+
- switch (adev->asic_type) {
- #ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_KAVERI:
-+ case CHIP_HAWAII:
- kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
- break;
- #endif
- case CHIP_CARRIZO:
-+ case CHIP_TONGA:
-+ case CHIP_FIJI:
-+ case CHIP_POLARIS10:
-+ case CHIP_POLARIS11:
- kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
- break;
-+ case CHIP_VEGA10:
-+ case CHIP_RAVEN:
-+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
-+ break;
- default:
-- return false;
-- }
--
-- return true;
--}
--
--void amdgpu_amdkfd_fini(void)
--{
-- if (kgd2kfd) {
-- kgd2kfd->exit();
-- symbol_put(kgd2kfd_init);
-+ dev_info(adev->dev, "kfd not supported on this ASIC\n");
-+ return;
- }
--}
-
--void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
--{
-- if (kgd2kfd)
-- adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
-- adev->pdev, kfd2kgd);
-+ adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
-+ adev->pdev, kfd2kgd);
- }
-
- void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
- {
- int i;
- int last_valid_bit;
-+
- if (adev->kfd) {
- struct kgd2kfd_shared_resources gpu_resources = {
-- .compute_vmid_bitmap = 0xFF00,
-+ .compute_vmid_bitmap = global_compute_vmid_bitmap,
- .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
-- .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
-+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
-+ .gpuvm_size = (uint64_t)amdgpu_vm_size << 30
- };
-
- /* this is going to have a few of the MSBs set that we need to
-- * clear */
-+ * clear
-+ */
- bitmap_complement(gpu_resources.queue_bitmap,
- adev->gfx.mec.queue_bitmap,
- KGD_MAX_QUEUES);
-@@ -120,7 +138,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
- gpu_resources.queue_bitmap);
-
- /* According to linux/bitmap.h we shouldn't use bitmap_clear if
-- * nbits is not compile time constant */
-+ * nbits is not compile time constant
-+ */
- last_valid_bit = 1 /* only first MEC can have compute queues */
- * adev->gfx.mec.num_pipe_per_mec
- * adev->gfx.mec.num_queue_per_pipe;
-@@ -131,6 +150,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
- &gpu_resources.doorbell_physical_address,
- &gpu_resources.doorbell_aperture_size,
- &gpu_resources.doorbell_start_offset);
-+ if (adev->asic_type >= CHIP_VEGA10) {
-+ /* On SOC15 the BIF is involved in routing
-+ * doorbells using the low 12 bits of the
-+ * address. Communicate the assignments to
-+ * KFD. KFD uses two doorbell pages per
-+ * process in case of 64-bit doorbells so we
-+ * can use each doorbell assignment twice.
-+ */
-+ gpu_resources.sdma_doorbell[0][0] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
-+ gpu_resources.sdma_doorbell[0][1] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
-+ gpu_resources.sdma_doorbell[1][0] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
-+ gpu_resources.sdma_doorbell[1][1] =
-+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
-+ /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
-+ * SDMA, IH and VCN. So don't use them for the CP.
-+ */
-+ gpu_resources.reserved_doorbell_mask = 0x1f0;
-+ gpu_resources.reserved_doorbell_val = 0x0f0;
-+ }
-
- kgd2kfd->device_init(adev->kfd, &gpu_resources);
- }
-@@ -167,24 +208,81 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
- return r;
- }
-
-+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
-+ uint32_t vmid, uint64_t gpu_addr,
-+ uint32_t *ib_cmd, uint32_t ib_len)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+ struct amdgpu_job *job;
-+ struct amdgpu_ib *ib;
-+ struct amdgpu_ring *ring;
-+ struct dma_fence *f = NULL;
-+ int ret;
-+
-+ switch (engine) {
-+ case KGD_ENGINE_MEC1:
-+ ring = &adev->gfx.compute_ring[0];
-+ break;
-+ case KGD_ENGINE_SDMA1:
-+ ring = &adev->sdma.instance[0].ring;
-+ break;
-+ case KGD_ENGINE_SDMA2:
-+ ring = &adev->sdma.instance[1].ring;
-+ break;
-+ default:
-+ pr_err("Invalid engine in IB submission: %d\n", engine);
-+ ret = -EINVAL;
-+ goto err;
-+ }
-+
-+ ret = amdgpu_job_alloc(adev, 1, &job, NULL);
-+ if (ret)
-+ goto err;
-+
-+ ib = &job->ibs[0];
-+ memset(ib, 0, sizeof(struct amdgpu_ib));
-+
-+ ib->gpu_addr = gpu_addr;
-+ ib->ptr = ib_cmd;
-+ ib->length_dw = ib_len;
-+ /* This works for NO_HWS. TODO: need to handle without knowing VMID */
-+ job->vm_id = vmid;
-+
-+ ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
-+ if (ret) {
-+ DRM_ERROR("amdgpu: failed to schedule IB.\n");
-+ goto err_ib_sched;
-+ }
-+
-+ ret = dma_fence_wait(f, false);
-+
-+err_ib_sched:
-+ dma_fence_put(f);
-+ amdgpu_job_free(job);
-+err:
-+ return ret;
-+}
-+
-+u32 pool_to_domain(enum kgd_memory_pool p)
-+{
-+ switch (p) {
-+ case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
-+ default: return AMDGPU_GEM_DOMAIN_GTT;
-+ }
-+}
-+
- int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr)
- {
- struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-- struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
-+ struct amdgpu_bo *bo = NULL;
- int r;
--
-- BUG_ON(kgd == NULL);
-- BUG_ON(gpu_addr == NULL);
-- BUG_ON(cpu_ptr == NULL);
--
-- *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-- if ((*mem) == NULL)
-- return -ENOMEM;
-+ uint64_t gpu_addr_tmp = 0;
-+ void *cpu_ptr_tmp = NULL;
-
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
-- AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
-+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, &bo);
- if (r) {
- dev_err(adev->dev,
- "failed to allocate BO for amdkfd (%d)\n", r);
-@@ -192,64 +290,87 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
- }
-
- /* map the buffer */
-- r = amdgpu_bo_reserve((*mem)->bo, true);
-+ r = amdgpu_bo_reserve(bo, true);
- if (r) {
- dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
- goto allocate_mem_reserve_bo_failed;
- }
-
-- r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
-- &(*mem)->gpu_addr);
-+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
-+ &gpu_addr_tmp);
- if (r) {
- dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
- goto allocate_mem_pin_bo_failed;
- }
-- *gpu_addr = (*mem)->gpu_addr;
-
-- r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
-+ r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
- if (r) {
- dev_err(adev->dev,
- "(%d) failed to map bo to kernel for amdkfd\n", r);
- goto allocate_mem_kmap_bo_failed;
- }
-- *cpu_ptr = (*mem)->cpu_ptr;
-
-- amdgpu_bo_unreserve((*mem)->bo);
-+ *mem_obj = bo;
-+ *gpu_addr = gpu_addr_tmp;
-+ *cpu_ptr = cpu_ptr_tmp;
-+
-+ amdgpu_bo_unreserve(bo);
-
- return 0;
-
- allocate_mem_kmap_bo_failed:
-- amdgpu_bo_unpin((*mem)->bo);
-+ amdgpu_bo_unpin(bo);
- allocate_mem_pin_bo_failed:
-- amdgpu_bo_unreserve((*mem)->bo);
-+ amdgpu_bo_unreserve(bo);
- allocate_mem_reserve_bo_failed:
-- amdgpu_bo_unref(&(*mem)->bo);
-+ amdgpu_bo_unref(&bo);
-
- return r;
- }
-
- void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
- {
-- struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
-+ struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
-
-- BUG_ON(mem == NULL);
--
-- amdgpu_bo_reserve(mem->bo, true);
-- amdgpu_bo_kunmap(mem->bo);
-- amdgpu_bo_unpin(mem->bo);
-- amdgpu_bo_unreserve(mem->bo);
-- amdgpu_bo_unref(&(mem->bo));
-- kfree(mem);
-+ amdgpu_bo_reserve(bo, true);
-+ amdgpu_bo_kunmap(bo);
-+ amdgpu_bo_unpin(bo);
-+ amdgpu_bo_unreserve(bo);
-+ amdgpu_bo_unref(&(bo));
- }
-
--uint64_t get_vmem_size(struct kgd_dev *kgd)
-+void get_local_mem_info(struct kgd_dev *kgd,
-+ struct kfd_local_mem_info *mem_info)
- {
-- struct amdgpu_device *adev =
-- (struct amdgpu_device *)kgd;
-+ uint64_t address_mask;
-+ resource_size_t aper_limit;
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
-- BUG_ON(kgd == NULL);
-+ address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
-+ ~((1ULL << 32) - 1);
-+ aper_limit = adev->mc.aper_base + adev->mc.aper_size;
-+
-+ memset(mem_info, 0, sizeof(*mem_info));
-+ if (!(adev->mc.aper_base & address_mask ||
-+ aper_limit & address_mask)) {
-+ mem_info->local_mem_size_public = adev->mc.visible_vram_size;
-+ mem_info->local_mem_size_private = adev->mc.real_vram_size -
-+ adev->mc.visible_vram_size;
-+ } else {
-+ mem_info->local_mem_size_public = 0;
-+ mem_info->local_mem_size_private = adev->mc.real_vram_size;
-+ }
-+ mem_info->vram_width = adev->mc.vram_width;
-
-- return adev->mc.real_vram_size;
-+ pr_debug("Address base: 0x%llx limit 0x%llx public 0x%llx private 0x%llx\n",
-+ adev->mc.aper_base, aper_limit,
-+ mem_info->local_mem_size_public,
-+ mem_info->local_mem_size_private);
-+
-+ if (amdgpu_sriov_vf(adev))
-+ mem_info->mem_clk_max = adev->clock.default_mclk / 100;
-+ else
-+ mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
- }
-
- uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
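The get_local_mem_info() hunk above splits VRAM into a CPU-visible ("public") part and a "private" remainder, and only reports a public part when the whole BAR aperture fits under the device's DMA mask. The following self-contained sketch of that arithmetic is illustrative only and not part of the patch; the 40-bit DMA mask, BAR address and sizes are made-up example values, not taken from any real board.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t dma_mask     = (1ULL << 40) - 1;      /* assumed 40-bit DMA mask */
            uint64_t address_mask = ~dma_mask;
            uint64_t aper_base    = 0xe0000000ULL;          /* assumed BAR start */
            uint64_t aper_size    = 256ULL << 20;           /* 256 MiB visible BAR */
            uint64_t aper_limit   = aper_base + aper_size;
            uint64_t real_vram    = 4ULL << 30;             /* 4 GiB of VRAM */
            uint64_t visible_vram = 256ULL << 20;
            uint64_t pub, priv;

            if (!((aper_base & address_mask) || (aper_limit & address_mask))) {
                    /* Aperture is fully DMA-addressable: expose the visible part. */
                    pub  = visible_vram;
                    priv = real_vram - visible_vram;
            } else {
                    pub  = 0;
                    priv = real_vram;
            }

            printf("public  %llu MiB\n", (unsigned long long)(pub  >> 20)); /* 256  */
            printf("private %llu MiB\n", (unsigned long long)(priv >> 20)); /* 3840 */
            return 0;
    }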
-@@ -271,3 +392,106 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
-
- return amdgpu_dpm_get_sclk(adev, false) / 100;
- }
-+
-+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+ struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
-+
-+ memset(cu_info, 0, sizeof(*cu_info));
-+ if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
-+ return;
-+
-+ cu_info->cu_active_number = acu_info.number;
-+ cu_info->cu_ao_mask = acu_info.ao_cu_mask;
-+ memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
-+ sizeof(acu_info.bitmap));
-+ cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
-+ cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
-+ cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
-+ cu_info->simd_per_cu = acu_info.simd_per_cu;
-+ cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
-+ cu_info->wave_front_size = acu_info.wave_front_size;
-+ cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
-+ cu_info->lds_size = acu_info.lds_size;
-+}
-+
-+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
-+ struct kgd_dev **dma_buf_kgd,
-+ uint64_t *bo_size, void *metadata_buffer,
-+ size_t buffer_size, uint32_t *metadata_size,
-+ uint32_t *flags)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+ struct dma_buf *dma_buf;
-+ struct drm_gem_object *obj;
-+ struct amdgpu_bo *bo;
-+ uint64_t metadata_flags;
-+ int r = -EINVAL;
-+
-+ dma_buf = dma_buf_get(dma_buf_fd);
-+ if (IS_ERR(dma_buf))
-+ return PTR_ERR(dma_buf);
-+
-+ if (dma_buf->ops != &drm_gem_prime_dmabuf_ops)
-+ /* Can't handle non-graphics buffers */
-+ goto out_put;
-+
-+ obj = dma_buf->priv;
-+ if (obj->dev->driver != adev->ddev->driver)
-+ /* Can't handle buffers from different drivers */
-+ goto out_put;
-+
-+ adev = obj->dev->dev_private;
-+ bo = gem_to_amdgpu_bo(obj);
-+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-+ AMDGPU_GEM_DOMAIN_GTT |
-+ AMDGPU_GEM_DOMAIN_DGMA)))
-+ /* Only VRAM, GTT and DGMA BOs are supported */
-+ goto out_put;
-+
-+ r = 0;
-+ if (dma_buf_kgd)
-+ *dma_buf_kgd = (struct kgd_dev *)adev;
-+ if (bo_size)
-+ *bo_size = amdgpu_bo_size(bo);
-+ if (metadata_size)
-+ *metadata_size = bo->metadata_size;
-+ if (metadata_buffer)
-+ r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
-+ metadata_size, &metadata_flags);
-+ if (flags) {
-+ /* If the preferred domain is DGMA, set flags to VRAM because
-+ * KFD doesn't support allocating DGMA memory
-+ */
-+ *flags = (bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-+ AMDGPU_GEM_DOMAIN_DGMA)) ?
-+ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
-+
-+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
-+ *flags |= ALLOC_MEM_FLAGS_PUBLIC;
-+ }
-+
-+out_put:
-+ dma_buf_put(dma_buf);
-+ return r;
-+}
-+
-+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+ uint64_t usage =
-+ amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
-+ return usage;
-+}
-+
-+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev,
-+ u32 vmid)
-+{
-+ if (adev->kfd) {
-+ if ((1 << vmid) & global_compute_vmid_bitmap)
-+ return true;
-+ }
-+
-+ return false;
-+}
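Two small bit tricks in this file are easy to miss: the reserved-doorbell check described in the SOC15 comment above (a doorbell index falls in the reserved range when its bits masked by reserved_doorbell_mask equal reserved_doorbell_val) and the ownership test in amdgpu_amdkfd_is_kfd_vmid(). The standalone userspace sketch below is illustrative only; it reuses the constants from the hunks above but is not part of the patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Constants copied from the hunks above. */
    #define RESERVED_DOORBELL_MASK  0x1f0u
    #define RESERVED_DOORBELL_VAL   0x0f0u
    #define COMPUTE_VMID_BITMAP     0xff00u  /* VMIDs 8..15 belong to KFD */

    /* Reserved doorbells are 0x0f0-0x0ff and 0x2f0-0x2ff: the mask keeps
     * bits 4..8 and the result must equal 0x0f0.
     */
    static bool doorbell_is_reserved(uint32_t doorbell_id)
    {
            return (doorbell_id & RESERVED_DOORBELL_MASK) == RESERVED_DOORBELL_VAL;
    }

    static bool vmid_belongs_to_kfd(unsigned int vmid)
    {
            return (1u << vmid) & COMPUTE_VMID_BITMAP;
    }

    int main(void)
    {
            printf("0x0f3: %d  0x2f4: %d  0x1f0: %d\n",
                   doorbell_is_reserved(0x0f3),   /* 1 */
                   doorbell_is_reserved(0x2f4),   /* 1 */
                   doorbell_is_reserved(0x1f0));  /* 0 */
            printf("vmid 9: %d  vmid 3: %d\n",
                   vmid_belongs_to_kfd(9),        /* 1 */
                   vmid_belongs_to_kfd(3));       /* 0 */
            return 0;
    }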
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-old mode 100644
-new mode 100755
-index 6d3a10b..b259ba7
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-@@ -27,20 +27,109 @@
-
- #include <linux/types.h>
- #include <linux/mm.h>
-+#include <linux/workqueue.h>
-+#include <linux/mmu_context.h>
- #include <kgd_kfd_interface.h>
-+#include "amdgpu.h"
-+
-+extern const struct kgd2kfd_calls *kgd2kfd;
-
- struct amdgpu_device;
-
-+struct kfd_bo_va_list {
-+ struct list_head bo_list;
-+ struct amdgpu_bo_va *bo_va;
-+ void *kgd_dev;
-+ bool is_mapped;
-+ bool map_fail;
-+ uint64_t va;
-+ uint64_t pte_flags;
-+};
-+
- struct kgd_mem {
-+ struct mutex lock;
- struct amdgpu_bo *bo;
-- uint64_t gpu_addr;
-- void *cpu_ptr;
-+ struct list_head bo_va_list;
-+ /* protected by amdkfd_process_info.lock */
-+ struct ttm_validate_buffer validate_list;
-+ struct ttm_validate_buffer resv_list;
-+ uint32_t domain;
-+ unsigned int mapped_to_gpu_memory;
-+ void *kptr;
-+ uint64_t va;
-+
-+ uint32_t mapping_flags;
-+
-+ atomic_t invalid;
-+ struct amdkfd_process_info *process_info;
-+ struct page **user_pages;
-+
-+ struct amdgpu_sync sync;
-+
-+ /* flags bitfield */
-+ bool coherent : 1;
-+ bool no_substitute : 1;
-+ bool aql_queue : 1;
-+};
-+
-+/* KFD Memory Eviction */
-+struct amdgpu_amdkfd_fence {
-+ struct dma_fence base;
-+ void *mm;
-+ spinlock_t lock;
-+ char timeline_name[TASK_COMM_LEN];
-+};
-+
-+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-+ void *mm);
-+bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm);
-+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
-+
-+struct amdkfd_process_info {
-+ /* List head of all VMs that belong to a KFD process */
-+ struct list_head vm_list_head;
-+ /* List head for all KFD BOs that belong to a KFD process. */
-+ struct list_head kfd_bo_list;
-+ /* List of userptr BOs that are valid or invalid */
-+ struct list_head userptr_valid_list;
-+ struct list_head userptr_inval_list;
-+ /* Lock to protect kfd_bo_list */
-+ struct mutex lock;
-+
-+ /* Number of VMs */
-+ unsigned int n_vms;
-+ /* Eviction Fence */
-+ struct amdgpu_amdkfd_fence *eviction_fence;
-+
-+ /* MMU-notifier related fields */
-+ atomic_t evicted_bos;
-+ struct delayed_work work;
-+ struct pid *pid;
-+};
-+
-+/* struct amdkfd_vm -
-+ * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs
-+ * belonging to a KFD process. All the VMs belonging to the same process point
-+ * to the same amdkfd_process_info.
-+ */
-+struct amdkfd_vm {
-+ /* Keep base as the first parameter for pointer compatibility between
-+ * amdkfd_vm and amdgpu_vm.
-+ */
-+ struct amdgpu_vm base;
-+
-+ /* List node in amdkfd_process_info.vm_list_head*/
-+ struct list_head vm_list_node;
-+
-+ struct amdgpu_device *adev;
-+ /* Points to the KFD process VM info*/
-+ struct amdkfd_process_info *process_info;
- };
-
-+
- int amdgpu_amdkfd_init(void);
- void amdgpu_amdkfd_fini(void);
-
--bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
-
- void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
- int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
-@@ -50,17 +139,105 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
- void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
- void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
-
-+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
-+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
-+ uint32_t vmid, uint64_t gpu_addr,
-+ uint32_t *ib_cmd, uint32_t ib_len);
-+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
-+ struct dma_fence **ef);
- struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
- struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
-+int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
-+ uint64_t src_offset, struct kgd_mem *dst_mem,
-+ uint64_t dest_offset, uint64_t size, struct dma_fence **f,
-+ uint64_t *actual_size);
-+
-+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev,
-+ u32 vmid);
-
- /* Shared API */
-+int map_bo(struct amdgpu_device *rdev, uint64_t va, void *vm,
-+ struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va);
- int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
- void **mem_obj, uint64_t *gpu_addr,
- void **cpu_ptr);
- void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
--uint64_t get_vmem_size(struct kgd_dev *kgd);
-+void get_local_mem_info(struct kgd_dev *kgd,
-+ struct kfd_local_mem_info *mem_info);
- uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
-
- uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
-+void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
-+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
-+ struct kgd_dev **dmabuf_kgd,
-+ uint64_t *bo_size, void *metadata_buffer,
-+ size_t buffer_size, uint32_t *metadata_size,
-+ uint32_t *flags);
-+uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
-+
-+#define read_user_wptr(mmptr, wptr, dst) \
-+ ({ \
-+ bool valid = false; \
-+ if ((mmptr) && (wptr)) { \
-+ if ((mmptr) == current->mm) { \
-+ valid = !get_user((dst), (wptr)); \
-+ } else if (current->mm == NULL) { \
-+ use_mm(mmptr); \
-+ valid = !get_user((dst), (wptr)); \
-+ unuse_mm(mmptr); \
-+ } \
-+ } \
-+ valid; \
-+ })
-+
-+/* GPUVM API */
-+int amdgpu_amdkfd_gpuvm_sync_memory(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
-+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
-+ struct kgd_dev *kgd, uint64_t va, uint64_t size,
-+ void *vm, struct kgd_mem **mem,
-+ uint64_t *offset, uint32_t flags);
-+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
-+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
-+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
-
-+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-+ void **process_info,
-+ struct dma_fence **ef);
-+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
-+
-+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
-+
-+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
-+ struct kfd_vm_fault_info *info);
-+
-+int amdgpu_amdkfd_gpuvm_mmap_bo(
-+ struct kgd_dev *kgd, struct vm_area_struct *vma);
-+
-+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, void **kptr);
-+
-+int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, uint64_t offset,
-+ uint64_t size, struct sg_table **ret_sg);
-+void amdgpu_amdkfd_gpuvm_unpin_put_sg_table(
-+ struct kgd_mem *mem, struct sg_table *sg);
-+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
-+ struct dma_buf *dmabuf,
-+ uint64_t va, void *vm,
-+ struct kgd_mem **mem, uint64_t *size,
-+ uint64_t *mmap_offset);
-+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm,
-+ struct kgd_mem *mem,
-+ struct dma_buf **dmabuf);
-+int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm);
-+int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm);
-+
-+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
-+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
- #endif /* AMDGPU_AMDKFD_H_INCLUDED */
-+
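The read_user_wptr() macro near the end of this header captures the pattern used by the HQD load callbacks later in the patch: the write pointer lives in user memory, so a caller that is not running in that process's context must temporarily adopt the target mm before calling get_user(). A function-style restatement of the same logic is shown below as a rough sketch (kernel-4.14-era use_mm()/unuse_mm(); the helper name is illustrative and not part of the patch).

    #include <linux/mm_types.h>
    #include <linux/mmu_context.h>
    #include <linux/sched.h>
    #include <linux/types.h>
    #include <linux/uaccess.h>

    /* Read *wptr from the address space owned by @mm; returns true on success. */
    static bool read_wptr_from_mm(struct mm_struct *mm, uint32_t __user *wptr,
                                  uint32_t *dst)
    {
            bool valid = false;

            if (!mm || !wptr)
                    return false;

            if (mm == current->mm) {
                    /* Already in the right context: plain get_user() is enough. */
                    valid = !get_user(*dst, wptr);
            } else if (!current->mm) {
                    /* Kernel thread: borrow the target mm for the access. */
                    use_mm(mm);
                    valid = !get_user(*dst, wptr);
                    unuse_mm(mm);
            }

            return valid;
    }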
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
-new file mode 100644
-index 0000000..3961937
---- /dev/null
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
-@@ -0,0 +1,196 @@
-+/*
-+ * Copyright 2016 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#include <linux/spinlock.h>
-+#include <linux/atomic.h>
-+#include <linux/stacktrace.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include "amdgpu_amdkfd.h"
-+
-+const struct dma_fence_ops amd_kfd_fence_ops;
-+static atomic_t fence_seq = ATOMIC_INIT(0);
-+
-+static int amd_kfd_fence_signal(struct dma_fence *f);
-+
-+/* Eviction Fence
-+ * Fence helper functions to deal with KFD memory eviction.
-+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
-+ * evicted unless all the user queues for that process are evicted.
-+ *
-+ * All the BOs in a process share an eviction fence. When process X wants
-+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
-+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
-+ * by calling ttm_bo_driver->eviction_valuable().
-+ *
-+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
-+ * to process X. Otherwise, it will return true to indicate BO can be
-+ * evicted by TTM.
-+ *
-+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
-+ * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
-+ * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
-+ *
-+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
-+ * notify when the BO is free to move. fence_add_callback --> enable_signaling
-+ * --> amdgpu_amdkfd_fence.enable_signaling
-+ *
-+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
-+ * user queues and signal fence. The work item will also start another delayed
-+ * work item to restore BOs
-+ */
-+
-+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-+ void *mm)
-+{
-+ struct amdgpu_amdkfd_fence *fence = NULL;
-+
-+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
-+ if (fence == NULL)
-+ return NULL;
-+
-+ /* mm_struct mm is used as void pointer to identify the parent
-+ * KFD process. Don't dereference it. Fence and any threads using
-+ * mm are guaranteed to be released before process termination.
-+ */
-+ fence->mm = mm;
-+ get_task_comm(fence->timeline_name, current);
-+ spin_lock_init(&fence->lock);
-+
-+ dma_fence_init(&fence->base, &amd_kfd_fence_ops, &fence->lock,
-+ context, atomic_inc_return(&fence_seq));
-+
-+ return fence;
-+}
-+
-+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
-+{
-+ struct amdgpu_amdkfd_fence *fence;
-+
-+ if (!f)
-+ return NULL;
-+
-+ fence = container_of(f, struct amdgpu_amdkfd_fence, base);
-+ if (fence && f->ops == &amd_kfd_fence_ops)
-+ return fence;
-+
-+ return NULL;
-+}
-+
-+static const char *amd_kfd_fence_get_driver_name(struct dma_fence *f)
-+{
-+ return "amdgpu_amdkfd_fence";
-+}
-+
-+static const char *amd_kfd_fence_get_timeline_name(struct dma_fence *f)
-+{
-+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
-+
-+ return fence->timeline_name;
-+}
-+
-+/**
-+ * amd_kfd_fence_enable_signaling - This gets called when TTM wants to evict
-+ * a KFD BO and schedules a job to move the BO.
-+ * If fence is already signaled return true.
-+ * If fence is not signaled, schedule an evict KFD process work item.
-+ */
-+static bool amd_kfd_fence_enable_signaling(struct dma_fence *f)
-+{
-+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
-+
-+ if (!fence)
-+ return false;
-+
-+ if (dma_fence_is_signaled(f))
-+ return true;
-+
-+ if (!kgd2kfd->schedule_evict_and_restore_process(
-+ (struct mm_struct *)fence->mm, f))
-+ return true;
-+
-+ return false;
-+}
-+
-+static int amd_kfd_fence_signal(struct dma_fence *f)
-+{
-+ unsigned long flags;
-+ int ret;
-+
-+ spin_lock_irqsave(f->lock, flags);
-+ /* Set enabled bit so the cb will be called */
-+ set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &f->flags);
-+ ret = dma_fence_signal_locked(f);
-+ spin_unlock_irqrestore(f->lock, flags);
-+
-+ return ret;
-+}
-+
-+/**
-+ * amd_kfd_fence_release - callback that fence can be freed
-+ *
-+ * @fence: fence
-+ *
-+ * This function is called when the reference count becomes zero.
-+ * It just RCU schedules freeing up the fence.
-+*/
-+static void amd_kfd_fence_release(struct dma_fence *f)
-+{
-+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
-+ /* Unconditionally signal the fence. The process is getting
-+ * terminated.
-+ */
-+ if (WARN_ON(!fence))
-+ return; /* Not an amdgpu_amdkfd_fence */
-+
-+ amd_kfd_fence_signal(f);
-+ kfree_rcu(f, rcu);
-+}
-+
-+/**
-+ * amd_kfd_fence_check_mm - Check if @mm is same as that of the fence @f
-+ * if same return TRUE else return FALSE.
-+ *
-+ * @f: [IN] fence
-+ * @mm: [IN] mm that needs to be verified
-+*/
-+bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm)
-+{
-+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
-+
-+ if (!fence)
-+ return false;
-+ else if (fence->mm == mm)
-+ return true;
-+
-+ return false;
-+}
-+
-+const struct dma_fence_ops amd_kfd_fence_ops = {
-+ .get_driver_name = amd_kfd_fence_get_driver_name,
-+ .get_timeline_name = amd_kfd_fence_get_timeline_name,
-+ .enable_signaling = amd_kfd_fence_enable_signaling,
-+ .signaled = NULL,
-+ .wait = dma_fence_default_wait,
-+ .release = amd_kfd_fence_release,
-+};
-+
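The "Big Idea" comment at the top of this file describes the eviction policy, but the decision point lives in TTM's eviction_valuable() hook rather than here. The sketch below shows how such a hook could consult the helpers introduced in this file; the walk over the BO's reservation object and the real amdgpu callback are omitted, and the function name is illustrative, so treat it as a rough sketch rather than the driver's actual implementation.

    #include <linux/dma-fence.h>
    #include <linux/mm_types.h>
    #include "amdgpu_amdkfd.h"

    /* @f is assumed to be one shared fence taken from the BO's reservation
     * object; @mm identifies the process that is asking TTM for space.
     */
    static bool bo_evictable_for_mm(struct dma_fence *f, struct mm_struct *mm)
    {
            /* Not a KFD eviction fence: TTM may evict the BO as usual. */
            if (!to_amdgpu_amdkfd_fence(f))
                    return true;

            /* A KFD BO is never evicted on behalf of its own process; for any
             * other process the eviction proceeds via enable_signaling(),
             * which quiesces the user queues and schedules the restore work.
             */
            return !amd_kfd_fence_check_mm(f, mm);
    }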
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-old mode 100644
-new mode 100755
-index 5748504..6964ece
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
-@@ -20,6 +20,9 @@
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-+#undef pr_fmt
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
- #include <linux/fdtable.h>
- #include <linux/uaccess.h>
- #include <linux/firmware.h>
-@@ -39,6 +42,14 @@
- #include "gmc/gmc_7_1_sh_mask.h"
- #include "cik_structs.h"
-
-+#define AMDKFD_SKIP_UNCOMPILED_CODE 1
-+
-+enum hqd_dequeue_request_type {
-+ NO_ACTION = 0,
-+ DRAIN_PIPE,
-+ RESET_WAVES
-+};
-+
- enum {
- MAX_TRAPID = 8, /* 3 bits in the bitfield. */
- MAX_WATCH_ADDRESSES = 4
-@@ -55,8 +66,8 @@ enum {
- enum {
- ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
- ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
-- ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
-- /* extend the mask to 26 bits to match the low address field */
-+ ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENTION = 0x03000000,
-+ /* extend the mask to 26 bits in order to match the low address field */
- ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
- ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
- };
-@@ -81,30 +92,42 @@ union TCP_WATCH_CNTL_BITS {
- float f32All;
- };
-
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem);
-+
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-+
- /*
- * Register access functions
- */
-
- static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-- uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
-- uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
--
-+ uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
-+ uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
- static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-- unsigned int vmid);
--
-+ unsigned int vmid);
- static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
- static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
- static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-- uint32_t queue_id, uint32_t __user *wptr);
--static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm);
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm);
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
- static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-- uint32_t pipe_id, uint32_t queue_id);
--
--static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
-+ uint32_t pipe_id, uint32_t queue_id);
-+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
--static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
- static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
- static int kgd_address_watch_disable(struct kgd_dev *kgd);
-@@ -124,21 +147,60 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
- static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
- static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-+static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req);
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid);
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype);
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base);
-+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
-+
-+/* Because of REG_GET_FIELD() being used, we put this function in the
-+ * asic specific file.
-+ */
-+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
-+ struct tile_config *config)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
--static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
-+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFBANK);
-+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFRANKS);
-+
-+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
-+ config->num_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
-+ config->macro_tile_config_ptr =
-+ adev->gfx.config.macrotile_mode_array;
-+ config->num_macro_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
-+
-+
-+ return 0;
-+}
-
- static const struct kfd2kgd_calls kfd2kgd = {
- .init_gtt_mem_allocation = alloc_gtt_mem,
- .free_gtt_mem = free_gtt_mem,
-- .get_vmem_size = get_vmem_size,
-+ .get_local_mem_info = get_local_mem_info,
- .get_gpu_clock_counter = get_gpu_clock_counter,
- .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
-+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
-+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
-+ .open_graphic_handle = open_graphic_handle,
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
-+ .hqd_dump = kgd_hqd_dump,
-+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
-@@ -147,17 +209,50 @@ static const struct kfd2kgd_calls kfd2kgd = {
- .address_watch_execute = kgd_address_watch_execute,
- .wave_control_execute = kgd_wave_control_execute,
- .address_watch_get_offset = kgd_address_watch_get_offset,
-- .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
-- .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
-+ .get_atc_vmid_pasid_mapping_pasid =
-+ get_atc_vmid_pasid_mapping_pasid,
-+ .get_atc_vmid_pasid_mapping_valid =
-+ get_atc_vmid_pasid_mapping_valid,
-+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
-- .get_fw_version = get_fw_version
-+ .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
-+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
-+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
-+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
-+ .get_fw_version = get_fw_version,
-+ .set_num_of_requests = set_num_of_requests,
-+ .get_cu_info = get_cu_info,
-+ .alloc_memory_of_scratch = alloc_memory_of_scratch,
-+ .write_config_static_mem = write_config_static_mem,
-+ .mmap_bo = amdgpu_amdkfd_gpuvm_mmap_bo,
-+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
-+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
-+ .pin_get_sg_table_bo = amdgpu_amdkfd_gpuvm_pin_get_sg_table,
-+ .unpin_put_sg_table_bo = amdgpu_amdkfd_gpuvm_unpin_put_sg_table,
-+ .get_dmabuf_info = amdgpu_amdkfd_get_dmabuf_info,
-+ .import_dmabuf = amdgpu_amdkfd_gpuvm_import_dmabuf,
-+ .export_dmabuf = amdgpu_amdkfd_gpuvm_export_dmabuf,
-+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
-+ .submit_ib = amdgpu_amdkfd_submit_ib,
-+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
-+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
-+ .copy_mem_to_mem = amdgpu_amdkfd_copy_mem_to_mem,
-+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage
- };
-
--struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
- {
- return (struct kfd2kgd_calls *)&kfd2kgd;
- }
-
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
- static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
- {
- return (struct amdgpu_device *)kgd;
-@@ -186,7 +281,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-- uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- lock_srbm(kgd, mec, pipe, queue_id, 0);
-@@ -222,12 +317,12 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-
- /*
- * We have to assume that there is no outstanding mapping.
-- * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
-- * a mapping is in progress or because a mapping finished and the
-- * SW cleared it. So the protocol is to always wait & clear.
-+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because a
-+ * mapping is in progress or because a mapping finished and the SW
-+ * cleared it. So the protocol is to always wait & clear.
- */
-- uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
-- ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+ uint32_t pasid_mapping = (pasid == 0) ? 0 :
-+ (uint32_t)pasid | ATC_VMID0_PASID_MAPPING__VALID_MASK;
-
- WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
-
-@@ -273,8 +368,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
-
- retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
- m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
--
-- pr_debug("kfd: sdma base address: 0x%x\n", retval);
-+ pr_debug("sdma base address: 0x%x\n", retval);
-
- return retval;
- }
-@@ -290,26 +384,91 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
- }
-
- static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-- uint32_t queue_id, uint32_t __user *wptr)
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-- uint32_t wptr_shadow, is_wptr_shadow_valid;
- struct cik_mqd *m;
-+ uint32_t *mqd_hqd;
-+ uint32_t reg, wptr_val, data;
-+ bool valid_wptr = false;
-
- m = get_mqd(mqd);
-
-- is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-- if (is_wptr_shadow_valid)
-- m->cp_hqd_pq_wptr = wptr_shadow;
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
-+ mqd_hqd = &m->cp_mqd_base_addr_lo;
-+
-+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
-+ WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
-+
-+ /* Copy userspace write pointer value to register.
-+ * Activate doorbell logic to monitor subsequent changes.
-+ */
-+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
-+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-+ WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
-+
-+ /* read_user_ptr may take the mm->mmap_sem.
-+ * release srbm_mutex to avoid circular dependency between
-+ * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
-+ */
-+ release_queue(kgd);
-+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
-
- acquire_queue(kgd, pipe_id, queue_id);
-- gfx_v7_0_mqd_commit(adev, m);
-+ if (valid_wptr)
-+ WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
-+
-+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-+ WREG32(mmCP_HQD_ACTIVE, data);
-+
-+
- release_queue(kgd);
-
- return 0;
- }
-
--static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t i = 0, reg;
-+#define HQD_N_REGS (35+4)
-+#define DUMP_REG(addr) do { \
-+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
-+ break; \
-+ (*dump)[i][0] = (addr) << 2; \
-+ (*dump)[i++][1] = RREG32(addr); \
-+ } while (0)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
-+
-+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
-+ DUMP_REG(reg);
-+
-+ release_queue(kgd);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct cik_sdma_rlc_registers *m;
-@@ -320,17 +479,17 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-- m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ m->sdma_rlc_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
-
-- end_jiffies = msecs_to_jiffies(2000) + jiffies;
- while (true) {
-- data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-- if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-- break;
-- if (time_after(jiffies, end_jiffies))
-- return -ETIME;
-- usleep_range(500, 1000);
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (timeout == 0)
-+ return -ETIME;
-+ msleep(10);
-+ timeout -= 10;
- }
- if (m->sdma_engine_id) {
- data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
-@@ -344,25 +503,59 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
- WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
- }
-
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL,
-- m->sdma_rlc_doorbell);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
-- m->sdma_rlc_virtual_addr);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
-+ data = REG_SET_FIELD(m->sdma_rlc_doorbell, SDMA0_RLC0_DOORBELL,
-+ ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdma_rlc_rb_rptr);
-+ if (read_user_wptr(mm, wptr, data))
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
-+ else
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ m->sdma_rlc_rb_rptr);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
-+ m->sdma_rlc_virtual_addr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdma_rlc_rb_base);
-+
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
- m->sdma_rlc_rb_base_hi);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
- m->sdma_rlc_rb_rptr_addr_lo);
- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
- m->sdma_rlc_rb_rptr_addr_hi);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-- m->sdma_rlc_rb_cntl);
--
-+ data = REG_SET_FIELD(m->sdma_rlc_rb_cntl, SDMA0_RLC0_RB_CNTL,
-+ RB_ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
- return 0;
- }
-
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
-+ queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
-+ uint32_t i = 0, reg;
-+#undef HQD_N_REGS
-+#define HQD_N_REGS (19+4)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-+ DUMP_REG(sdma_offset + reg);
-+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
-+ reg++)
-+ DUMP_REG(sdma_offset + reg);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
- static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id)
- {
-@@ -403,30 +596,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
- return false;
- }
-
--static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t temp;
-- int timeout = utimeout;
-+ enum hqd_dequeue_request_type type;
-+ unsigned long flags, end_jiffies;
-+ int retry;
-
- acquire_queue(kgd, pipe_id, queue_id);
- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
-
-- WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
-+ switch (reset_type) {
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
-+ type = DRAIN_PIPE;
-+ break;
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
-+ type = RESET_WAVES;
-+ break;
-+ default:
-+ type = DRAIN_PIPE;
-+ break;
-+ }
-+
-+ /* Workaround: If IQ timer is active and the wait time is close to or
-+ * equal to 0, dequeueing is not safe. Wait until either the wait time
-+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
-+ * cleared before continuing. Also, ensure wait times are set to at
-+ * least 0x3.
-+ */
-+ local_irq_save(flags);
-+ preempt_disable();
-+ retry = 5000; /* wait for 500 usecs at maximum */
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_IQ_TIMER);
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
-+ pr_debug("HW is processing IQ\n");
-+ goto loop;
-+ }
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
-+ == 3) /* SEM-rearm is safe */
-+ break;
-+ /* Wait time 3 is safe for CP, but our MMIO read/write
-+ * time is close to 1 microsecond, so check for 10 to
-+ * leave more buffer room
-+ */
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
-+ >= 10)
-+ break;
-+ pr_debug("IQ timer is active\n");
-+ } else
-+ break;
-+loop:
-+ if (!retry) {
-+ pr_err("CP HQD IQ timer status time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ retry = 1000;
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
-+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
-+ break;
-+ pr_debug("Dequeue request is pending\n");
-
-+ if (!retry) {
-+ pr_err("CP HQD dequeue request time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ local_irq_restore(flags);
-+ preempt_enable();
-+
-+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
-+
-+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
- while (true) {
- temp = RREG32(mmCP_HQD_ACTIVE);
-- if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
-+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
- break;
-- if (timeout <= 0) {
-- pr_err("kfd: cp queue preemption time out.\n");
-+ if (time_after(jiffies, end_jiffies)) {
-+ pr_err("cp queue preemption time out\n");
- release_queue(kgd);
- return -ETIME;
- }
-- msleep(20);
-- timeout -= 20;
-+ usleep_range(500, 1000);
- }
-
- release_queue(kgd);
-@@ -440,7 +702,7 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- struct cik_sdma_rlc_registers *m;
- uint32_t sdma_base_addr;
- uint32_t temp;
-- int timeout = utimeout;
-+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
-
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-@@ -451,12 +713,11 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-
- while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-- if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
- break;
-- if (timeout <= 0)
-+ if (time_after(jiffies, end_jiffies))
- return -ETIME;
-- msleep(20);
-- timeout -= 20;
-+ usleep_range(500, 1000);
- }
-
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-@@ -464,6 +725,8 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
- SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
-
-+ m->sdma_rlc_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
-+
- return 0;
- }
-
-@@ -481,8 +744,9 @@ static int kgd_address_watch_disable(struct kgd_dev *kgd)
-
- /* Turning off this address until we set all the registers */
- for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
-- WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX +
-- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-+ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-
- return 0;
- }
-@@ -500,20 +764,24 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd,
-
- /* Turning off this watch point until we set all the registers */
- cntl.bitfields.valid = 0;
-- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
-- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-
-- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
-- ADDRESS_WATCH_REG_ADDR_HI], addr_hi);
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_ADDR_HI],
-+ addr_hi);
-
-- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
-- ADDRESS_WATCH_REG_ADDR_LO], addr_lo);
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_ADDR_LO],
-+ addr_lo);
-
- /* Enable the watch point */
- cntl.bitfields.valid = 1;
-
-- WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
-- ADDRESS_WATCH_REG_CNTL], cntl.u32All);
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-
- return 0;
- }
-@@ -567,7 +835,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- }
-
- static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-@@ -577,52 +845,90 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- }
-
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ int vmid;
-+
-+ for (vmid = 0; vmid < 16; vmid++) {
-+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-+ continue;
-+ if (RREG32(mmATC_VMID0_PASID_MAPPING + vmid) &
-+ ATC_VMID0_PASID_MAPPING__VALID_MASK) {
-+ if ((RREG32(mmATC_VMID0_PASID_MAPPING + vmid) &
-+ ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-+ break;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
-+ element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
-+ index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
-+ mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
-+
-+ WREG32(mmSH_STATIC_MEM_CONFIG, reg);
-+ return 0;
-+}
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ lock_srbm(kgd, 0, 0, 0, vmid);
-+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
-+ unlock_srbm(kgd);
-+
-+ return 0;
-+}
-+
-+
- static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
- {
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- const union amdgpu_firmware_header *hdr;
-
-- BUG_ON(kgd == NULL);
--
- switch (type) {
- case KGD_ENGINE_PFP:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.pfp_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
- break;
-
- case KGD_ENGINE_ME:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.me_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
- break;
-
- case KGD_ENGINE_CE:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.ce_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
- break;
-
- case KGD_ENGINE_MEC1:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.mec_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
- break;
-
- case KGD_ENGINE_MEC2:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.mec2_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
- break;
-
- case KGD_ENGINE_RLC:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.rlc_fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
- break;
-
- case KGD_ENGINE_SDMA1:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->sdma.instance[0].fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
- break;
-
- case KGD_ENGINE_SDMA2:
-- hdr = (const union amdgpu_firmware_header *)
-- adev->sdma.instance[1].fw->data;
-+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
- break;
-
- default:
-@@ -636,3 +942,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
- return hdr->common.ucode_version;
- }
-
-+static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req)
-+{
-+ uint32_t value;
-+ struct amdgpu_device *adev = get_amdgpu_device(dev);
-+
-+ value = RREG32(mmATC_ATS_DEBUG);
-+ value &= ~ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR_MASK;
-+ value |= (num_of_req << ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR__SHIFT);
-+
-+ WREG32(mmATC_ATS_DEBUG, value);
-+}
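/* The register update in set_num_of_requests() above is the usual
 * mask-and-shift read-modify-write: clear the field with its mask, then OR
 * in the new value at the field's shift. A standalone sketch with made-up
 * FIELD_MASK/FIELD_SHIFT values (the real ones come from the ATC_ATS_DEBUG
 * register headers):
 */
#include <assert.h>
#include <stdint.h>

#define FIELD_SHIFT	4
#define FIELD_MASK	(0xfU << FIELD_SHIFT)

static uint32_t update_field(uint32_t reg, uint32_t val)
{
	reg &= ~FIELD_MASK;				/* clear the old field */
	reg |= (val << FIELD_SHIFT) & FIELD_MASK;	/* insert the new value */
	return reg;
}

int main(void)
{
	/* 0xabcd with the 4-bit field at bit 4 replaced by 0x7 -> 0xab7d */
	assert(update_field(0xabcd, 0x7) == 0xab7d);
	return 0;
}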
-+
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ /* TODO: Don't use hardcoded VMIDs */
-+ if (vmid < 8 || vmid > 15) {
-+ pr_err("trying to set page table base for wrong VMID\n");
-+ return;
-+ }
-+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
-+}
-+
-+/**
-+ * read_vmid_from_vmfault_reg - read the VMID from the VM fault status register
-+ *
-+ * @kgd: kgd device pointer
-+ *
-+ * Reads the VMID field from VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK).
-+ */

-+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
-+
-+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
-+}
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-old mode 100644
-new mode 100755
-index c5044d5..2ff10e9
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
-@@ -20,6 +20,9 @@
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-+#undef pr_fmt
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
- #include <linux/module.h>
- #include <linux/fdtable.h>
- #include <linux/uaccess.h>
-@@ -28,7 +31,7 @@
- #include "amdgpu.h"
- #include "amdgpu_amdkfd.h"
- #include "amdgpu_ucode.h"
--#include "gfx_v8_0.h"
-+#include "amdgpu_amdkfd_gfx_v8.h"
- #include "gca/gfx_8_0_sh_mask.h"
- #include "gca/gfx_8_0_d.h"
- #include "gca/gfx_8_0_enum.h"
-@@ -39,7 +42,31 @@
- #include "vi_structs.h"
- #include "vid.h"
-
--struct cik_sdma_rlc_registers;
-+enum hqd_dequeue_request_type {
-+ NO_ACTION = 0,
-+ DRAIN_PIPE,
-+ RESET_WAVES,
-+ SAVE_WAVES
-+};
-+
-+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
-+ mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
-+ mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
-+ mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
-+ mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
-+};
-+
-+
-+struct vi_sdma_mqd;
-+
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem);
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem);
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem);
-+
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-
- /*
- * Register access functions
-@@ -55,17 +82,26 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
- static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
- static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-- uint32_t queue_id, uint32_t __user *wptr);
--static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm);
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm);
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
- static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
- static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
--static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id);
- static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout);
--static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
- static int kgd_address_watch_disable(struct kgd_dev *kgd);
- static int kgd_address_watch_execute(struct kgd_dev *kgd,
- unsigned int watch_point_id,
-@@ -84,20 +120,61 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
- static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- uint8_t vmid);
- static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
--static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests);
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid);
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype);
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base);
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-+
-+/* Because of REG_GET_FIELD() being used, we put this function in the
-+ * asic specific file.
-+ */
-+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
-+ struct tile_config *config)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+
-+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
-+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFBANK);
-+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFRANKS);
-+
-+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
-+ config->num_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
-+ config->macro_tile_config_ptr =
-+ adev->gfx.config.macrotile_mode_array;
-+ config->num_macro_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
-+
-+ return 0;
-+}
-
- static const struct kfd2kgd_calls kfd2kgd = {
- .init_gtt_mem_allocation = alloc_gtt_mem,
- .free_gtt_mem = free_gtt_mem,
-- .get_vmem_size = get_vmem_size,
-+ .get_local_mem_info = get_local_mem_info,
- .get_gpu_clock_counter = get_gpu_clock_counter,
- .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
-+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
-+ .create_process_gpumem = create_process_gpumem,
-+ .destroy_process_gpumem = destroy_process_gpumem,
-+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
-+ .open_graphic_handle = open_graphic_handle,
- .program_sh_mem_settings = kgd_program_sh_mem_settings,
- .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
- .init_interrupts = kgd_init_interrupts,
- .hqd_load = kgd_hqd_load,
- .hqd_sdma_load = kgd_hqd_sdma_load,
-+ .hqd_dump = kgd_hqd_dump,
-+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
- .hqd_is_occupied = kgd_hqd_is_occupied,
- .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
- .hqd_destroy = kgd_hqd_destroy,
-@@ -111,14 +188,56 @@ static const struct kfd2kgd_calls kfd2kgd = {
- .get_atc_vmid_pasid_mapping_valid =
- get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
-- .get_fw_version = get_fw_version
-+ .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
-+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
-+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
-+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
-+ .get_fw_version = get_fw_version,
-+ .set_num_of_requests = set_num_of_requests,
-+ .get_cu_info = get_cu_info,
-+ .alloc_memory_of_scratch = alloc_memory_of_scratch,
-+ .write_config_static_mem = write_config_static_mem,
-+ .mmap_bo = amdgpu_amdkfd_gpuvm_mmap_bo,
-+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
-+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
-+ .pin_get_sg_table_bo = amdgpu_amdkfd_gpuvm_pin_get_sg_table,
-+ .unpin_put_sg_table_bo = amdgpu_amdkfd_gpuvm_unpin_put_sg_table,
-+ .get_dmabuf_info = amdgpu_amdkfd_get_dmabuf_info,
-+ .import_dmabuf = amdgpu_amdkfd_gpuvm_import_dmabuf,
-+ .export_dmabuf = amdgpu_amdkfd_gpuvm_export_dmabuf,
-+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
-+ .submit_ib = amdgpu_amdkfd_submit_ib,
-+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
-+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
-+ .copy_mem_to_mem = amdgpu_amdkfd_copy_mem_to_mem,
-+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage
- };
-
--struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
- {
- return (struct kfd2kgd_calls *)&kfd2kgd;
- }
-
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
-+/* Destroys the GPU allocation and frees the kgd_mem structure */
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem)
-+{
-+
-+}
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
- static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
- {
- return (struct amdgpu_device *)kgd;
-@@ -147,7 +266,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-- uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- lock_srbm(kgd, mec, pipe, queue_id, 0);
-@@ -216,21 +335,28 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
- uint32_t mec;
- uint32_t pipe;
-
-- mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
- pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-
- lock_srbm(kgd, mec, pipe, 0, 0);
-
-- WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
-+ WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
-+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
-
- unlock_srbm(kgd);
-
- return 0;
- }
-
--static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
-+static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
- {
-- return 0;
-+ uint32_t retval;
-+
-+ retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
-+ m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
-+ pr_debug("sdma base address: 0x%x\n", retval);
-+
-+ return retval;
- }
-
- static inline struct vi_mqd *get_mqd(void *mqd)
-@@ -238,9 +364,9 @@ static inline struct vi_mqd *get_mqd(void *mqd)
- return (struct vi_mqd *)mqd;
- }
-
--static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
-+static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
- {
-- return (struct cik_sdma_rlc_registers *)mqd;
-+ return (struct vi_sdma_mqd *)mqd;
- }
-
- static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-@@ -252,16 +378,18 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- struct vi_mqd *m;
- uint32_t *mqd_hqd;
- uint32_t reg, wptr_val, data;
-+ bool valid_wptr = false;
-
- m = get_mqd(mqd);
-
- acquire_queue(kgd, pipe_id, queue_id);
-- /*HIQ is set during driver init period with vmid set to 0. For SRIOV
-- * world switching support let the RLC know about the HIQ.
-- *
-- * Workaround: This causes reboots on CZ. Disable this on CZ, which
-- * doesn't support SRIOV anyway.
-- */
-+
-+ /* HIQ is set during driver init period with vmid set to 0. For SRIOV
-+ * world switching support let the RLC know about the HIQ.
-+ *
-+ * Workaround: This causes reboots on CZ. Disable this on CZ, which
-+ * doesn't support SRIOV anyway.
-+ */
- if (m->cp_hqd_vmid == 0 &&
- adev->asic_type != CHIP_CARRIZO) {
- uint32_t value, mec, pipe;
-@@ -304,7 +432,14 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
- WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
-
-- if (read_user_wptr(mm, wptr, wptr_val))
-+ /* read_user_ptr may take the mm->mmap_sem.
-+ * release srbm_mutex to avoid circular dependency between
-+ * srbm_mutex->mm_sem->reservation_ww_class_mutex->srbm_mutex.
-+ */
-+ release_queue(kgd);
-+ valid_wptr = read_user_wptr(mm, wptr, wptr_val);
-+ acquire_queue(kgd, pipe_id, queue_id);
-+ if (valid_wptr)
- WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
-
- data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
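/* The comment in the hunk above drops srbm_mutex around read_user_wptr()
 * because that call may take mm->mmap_sem, and acquiring the two locks in
 * different orders from different paths can deadlock. A self-contained
 * sketch of the drop-and-reacquire pattern; lock_a/lock_b and the helper
 * names are stand-ins, not driver code:
 */
#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* e.g. srbm_mutex */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* e.g. mmap_sem */
static unsigned int shared_val;

/* Must be called without lock_a held, since it takes lock_b. */
static unsigned int read_under_lock_b(void)
{
	unsigned int v;

	pthread_mutex_lock(&lock_b);
	v = shared_val;
	pthread_mutex_unlock(&lock_b);
	return v;
}

static void update_under_lock_a(void)
{
	unsigned int v;

	pthread_mutex_lock(&lock_a);
	/* ... work that needs lock_a ... */

	/* Drop lock_a before the lock_b operation so no thread ever holds
	 * lock_a while waiting for lock_b, then take lock_a again.
	 */
	pthread_mutex_unlock(&lock_a);
	v = read_under_lock_b();
	pthread_mutex_lock(&lock_a);

	/* ... use v under lock_a ... */
	(void)v;
	pthread_mutex_unlock(&lock_a);
}

int main(void)
{
	update_under_lock_a();
	return 0;
}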
-@@ -315,8 +450,138 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
- return 0;
- }
-
--static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t i = 0, reg;
-+#define HQD_N_REGS (54+4)
-+#define DUMP_REG(addr) do { \
-+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
-+ break; \
-+ (*dump)[i][0] = (addr) << 2; \
-+ (*dump)[i++][1] = RREG32(addr); \
-+ } while (0)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE0);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE1);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE2);
-+ DUMP_REG(mmCOMPUTE_STATIC_THREAD_MGMT_SE3);
-+
-+ for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_DONES; reg++)
-+ DUMP_REG(reg);
-+
-+ release_queue(kgd);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct vi_sdma_mqd *m;
-+ uint32_t sdma_base_addr;
-+ uint32_t temp, timeout = 2000;
-+ uint32_t data;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
-+
-+ while (true) {
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (timeout == 0)
-+ return -ETIME;
-+ msleep(10);
-+ timeout -= 10;
-+ }
-+ if (m->sdma_engine_id) {
-+ data = RREG32(mmSDMA1_GFX_CONTEXT_CNTL);
-+ data = REG_SET_FIELD(data, SDMA1_GFX_CONTEXT_CNTL,
-+ RESUME_CTX, 0);
-+ WREG32(mmSDMA1_GFX_CONTEXT_CNTL, data);
-+ } else {
-+ data = RREG32(mmSDMA0_GFX_CONTEXT_CNTL);
-+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-+ RESUME_CTX, 0);
-+ WREG32(mmSDMA0_GFX_CONTEXT_CNTL, data);
-+ }
-+
-+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
-+ ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-+
-+ if (read_user_wptr(mm, wptr, data))
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, data);
-+ else
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ m->sdmax_rlcx_rb_rptr);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
-+ m->sdmax_rlcx_virtual_addr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
-+ m->sdmax_rlcx_rb_base_hi);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
-+ m->sdmax_rlcx_rb_rptr_addr_lo);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
-+ m->sdmax_rlcx_rb_rptr_addr_hi);
-+
-+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
-+ RB_ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
- {
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t sdma_offset = engine_id * SDMA1_REGISTER_OFFSET +
-+ queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
-+ uint32_t i = 0, reg;
-+#undef HQD_N_REGS
-+#define HQD_N_REGS (19+4+2+3+7)
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-+ DUMP_REG(sdma_offset + reg);
-+ for (reg = mmSDMA0_RLC0_VIRTUAL_ADDR; reg <= mmSDMA0_RLC0_WATERMARK;
-+ reg++)
-+ DUMP_REG(sdma_offset + reg);
-+ for (reg = mmSDMA0_RLC0_CSA_ADDR_LO; reg <= mmSDMA0_RLC0_CSA_ADDR_HI;
-+ reg++)
-+ DUMP_REG(sdma_offset + reg);
-+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; reg <= mmSDMA0_RLC0_DUMMY_REG;
-+ reg++)
-+ DUMP_REG(sdma_offset + reg);
-+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; reg <= mmSDMA0_RLC0_MIDCMD_CNTL;
-+ reg++)
-+ DUMP_REG(sdma_offset + reg);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
- return 0;
- }
-
-@@ -345,7 +610,7 @@ static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
- static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-- struct cik_sdma_rlc_registers *m;
-+ struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
- uint32_t sdma_rlc_rb_cntl;
-
-@@ -360,29 +625,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
- return false;
- }
-
--static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
- unsigned int utimeout, uint32_t pipe_id,
- uint32_t queue_id)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
- uint32_t temp;
-- int timeout = utimeout;
-+ enum hqd_dequeue_request_type type;
-+ unsigned long flags, end_jiffies;
-+ int retry;
-+ struct vi_mqd *m = get_mqd(mqd);
-
- acquire_queue(kgd, pipe_id, queue_id);
-
-- WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
-+ if (m->cp_hqd_vmid == 0)
-+ WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
-
-+ switch (reset_type) {
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
-+ type = DRAIN_PIPE;
-+ break;
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
-+ type = RESET_WAVES;
-+ break;
-+ default:
-+ type = DRAIN_PIPE;
-+ break;
-+ }
-+
-+ /* Workaround: If IQ timer is active and the wait time is close to or
-+ * equal to 0, dequeueing is not safe. Wait until either the wait time
-+ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
-+ * cleared before continuing. Also, ensure wait times are set to at
-+ * least 0x3.
-+ */
-+ local_irq_save(flags);
-+ preempt_disable();
-+ retry = 5000; /* wait for 500 usecs at maximum */
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_IQ_TIMER);
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
-+ pr_debug("HW is processing IQ\n");
-+ goto loop;
-+ }
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
-+ == 3) /* SEM-rearm is safe */
-+ break;
-+ /* Wait time 3 is safe for CP, but our MMIO read/write
-+ * time is close to 1 microsecond, so check for 10 to
-+ * leave more buffer room
-+ */
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
-+ >= 10)
-+ break;
-+ pr_debug("IQ timer is active\n");
-+ } else
-+ break;
-+loop:
-+ if (!retry) {
-+ pr_err("CP HQD IQ timer status time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ retry = 1000;
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
-+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
-+ break;
-+ pr_debug("Dequeue request is pending\n");
-+
-+ if (!retry) {
-+ pr_err("CP HQD dequeue request time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ local_irq_restore(flags);
-+ preempt_enable();
-+
-+ WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
-+
-+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
- while (true) {
- temp = RREG32(mmCP_HQD_ACTIVE);
-- if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
-+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
- break;
-- if (timeout <= 0) {
-- pr_err("kfd: cp queue preemption time out.\n");
-+ if (time_after(jiffies, end_jiffies)) {
-+ pr_err("cp queue preemption time out.\n");
- release_queue(kgd);
- return -ETIME;
- }
-- msleep(20);
-- timeout -= 20;
-+ usleep_range(500, 1000);
- }
-
- release_queue(kgd);
-@@ -393,10 +731,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
- unsigned int utimeout)
- {
- struct amdgpu_device *adev = get_amdgpu_device(kgd);
-- struct cik_sdma_rlc_registers *m;
-+ struct vi_sdma_mqd *m;
- uint32_t sdma_base_addr;
- uint32_t temp;
-- int timeout = utimeout;
-+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
-
- m = get_sdma_mqd(mqd);
- sdma_base_addr = get_sdma_base_addr(m);
-@@ -407,18 +745,19 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-
- while (true) {
- temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-- if (temp & SDMA0_STATUS_REG__RB_CMD_IDLE__SHIFT)
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
- break;
-- if (timeout <= 0)
-+ if (time_after(jiffies, end_jiffies))
- return -ETIME;
-- msleep(20);
-- timeout -= 20;
-+ usleep_range(500, 1000);
- }
-
- WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, 0);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, 0);
-- WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, 0);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
-+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
-+
-+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
-
- return 0;
- }
-@@ -440,7 +779,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
-- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
- }
-
- static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-@@ -450,8 +789,83 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
- }
-
-+/*
-+ * FIXME: The Polaris test failed with this packet while FIJI works
-+ * fine. The CP spec does not officially support invalidation with a
-+ * specified PASID in this packet, so disable it for V8.
-+ *
-+ */
-+#ifdef V8_SUPPORT_IT_OFFICIAL
-+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-+{
-+ signed long r;
-+ struct dma_fence *f;
-+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-+
-+ mutex_lock(&adev->gfx.kiq.ring_mutex);
-+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs packet */
-+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-+ amdgpu_ring_write(ring,
-+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-+ PACKET3_INVALIDATE_TLBS_PASID(pasid));
-+ amdgpu_fence_emit(ring, &f);
-+ amdgpu_ring_commit(ring);
-+ mutex_unlock(&adev->gfx.kiq.ring_mutex);
-+
-+ r = dma_fence_wait(f, false);
-+ if (r)
-+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-+ dma_fence_put(f);
-+
-+ return r;
-+}
-+#endif
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ int vmid;
-+
-+#ifdef V8_SUPPORT_IT_OFFICIAL
-+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-+
-+ if (ring->ready)
-+ return invalidate_tlbs_with_kiq(adev, pasid);
-+#endif
-+
-+ for (vmid = 0; vmid < 16; vmid++) {
-+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-+ continue;
-+ if (RREG32(mmATC_VMID0_PASID_MAPPING + vmid) &
-+ ATC_VMID0_PASID_MAPPING__VALID_MASK) {
-+ if ((RREG32(mmATC_VMID0_PASID_MAPPING + vmid) &
-+ ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
-+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
-+ break;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
- static int kgd_address_watch_disable(struct kgd_dev *kgd)
- {
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+ unsigned int i;
-+
-+ cntl.u32All = 0;
-+
-+ cntl.bitfields.valid = 0;
-+ cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-+ cntl.bitfields.atc = 1;
-+
-+ /* Turning off this address until we set all the registers */
-+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
-+ WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
- return 0;
- }
-
-@@ -461,6 +875,32 @@ static int kgd_address_watch_execute(struct kgd_dev *kgd,
- uint32_t addr_hi,
- uint32_t addr_lo)
- {
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+
-+ cntl.u32All = cntl_val;
-+
-+ /* Turning off this watch point until we set all the registers */
-+ cntl.bitfields.valid = 0;
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_ADDR_HI],
-+ addr_hi);
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_ADDR_LO],
-+ addr_lo);
-+
-+ /* Enable the watch point */
-+ cntl.bitfields.valid = 1;
-+
-+ WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX
-+ + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
- return 0;
- }
-
-@@ -493,6 +933,32 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
- unsigned int watch_point_id,
- unsigned int reg_offset)
- {
-+ return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
-+}
-+
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = swizzle_enable << SH_STATIC_MEM_CONFIG__SWIZZLE_ENABLE__SHIFT |
-+ element_size << SH_STATIC_MEM_CONFIG__ELEMENT_SIZE__SHIFT |
-+ index_stride << SH_STATIC_MEM_CONFIG__INDEX_STRIDE__SHIFT |
-+ mtype << SH_STATIC_MEM_CONFIG__PRIVATE_MTYPE__SHIFT;
-+
-+ WREG32(mmSH_STATIC_MEM_CONFIG, reg);
-+ return 0;
-+}
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ lock_srbm(kgd, 0, 0, 0, vmid);
-+ WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
-+ unlock_srbm(kgd);
-+
- return 0;
- }
-
-@@ -501,47 +967,45 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- const union amdgpu_firmware_header *hdr;
-
-- BUG_ON(kgd == NULL);
--
- switch (type) {
- case KGD_ENGINE_PFP:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.pfp_fw->data;
-+ adev->gfx.pfp_fw->data;
- break;
-
- case KGD_ENGINE_ME:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.me_fw->data;
-+ adev->gfx.me_fw->data;
- break;
-
- case KGD_ENGINE_CE:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.ce_fw->data;
-+ adev->gfx.ce_fw->data;
- break;
-
- case KGD_ENGINE_MEC1:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.mec_fw->data;
-+ adev->gfx.mec_fw->data;
- break;
-
- case KGD_ENGINE_MEC2:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.mec2_fw->data;
-+ adev->gfx.mec2_fw->data;
- break;
-
- case KGD_ENGINE_RLC:
- hdr = (const union amdgpu_firmware_header *)
-- adev->gfx.rlc_fw->data;
-+ adev->gfx.rlc_fw->data;
- break;
-
- case KGD_ENGINE_SDMA1:
- hdr = (const union amdgpu_firmware_header *)
-- adev->sdma.instance[0].fw->data;
-+ adev->sdma.instance[0].fw->data;
- break;
-
- case KGD_ENGINE_SDMA2:
- hdr = (const union amdgpu_firmware_header *)
-- adev->sdma.instance[1].fw->data;
-+ adev->sdma.instance[1].fw->data;
- break;
-
- default:
-@@ -554,3 +1018,21 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
- /* Only 12 bit in use*/
- return hdr->common.ucode_version;
- }
-+
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests)
-+{
-+ pr_debug("This is a stub\n");
-+}
-+
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ /* TODO: Don't use hardcoded VMIDs */
-+ if (vmid < 8 || vmid > 15) {
-+ pr_err("trying to set page table base for wrong VMID\n");
-+ return;
-+ }
-+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
-+}
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.h
-new file mode 100644
-index 0000000..3c94919
---- /dev/null
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.h
-@@ -0,0 +1,62 @@
-+/*
-+ * Copyright 2015 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef AMDGPU_AMDKFD_GFX_V8_H_INCLUDED
-+#define AMDGPU_AMDKFD_GFX_V8_H_INCLUDED
-+
-+#include <linux/types.h>
-+
-+enum {
-+ MAX_TRAPID = 8, /* 3 bits in the bitfield. */
-+ MAX_WATCH_ADDRESSES = 4
-+};
-+
-+enum {
-+ ADDRESS_WATCH_REG_ADDR_HI = 0,
-+ ADDRESS_WATCH_REG_ADDR_LO,
-+ ADDRESS_WATCH_REG_CNTL,
-+ ADDRESS_WATCH_REG_MAX
-+};
-+
-+/* not defined in the VI reg file */
-+enum {
-+ ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
-+ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
-+ ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
-+ /* extend the mask to 26 bits in order to match the low address field */
-+ ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
-+ ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
-+};
-+
-+union TCP_WATCH_CNTL_BITS {
-+ struct {
-+ uint32_t mask:24;
-+ uint32_t vmid:4;
-+ uint32_t atc:1;
-+ uint32_t mode:2;
-+ uint32_t valid:1;
-+ } bitfields, bits;
-+ uint32_t u32All;
-+ signed int i32All;
-+ float f32All;
-+};
-+#endif
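/* How the TCP_WATCH_CNTL_BITS union above packs its fields into a single
 * 32-bit control word. The field layout is copied from the header; the
 * main() driver and the printed value assume a little-endian GCC-style
 * bitfield layout (bitfield ordering is otherwise implementation-defined
 * in C).
 */
#include <inttypes.h>
#include <stdio.h>

union TCP_WATCH_CNTL_BITS {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields;
	uint32_t u32All;
};

int main(void)
{
	union TCP_WATCH_CNTL_BITS cntl = { .u32All = 0 };

	cntl.bitfields.mask = 0x00ffffff;	/* ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK */
	cntl.bitfields.atc = 1;			/* matches ADDRESS_WATCH_REG_CNTL_ATC_BIT */
	cntl.bitfields.valid = 0;		/* keep the watch point disabled */

	/* Prints 0x10ffffff: bit 28 is the ATC bit, bits 0-23 the mask. */
	printf("CNTL word: 0x%08" PRIx32 "\n", cntl.u32All);
	return 0;
}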
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-new file mode 100644
-index 0000000..edbae19
---- /dev/null
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
-@@ -0,0 +1,1227 @@
-+/*
-+ * Copyright 2014 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#undef pr_fmt
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
-+#include <linux/module.h>
-+#include <linux/fdtable.h>
-+#include <linux/uaccess.h>
-+#include <linux/firmware.h>
-+#include <drm/drmP.h>
-+#include "amdgpu.h"
-+#include "amdgpu_amdkfd.h"
-+#include "amdgpu_ucode.h"
-+#include "amdgpu_amdkfd_gfx_v8.h"
-+#include "vega10/soc15ip.h"
-+#include "vega10/GC/gc_9_0_offset.h"
-+#include "vega10/GC/gc_9_0_sh_mask.h"
-+#include "vega10/vega10_enum.h"
-+#include "vega10/SDMA0/sdma0_4_0_offset.h"
-+#include "vega10/SDMA0/sdma0_4_0_sh_mask.h"
-+#include "vega10/SDMA1/sdma1_4_0_offset.h"
-+#include "vega10/SDMA1/sdma1_4_0_sh_mask.h"
-+#include "vega10/ATHUB/athub_1_0_offset.h"
-+#include "vega10/ATHUB/athub_1_0_sh_mask.h"
-+#include "vega10/OSSSYS/osssys_4_0_offset.h"
-+#include "vega10/OSSSYS/osssys_4_0_sh_mask.h"
-+#include "soc15_common.h"
-+#include "v9_structs.h"
-+#include "soc15.h"
-+#include "soc15d.h"
-+
-+/* HACK: MMHUB and GC both have VM-related register with the same
-+ * names but different offsets. Define the MMHUB register we need here
-+ * with a prefix. A proper solution would be to move the functions
-+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
-+ * respectively.
-+ */
-+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
-+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
-+
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
-+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
-+
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
-+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
-+
-+enum hqd_dequeue_request_type {
-+ NO_ACTION = 0,
-+ DRAIN_PIPE,
-+ RESET_WAVES,
-+ SAVE_WAVES
-+};
-+
-+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
-+ mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL,
-+ mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL,
-+ mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL,
-+ mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL
-+};
-+
-+
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem);
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem);
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem);
-+
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
-+
-+/*
-+ * Register access functions
-+ */
-+
-+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t sh_mem_config,
-+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
-+ uint32_t sh_mem_bases);
-+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-+ unsigned int vmid);
-+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t hpd_size, uint64_t hpd_gpu_addr);
-+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
-+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm);
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm);
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-+ uint32_t pipe_id, uint32_t queue_id);
-+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
-+ unsigned int utimeout, uint32_t pipe_id,
-+ uint32_t queue_id);
-+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-+ unsigned int utimeout);
-+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
-+static uint32_t get_watch_base_addr(void);
-+static int kgd_address_watch_disable(struct kgd_dev *kgd);
-+static int kgd_address_watch_execute(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ uint32_t cntl_val,
-+ uint32_t addr_hi,
-+ uint32_t addr_lo);
-+static int kgd_wave_control_execute(struct kgd_dev *kgd,
-+ uint32_t gfx_index_val,
-+ uint32_t sq_cmd);
-+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ unsigned int reg_offset);
-+
-+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-+ uint8_t vmid);
-+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-+ uint8_t vmid);
-+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests);
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid);
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype);
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base);
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
-+
-+/* Because of REG_GET_FIELD() being used, we put this function in the
-+ * asic specific file.
-+ */
-+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
-+ struct tile_config *config)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+
-+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
-+#if 0
-+/* TODO: confirm the two REG_GET_FIELD calls; they should be OK as is.
-+ * The MC_ARB_RAMCFG register does not exist on Vega10, and the initial
-+ * amdgpu changes commented out the related code, so do the same here for
-+ * now but sync with Ken et al. later.
-+ */
-+ config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFBANK);
-+ config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-+ MC_ARB_RAMCFG, NOOFRANKS);
-+#endif
-+
-+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
-+ config->num_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
-+ config->macro_tile_config_ptr =
-+ adev->gfx.config.macrotile_mode_array;
-+ config->num_macro_tile_configs =
-+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
-+
-+ return 0;
-+}
-+
-+static const struct kfd2kgd_calls kfd2kgd = {
-+ .init_gtt_mem_allocation = alloc_gtt_mem,
-+ .free_gtt_mem = free_gtt_mem,
-+ .get_local_mem_info = get_local_mem_info,
-+ .get_gpu_clock_counter = get_gpu_clock_counter,
-+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
-+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
-+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
-+ .create_process_gpumem = create_process_gpumem,
-+ .destroy_process_gpumem = destroy_process_gpumem,
-+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
-+ .open_graphic_handle = open_graphic_handle,
-+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
-+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
-+ .init_pipeline = kgd_init_pipeline,
-+ .init_interrupts = kgd_init_interrupts,
-+ .hqd_load = kgd_hqd_load,
-+ .hqd_sdma_load = kgd_hqd_sdma_load,
-+ .hqd_dump = kgd_hqd_dump,
-+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
-+ .hqd_is_occupied = kgd_hqd_is_occupied,
-+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
-+ .hqd_destroy = kgd_hqd_destroy,
-+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
-+ .address_watch_disable = kgd_address_watch_disable,
-+ .address_watch_execute = kgd_address_watch_execute,
-+ .wave_control_execute = kgd_wave_control_execute,
-+ .address_watch_get_offset = kgd_address_watch_get_offset,
-+ .get_atc_vmid_pasid_mapping_pasid =
-+ get_atc_vmid_pasid_mapping_pasid,
-+ .get_atc_vmid_pasid_mapping_valid =
-+ get_atc_vmid_pasid_mapping_valid,
-+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
-+ .invalidate_tlbs = invalidate_tlbs,
-+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
-+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
-+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
-+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
-+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
-+ .get_fw_version = get_fw_version,
-+ .set_num_of_requests = set_num_of_requests,
-+ .get_cu_info = get_cu_info,
-+ .alloc_memory_of_scratch = alloc_memory_of_scratch,
-+ .write_config_static_mem = write_config_static_mem,
-+ .mmap_bo = amdgpu_amdkfd_gpuvm_mmap_bo,
-+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
-+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
-+ .pin_get_sg_table_bo = amdgpu_amdkfd_gpuvm_pin_get_sg_table,
-+ .unpin_put_sg_table_bo = amdgpu_amdkfd_gpuvm_unpin_put_sg_table,
-+ .get_dmabuf_info = amdgpu_amdkfd_get_dmabuf_info,
-+ .import_dmabuf = amdgpu_amdkfd_gpuvm_import_dmabuf,
-+ .export_dmabuf = amdgpu_amdkfd_gpuvm_export_dmabuf,
-+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
-+ .submit_ib = amdgpu_amdkfd_submit_ib,
-+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
-+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
-+ .copy_mem_to_mem = amdgpu_amdkfd_copy_mem_to_mem,
-+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage
-+};
-+
-+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions()
-+{
-+ return (struct kfd2kgd_calls *)&kfd2kgd;
-+}
-+
-+static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size,
-+ void *vm, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
-+/* Destroys the GPU allocation and frees the kgd_mem structure */
-+static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem)
-+{
-+
-+}
-+
-+static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm,
-+ int fd, uint32_t handle, struct kgd_mem **mem)
-+{
-+ return 0;
-+}
-+
-+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-+{
-+ return (struct amdgpu_device *)kgd;
-+}
-+
-+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
-+ uint32_t queue, uint32_t vmid)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ mutex_lock(&adev->srbm_mutex);
-+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
-+}
-+
-+static void unlock_srbm(struct kgd_dev *kgd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ soc15_grbm_select(adev, 0, 0, 0, 0);
-+ mutex_unlock(&adev->srbm_mutex);
-+}
-+
-+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t queue_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-+
-+ lock_srbm(kgd, mec, pipe, queue_id, 0);
-+}
-+
-+static uint32_t get_queue_mask(struct amdgpu_device *adev,
-+ uint32_t pipe_id, uint32_t queue_id)
-+{
-+ unsigned int bit = (pipe_id * adev->gfx.mec.num_pipe_per_mec +
-+ queue_id) & 31;
-+
-+ return ((uint32_t)1) << bit;
-+}
-+
-+static void release_queue(struct kgd_dev *kgd)
-+{
-+ unlock_srbm(kgd);
-+}
-+
-+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t sh_mem_config,
-+ uint32_t sh_mem_ape1_base,
-+ uint32_t sh_mem_ape1_limit,
-+ uint32_t sh_mem_bases)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ lock_srbm(kgd, 0, 0, 0, vmid);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
-+ /* APE1 no longer exists on GFX9 */
-+
-+ unlock_srbm(kgd);
-+}
-+
-+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
-+ unsigned int vmid)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ /*
-+ * We have to assume that there is no outstanding mapping.
-+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
-+ * a mapping is in progress or because a mapping finished
-+ * and the SW cleared it.
-+ * So the protocol is to always wait & clear.
-+ */
-+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
-+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+
-+ /*
-+ * need to do this twice, once for gfx and once for mmhub
-+ * for ATC add 16 to VMID for mmhub, for IH different registers.
-+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
-+ */
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
-+ pasid_mapping);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(
-+ ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
-+ (1U << vmid)))
-+ cpu_relax();
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
-+ 1U << vmid);
-+
-+ /* Mapping vmid to pasid also for IH block */
-+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
-+ pasid_mapping);
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
-+ pasid_mapping);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(
-+ ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
-+ (1U << (vmid + 16))))
-+ cpu_relax();
-+
-+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
-+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
-+ 1U << (vmid + 16));
-+
-+ /* Mapping vmid to pasid also for IH block */
-+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
-+ pasid_mapping);
-+ return 0;
-+}
-+
-+static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
-+ uint32_t hpd_size, uint64_t hpd_gpu_addr)
-+{
-+ /* amdgpu owns the per-pipe state */
-+ return 0;
-+}
-+
-+/* TODO: the RING0 form of this field is obsolete (it appears to date
-+ * back to SI) but it still works.
-+ */
-+
-+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t mec;
-+ uint32_t pipe;
-+
-+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-+
-+ lock_srbm(kgd, mec, pipe, 0, 0);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
-+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
-+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
-+
-+ unlock_srbm(kgd);
-+
-+ return 0;
-+}
-+
-+static uint32_t get_sdma_base_addr(unsigned int engine_id,
-+ unsigned int queue_id)
-+{
-+ static const uint32_t base[2] = {
-+ SOC15_REG_OFFSET(SDMA0, 0,
-+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-+ SOC15_REG_OFFSET(SDMA1, 0,
-+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
-+ };
-+ uint32_t retval;
-+
-+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
-+ mmSDMA0_RLC0_RB_CNTL);
-+
-+ pr_debug("sdma base address: 0x%x\n", retval);
-+
-+ return retval;
-+}
-+
-+static uint32_t get_watch_base_addr(void)
-+{
-+ uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
-+ mmTCP_WATCH0_ADDR_H;
-+
-+ pr_debug("kfd: reg watch base address: 0x%x\n", retval);
-+
-+ return retval;
-+}
-+
-+static inline struct v9_mqd *get_mqd(void *mqd)
-+{
-+ return (struct v9_mqd *)mqd;
-+}
-+
-+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
-+{
-+ return (struct v9_sdma_mqd *)mqd;
-+}
-+
-+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_mqd *m;
-+ uint32_t *mqd_hqd;
-+ uint32_t reg, hqd_base, data;
-+
-+ m = get_mqd(mqd);
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ /* HIQ is set during driver init period with vmid set to 0*/
-+ if (m->cp_hqd_vmid == 0) {
-+ uint32_t value, mec, pipe;
-+
-+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
-+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
-+
-+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
-+ mec, pipe, queue_id);
-+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
-+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
-+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
-+ }
-+
-+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
-+ mqd_hqd = &m->cp_mqd_base_addr_lo;
-+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
-+
-+ for (reg = hqd_base;
-+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-+ WREG32(reg, mqd_hqd[reg - hqd_base]);
-+
-+
-+ /* Activate doorbell logic before triggering WPTR poll. */
-+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
-+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
-+
-+ if (wptr) {
-+ /* Don't read wptr with get_user because the user
-+ * context may not be accessible (if this function
-+ * runs in a work queue). Instead trigger a one-shot
-+ * polling read from memory in the CP. This assumes
-+ * that wptr is GPU-accessible in the queue's VMID via
-+ * ATC or SVM. WPTR==RPTR before starting the poll so
-+ * the CP starts fetching new commands from the right
-+ * place.
-+ *
-+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
-+ * tricky. Assume that the queue didn't overflow. The
-+ * number of valid bits in the 32-bit RPTR depends on
-+ * the queue size. The remaining bits are taken from
-+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
-+ * queue size.
-+ */
-+ uint32_t queue_size =
-+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
-+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
-+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
-+
-+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
-+ guessed_wptr += queue_size;
-+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
-+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
-+ lower_32_bits(guessed_wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
-+ upper_32_bits(guessed_wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
-+ lower_32_bits((uint64_t)wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
-+ upper_32_bits((uint64_t)wptr));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
-+ get_queue_mask(adev, pipe_id, queue_id));
-+ }
-+
-+ /* Start the EOP fetcher */
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
-+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
-+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
-+
-+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
-+
-+ release_queue(kgd);
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_dump(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t i = 0, reg;
-+#define HQD_N_REGS 56
-+#define DUMP_REG(addr) do { \
-+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
-+ break; \
-+ (*dump)[i][0] = (addr) << 2; \
-+ (*dump)[i++][1] = RREG32(addr); \
-+ } while (0)
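-+ /* Each dumped entry records the register's byte offset (its dword
-+ * offset << 2) together with the value read back.
-+ */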
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
-+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
-+ DUMP_REG(reg);
-+
-+ release_queue(kgd);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
-+ uint32_t temp, timeout = 2000;
-+ uint32_t data;
-+ uint64_t data64;
-+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
-+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
-+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
-+
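-+ /* With the ring buffer disabled above, poll the context status for
-+ * up to ~2 s (2000 ms in 10 ms steps) until the engine reports idle
-+ * before reprogramming the queue.
-+ */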
-+ while (true) {
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (timeout == 0)
-+ return -ETIME;
-+ msleep(10);
-+ timeout -= 10;
-+ }
-+ data = RREG32(sdmax_gfx_context_cntl);
-+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
-+ RESUME_CTX, 0);
-+ WREG32(sdmax_gfx_context_cntl, data);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
-+ m->sdmax_rlcx_doorbell_offset);
-+
-+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
-+ ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
-+ m->sdmax_rlcx_rb_rptr_hi);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
-+ if (read_user_wptr(mm, wptr64, data64)) {
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ lower_32_bits(data64));
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
-+ upper_32_bits(data64));
-+ } else {
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
-+ m->sdmax_rlcx_rb_rptr);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
-+ m->sdmax_rlcx_rb_rptr_hi);
-+ }
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
-+ m->sdmax_rlcx_rb_base_hi);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
-+ m->sdmax_rlcx_rb_rptr_addr_lo);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
-+ m->sdmax_rlcx_rb_rptr_addr_hi);
-+
-+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
-+ RB_ENABLE, 1);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
-+
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t sdma_base_addr = get_sdma_base_addr(engine_id, queue_id);
-+ uint32_t i = 0, reg;
-+#undef HQD_N_REGS
-+#define HQD_N_REGS (19+6+7+10)
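-+ /* The four addends presumably match the sizes of the four SDMA
-+ * register ranges dumped below.
-+ */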
-+
-+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
-+ if (*dump == NULL)
-+ return -ENOMEM;
-+
-+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
-+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
-+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
-+ DUMP_REG(sdma_base_addr + reg);
-+
-+ WARN_ON_ONCE(i != HQD_N_REGS);
-+ *n_regs = i;
-+
-+ return 0;
-+}
-+
-+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
-+ uint32_t pipe_id, uint32_t queue_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t act;
-+ bool retval = false;
-+ uint32_t low, high;
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
-+ if (act) {
-+ low = lower_32_bits(queue_address >> 8);
-+ high = upper_32_bits(queue_address >> 8);
-+
-+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
-+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
-+ retval = true;
-+ }
-+ release_queue(kgd);
-+ return retval;
-+}
-+
-+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr;
-+ uint32_t sdma_rlc_rb_cntl;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+
-+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
-+
-+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
-+ return true;
-+
-+ return false;
-+}
-+
-+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
-+ enum kfd_preempt_type reset_type,
-+ unsigned int utimeout, uint32_t pipe_id,
-+ uint32_t queue_id)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ enum hqd_dequeue_request_type type;
-+ unsigned long end_jiffies;
-+ uint32_t temp;
-+ struct v9_mqd *m = get_mqd(mqd);
-+
-+#if 0
-+ unsigned long flags;
-+ int retry;
-+#endif
-+
-+ acquire_queue(kgd, pipe_id, queue_id);
-+
-+ if (m->cp_hqd_vmid == 0)
-+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
-+
-+ switch (reset_type) {
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
-+ type = DRAIN_PIPE;
-+ break;
-+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
-+ type = RESET_WAVES;
-+ break;
-+ default:
-+ type = DRAIN_PIPE;
-+ break;
-+ }
-+
-+#if 0 /* Is this still needed? */
-+ /* Workaround: if the IQ timer is active and the wait time is close
-+ * to or equal to 0, dequeueing is not safe. Wait until either the
-+ * wait time is larger or the timer is cleared. Also make sure that
-+ * IQ_REQ_PEND is cleared before continuing, and that wait times are
-+ * set to at least 0x3.
-+ */
-+ local_irq_save(flags);
-+ preempt_disable();
-+ retry = 5000; /* wait for 500 usecs at maximum */
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_IQ_TIMER);
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
-+ pr_debug("HW is processing IQ\n");
-+ goto loop;
-+ }
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
-+ == 3) /* SEM-rearm is safe */
-+ break;
-+ /* Wait time 3 is safe for CP, but our MMIO read/write
-+ * time is close to 1 microsecond, so check for 10 to
-+ * leave more buffer room
-+ */
-+ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
-+ >= 10)
-+ break;
-+ pr_debug("IQ timer is active\n");
-+ } else
-+ break;
-+loop:
-+ if (!retry) {
-+ pr_err("CP HQD IQ timer status time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ retry = 1000;
-+ while (true) {
-+ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
-+ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
-+ break;
-+ pr_debug("Dequeue request is pending\n");
-+
-+ if (!retry) {
-+ pr_err("CP HQD dequeue request time out\n");
-+ break;
-+ }
-+ ndelay(100);
-+ --retry;
-+ }
-+ local_irq_restore(flags);
-+ preempt_enable();
-+#endif
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
-+
-+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
-+ while (true) {
-+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
-+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
-+ break;
-+ if (time_after(jiffies, end_jiffies)) {
-+ pr_err("cp queue preemption time out.\n");
-+ release_queue(kgd);
-+ return -ETIME;
-+ }
-+ usleep_range(500, 1000);
-+ }
-+
-+ release_queue(kgd);
-+ return 0;
-+}
-+
-+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
-+ unsigned int utimeout)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ struct v9_sdma_mqd *m;
-+ uint32_t sdma_base_addr;
-+ uint32_t temp;
-+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
-+
-+ m = get_sdma_mqd(mqd);
-+ sdma_base_addr = get_sdma_base_addr(m->sdma_engine_id,
-+ m->sdma_queue_id);
-+
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
-+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
-+
-+ while (true) {
-+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
-+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
-+ break;
-+ if (time_after(jiffies, end_jiffies))
-+ return -ETIME;
-+ usleep_range(500, 1000);
-+ }
-+
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
-+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
-+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
-+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
-+
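-+ /* Save the hardware read pointer back into the MQD so that a later
-+ * kgd_hqd_sdma_load() resumes the ring from where the engine stopped.
-+ */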
-+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
-+ m->sdmax_rlcx_rb_rptr_hi =
-+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
-+
-+ return 0;
-+}
-+
-+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
-+ uint8_t vmid)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-+ + vmid);
-+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-+}
-+
-+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
-+ uint8_t vmid)
-+{
-+ uint32_t reg;
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+
-+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
-+ + vmid);
-+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
-+}
-+
-+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ uint32_t req = (1 << vmid) |
-+ (1 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* light */
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
-+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
-+
-+ spin_lock(&adev->tlb_invalidation_lock);
-+
-+ /* Use lightweight invalidation.
-+ *
-+ * TODO 1: agree on the right set of invalidation registers for
-+ * KFD use. Use the last one for now. Invalidate both GC and
-+ * MMHUB.
-+ *
-+ * TODO 2: support range-based invalidation; requires a kfd2kgd
-+ * interface change.
-+ */
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
-+ 0xffffffff);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
-+ 0x0000001f);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
-+ 0xffffffff);
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
-+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
-+ 0x0000001f);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
-+ req);
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
-+ (1 << vmid)))
-+ cpu_relax();
-+
-+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
-+ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
-+ (1 << vmid)))
-+ cpu_relax();
-+
-+ spin_unlock(&adev->tlb_invalidation_lock);
-+
-+}
-+
-+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
-+{
-+ signed long r;
-+ struct dma_fence *f;
-+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-+
-+ mutex_lock(&adev->gfx.kiq.ring_mutex);
-+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
-+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
-+ amdgpu_ring_write(ring,
-+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
-+ PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
-+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
-+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2));
-+ amdgpu_fence_emit(ring, &f);
-+ amdgpu_ring_commit(ring);
-+ mutex_unlock(&adev->gfx.kiq.ring_mutex);
-+
-+ r = dma_fence_wait(f, false);
-+ if (r)
-+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
-+ dma_fence_put(f);
-+
-+ return r;
-+}
-+
-+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ int vmid;
-+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
-+
-+ if (ring->ready)
-+ return invalidate_tlbs_with_kiq(adev, pasid);
-+
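-+ /* KIQ ring not ready (e.g. during early init or reset): fall back to
-+ * direct MMIO invalidation of the VMID that carries this PASID.
-+ */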
-+ for (vmid = 0; vmid < 16; vmid++) {
-+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
-+ continue;
-+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
-+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
-+ == pasid) {
-+ write_vmid_invalidate_request(kgd, vmid);
-+ break;
-+ }
-+ }
-+ }
-+
-+ return 0;
-+}
-+
-+static int kgd_address_watch_disable(struct kgd_dev *kgd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+ unsigned int i;
-+ uint32_t watch_base_addr;
-+
-+ cntl.u32All = 0;
-+
-+ cntl.bitfields.valid = 0;
-+ cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
-+ cntl.bitfields.atc = 1;
-+
-+ watch_base_addr = get_watch_base_addr();
-+ /* Turn off all watch points until their registers are programmed */
-+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
-+ WREG32(watch_base_addr +
-+ watchRegs[i * ADDRESS_WATCH_REG_MAX +
-+ ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
-+ return 0;
-+}
-+
-+static int kgd_address_watch_execute(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ uint32_t cntl_val,
-+ uint32_t addr_hi,
-+ uint32_t addr_lo)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ union TCP_WATCH_CNTL_BITS cntl;
-+ uint32_t watch_base_addr;
-+
-+ watch_base_addr = get_watch_base_addr();
-+ cntl.u32All = cntl_val;
-+
-+ /* Turn off this watch point until all its registers are programmed */
-+ cntl.bitfields.valid = 0;
-+ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
-+ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_HI],
-+ addr_hi);
-+
-+ WREG32(watch_base_addr + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + ADDRESS_WATCH_REG_ADDR_LO],
-+ addr_lo);
-+
-+ /* Enable the watch point */
-+ cntl.bitfields.valid = 1;
-+
-+ WREG32(watch_base_addr +
-+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
-+ ADDRESS_WATCH_REG_CNTL],
-+ cntl.u32All);
-+
-+ return 0;
-+}
-+
-+static int kgd_wave_control_execute(struct kgd_dev *kgd,
-+ uint32_t gfx_index_val,
-+ uint32_t sq_cmd)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint32_t data = 0;
-+
-+ mutex_lock(&adev->grbm_idx_mutex);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
-+
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ INSTANCE_BROADCAST_WRITES, 1);
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ SH_BROADCAST_WRITES, 1);
-+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
-+ SE_BROADCAST_WRITES, 1);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
-+ mutex_unlock(&adev->grbm_idx_mutex);
-+
-+ return 0;
-+}
-+
-+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
-+ unsigned int watch_point_id,
-+ unsigned int reg_offset)
-+{
-+ return get_watch_base_addr() +
-+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
-+}
-+
-+static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype)
-+{
-+ /* No longer needed on GFXv9. These values are now hard-coded,
-+ * except for the MTYPE which comes from the page table.
-+ */
-+
-+ return 0;
-+}
-+static int alloc_memory_of_scratch(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid)
-+{
-+ /* No longer needed on GFXv9. The scratch base address is
-+ * passed to the shader by the CP. It's the user mode driver's
-+ * responsibility.
-+ */
-+
-+ return 0;
-+}
-+
-+/* FIXME: Does this need to be ASIC-specific code? */
-+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ const union amdgpu_firmware_header *hdr;
-+
-+ switch (type) {
-+ case KGD_ENGINE_PFP:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_ME:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_CE:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_MEC1:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_MEC2:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_RLC:
-+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
-+ break;
-+
-+ case KGD_ENGINE_SDMA1:
-+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
-+ break;
-+
-+ case KGD_ENGINE_SDMA2:
-+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
-+ break;
-+
-+ default:
-+ return 0;
-+ }
-+
-+ if (hdr == NULL)
-+ return 0;
-+
-+ /* Only 12 bits in use */
-+ return hdr->common.ucode_version;
-+}
-+
-+static void set_num_of_requests(struct kgd_dev *kgd,
-+ uint8_t num_of_requests)
-+{
-+ pr_debug("This is a stub\n");
-+}
-+
-+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base)
-+{
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
-+ AMDGPU_PTE_VALID;
-+
-+ /* TODO: Don't use hardcoded VMIDs */
-+ if (vmid < 8 || vmid > 15) {
-+ pr_err("trying to set page table base for wrong VMID %u\n",
-+ vmid);
-+ return;
-+ }
-+
-+ /* TODO: take advantage of per-process address space size. For
-+ * now, all processes share the same address space size, like
-+ * on GFX8 and older.
-+ */
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
-+ lower_32_bits(adev->vm_manager.max_pfn - 1));
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
-+ upper_32_bits(adev->vm_manager.max_pfn - 1));
-+
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
-+ lower_32_bits(adev->vm_manager.max_pfn - 1));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
-+ upper_32_bits(adev->vm_manager.max_pfn - 1));
-+
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
-+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
-+}
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-new file mode 100644
-index 0000000..7df892d
---- /dev/null
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -0,0 +1,2578 @@
-+/*
-+ * Copyright 2014 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#undef pr_fmt
-+#define pr_fmt(fmt) "kfd2kgd: " fmt
-+
-+#include <linux/module.h>
-+#include <linux/fdtable.h>
-+#include <linux/uaccess.h>
-+#include <linux/firmware.h>
-+#include <linux/list.h>
-+#include <linux/sched/mm.h>
-+#include <drm/drmP.h>
-+#include <linux/dma-buf.h>
-+#include <linux/pagemap.h>
-+#include "amdgpu_amdkfd.h"
-+#include "amdgpu_ucode.h"
-+#include "gca/gfx_8_0_sh_mask.h"
-+#include "gca/gfx_8_0_d.h"
-+#include "gca/gfx_8_0_enum.h"
-+#include "oss/oss_3_0_sh_mask.h"
-+#include "oss/oss_3_0_d.h"
-+#include "gmc/gmc_8_1_sh_mask.h"
-+#include "gmc/gmc_8_1_d.h"
-+
-+/* Special VM and GART address alignment needed for VI pre-Fiji due to
-+ * a HW bug.
-+ */
-+#define VI_BO_SIZE_ALIGN (0x8000)
-+
-+/* BO flag to indicate a KFD userptr BO */
-+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-+
-+/* Impose limit on how much memory KFD can use */
-+struct kfd_mem_usage_limit {
-+ uint64_t max_system_mem_limit;
-+ uint64_t max_userptr_mem_limit;
-+ int64_t system_mem_used;
-+ int64_t userptr_mem_used;
-+ spinlock_t mem_limit_lock;
-+};
-+
-+static struct kfd_mem_usage_limit kfd_mem_limit;
-+
-+/* Struct used for amdgpu_amdkfd_bo_validate */
-+struct amdgpu_vm_parser {
-+ uint32_t domain;
-+ bool wait;
-+};
-+
-+static const char * const domain_bit_to_string[] = {
-+ "CPU",
-+ "GTT",
-+ "VRAM",
-+ "GDS",
-+ "GWS",
-+ "OA"
-+};
-+
-+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
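-+/* The AMDGPU_GEM_DOMAIN_* flags are single-bit masks, so ffs(domain) - 1
-+ * maps a domain to its index in the table above.
-+ */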
-+
-+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
-+
-+
-+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
-+{
-+ return (struct amdgpu_device *)kgd;
-+}
-+
-+static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
-+ struct kgd_mem *mem)
-+{
-+ struct kfd_bo_va_list *entry;
-+
-+ list_for_each_entry(entry, &mem->bo_va_list, bo_list)
-+ if (entry->bo_va->base.vm == avm)
-+ return false;
-+
-+ return true;
-+}
-+
-+/* Set memory usage limits. Currently, the limits are:
-+ * System (kernel) memory - 15/16th System RAM
-+ * Userptr memory - 15/16th System RAM
-+ */
-+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
-+{
-+ struct sysinfo si;
-+ uint64_t mem;
-+
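-+ /* totalram - totalhigh is lowmem, in pages; on 64-bit kernels
-+ * totalhigh is 0, so after multiplying by mem_unit this is simply
-+ * total system RAM in bytes.
-+ */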
-+ si_meminfo(&si);
-+ mem = si.totalram - si.totalhigh;
-+ mem *= si.mem_unit;
-+
-+ spin_lock_init(&kfd_mem_limit.mem_limit_lock);
-+ kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); /* 15/16 */
-+ kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 4); /* 15/16 */
-+ pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
-+ (kfd_mem_limit.max_system_mem_limit >> 20),
-+ (kfd_mem_limit.max_userptr_mem_limit >> 20));
-+}
-+
-+static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
-+ uint64_t size, u32 domain)
-+{
-+ size_t acc_size;
-+ int ret = 0;
-+
-+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
-+ sizeof(struct amdgpu_bo));
-+
-+ spin_lock(&kfd_mem_limit.mem_limit_lock);
-+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-+ if (kfd_mem_limit.system_mem_used + (acc_size + size) >
-+ kfd_mem_limit.max_system_mem_limit) {
-+ ret = -ENOMEM;
-+ goto err_no_mem;
-+ }
-+ kfd_mem_limit.system_mem_used += (acc_size + size);
-+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-+ if ((kfd_mem_limit.system_mem_used + acc_size >
-+ kfd_mem_limit.max_system_mem_limit) ||
-+ (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
-+ kfd_mem_limit.max_userptr_mem_limit)) {
-+ ret = -ENOMEM;
-+ goto err_no_mem;
-+ }
-+ kfd_mem_limit.system_mem_used += acc_size;
-+ kfd_mem_limit.userptr_mem_used += size;
-+ }
-+err_no_mem:
-+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
-+ return ret;
-+}
-+
-+static void unreserve_system_mem_limit(struct amdgpu_device *adev,
-+ uint64_t size, u32 domain)
-+{
-+ size_t acc_size;
-+
-+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
-+ sizeof(struct amdgpu_bo));
-+
-+ spin_lock(&kfd_mem_limit.mem_limit_lock);
-+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
-+ kfd_mem_limit.system_mem_used -= (acc_size + size);
-+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
-+ kfd_mem_limit.system_mem_used -= acc_size;
-+ kfd_mem_limit.userptr_mem_used -= size;
-+ }
-+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-+ "kfd system memory accounting unbalanced");
-+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-+ "kfd userptr memory accounting unbalanced");
-+
-+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
-+}
-+
-+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
-+{
-+ spin_lock(&kfd_mem_limit.mem_limit_lock);
-+
-+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-+ kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-+ kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
-+ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
-+ kfd_mem_limit.system_mem_used -=
-+ (bo->tbo.acc_size + amdgpu_bo_size(bo));
-+ }
-+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-+ "kfd system memory accounting unbalanced");
-+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
-+ "kfd userptr memory accounting unbalanced");
-+
-+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
-+}
-+
-+
-+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
-+ * reservation object.
-+ *
-+ * @bo: [IN] Remove eviction fence(s) from this BO
-+ * @ef: [IN] If ef is specified, then this eviction fence is removed if it
-+ * is present in the shared list.
-+ * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
-+ * from BO's reservation object shared list.
-+ * @ef_count: [OUT] Number of fences in ef_list.
-+ *
-+ * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
-+ * called to restore the eviction fences and to avoid memory leak. This is
-+ * useful for shared BOs.
-+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
-+ */
-+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
-+ struct amdgpu_amdkfd_fence *ef,
-+ struct amdgpu_amdkfd_fence ***ef_list,
-+ unsigned int *ef_count)
-+{
-+ struct reservation_object_list *fobj;
-+ struct reservation_object *resv;
-+ unsigned int i = 0, j = 0, k = 0, shared_count;
-+ unsigned int count = 0;
-+ struct amdgpu_amdkfd_fence **fence_list;
-+
-+ if (!ef && !ef_list)
-+ return -EINVAL;
-+
-+ if (ef_list) {
-+ *ef_list = NULL;
-+ *ef_count = 0;
-+ }
-+
-+ resv = bo->tbo.resv;
-+ fobj = reservation_object_get_list(resv);
-+
-+ if (!fobj)
-+ return 0;
-+
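-+ /* The shared fence list is modified under the reservation lock; the
-+ * seqcount write section below lets lockless readers detect the
-+ * concurrent update and retry.
-+ */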
-+ preempt_disable();
-+ write_seqcount_begin(&resv->seq);
-+
-+ /* Go through all the shared fences in the reservation object. If
-+ * ef is specified and it exists in the list, remove it and reduce the
-+ * count. If ef is not specified, then get the count of eviction fences
-+ * present.
-+ */
-+ shared_count = fobj->shared_count;
-+ for (i = 0; i < shared_count; ++i) {
-+ struct dma_fence *f;
-+
-+ f = rcu_dereference_protected(fobj->shared[i],
-+ reservation_object_held(resv));
-+
-+ if (ef) {
-+ if (f->context == ef->base.context) {
-+ dma_fence_put(f);
-+ fobj->shared_count--;
-+ } else
-+ RCU_INIT_POINTER(fobj->shared[j++], f);
-+
-+ } else if (to_amdgpu_amdkfd_fence(f))
-+ count++;
-+ }
-+ write_seqcount_end(&resv->seq);
-+ preempt_enable();
-+
-+ if (ef || !count)
-+ return 0;
-+
-+ /* Alloc memory for count number of eviction fence pointers. Fill the
-+ * ef_list array and ef_count
-+ */
-+
-+ fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
-+ GFP_KERNEL);
-+ if (!fence_list)
-+ return -ENOMEM;
-+
-+ preempt_disable();
-+ write_seqcount_begin(&resv->seq);
-+
-+ j = 0;
-+ for (i = 0; i < shared_count; ++i) {
-+ struct dma_fence *f;
-+ struct amdgpu_amdkfd_fence *efence;
-+
-+ f = rcu_dereference_protected(fobj->shared[i],
-+ reservation_object_held(resv));
-+
-+ efence = to_amdgpu_amdkfd_fence(f);
-+ if (efence) {
-+ fence_list[k++] = efence;
-+ fobj->shared_count--;
-+ } else
-+ RCU_INIT_POINTER(fobj->shared[j++], f);
-+ }
-+
-+ write_seqcount_end(&resv->seq);
-+ preempt_enable();
-+
-+ *ef_list = fence_list;
-+ *ef_count = k;
-+
-+ return 0;
-+}
-+
-+/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
-+ * reservation object.
-+ *
-+ * @bo: [IN] Add eviction fences to this BO
-+ * @ef_list: [IN] List of eviction fences to be added
-+ * @ef_count: [IN] Number of fences in ef_list.
-+ *
-+ * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
-+ * function.
-+ */
-+static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
-+ struct amdgpu_amdkfd_fence **ef_list,
-+ unsigned int ef_count)
-+{
-+ int i;
-+
-+ if (!ef_list || !ef_count)
-+ return;
-+
-+ for (i = 0; i < ef_count; i++) {
-+ amdgpu_bo_fence(bo, &ef_list[i]->base, true);
-+ /* Re-adding the fence takes an additional reference. Drop that
-+ * reference.
-+ */
-+ dma_fence_put(&ef_list[i]->base);
-+ }
-+
-+ kfree(ef_list);
-+}
-+
-+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
-+ bool wait)
-+{
-+ int ret;
-+
-+ if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
-+ "Called with userptr BO"))
-+ return -EINVAL;
-+
-+ amdgpu_ttm_placement_from_domain(bo, domain);
-+
-+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-+ if (ret)
-+ goto validate_fail;
-+ if (wait) {
-+ struct amdgpu_amdkfd_fence **ef_list;
-+ unsigned int ef_count;
-+
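-+ /* Temporarily drop the KFD eviction fence(s) so that waiting for the
-+ * BO's outstanding fences below does not trigger a KFD eviction; the
-+ * fences are re-added right after the wait.
-+ */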
-+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
-+ &ef_count);
-+ if (ret)
-+ goto validate_fail;
-+
-+ ttm_bo_wait(&bo->tbo, false, false);
-+ amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
-+ }
-+
-+validate_fail:
-+ return ret;
-+}
-+
-+static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
-+{
-+ struct amdgpu_vm_parser *p = param;
-+
-+ return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
-+}
-+
-+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
-+ *
-+ * Also updates page directory entries so we don't need to do this
-+ * again later until the page directory is validated again (e.g. after
-+ * an eviction or allocating new page tables).
-+ */
-+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
-+{
-+ struct amdgpu_bo *pd = vm->root.base.bo;
-+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
-+ struct amdgpu_vm_parser param;
-+ int ret;
-+
-+ param.domain = AMDGPU_GEM_DOMAIN_VRAM;
-+ param.wait = false;
-+
-+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
-+ &param);
-+ if (ret) {
-+ pr_err("amdgpu: failed to validate PT BOs\n");
-+ return ret;
-+ }
-+
-+ ret = amdgpu_amdkfd_validate(&param, pd);
-+ if (ret) {
-+ pr_err("amdgpu: failed to validate PD\n");
-+ return ret;
-+ }
-+
-+ ret = amdgpu_vm_update_directories(adev, vm);
-+ if (ret != 0)
-+ return ret;
-+
-+ return 0;
-+}
-+
-+/* add_bo_to_vm - Add a BO to a VM
-+ *
-+ * Everything that needs to be done only once when a BO is first added
-+ * to a VM. It can later be mapped and unmapped many times without
-+ * repeating these steps.
-+ *
-+ * 1. Allocate and initialize BO VA entry data structure
-+ * 2. Add BO to the VM
-+ * 3. Determine ASIC-specific PTE flags
-+ * 4. Alloc page tables and directories if needed
-+ * 4a. Validate new page tables and directories and update directories
-+ */
-+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-+ struct amdgpu_vm *avm, bool is_aql,
-+ struct kfd_bo_va_list **p_bo_va_entry)
-+{
-+ int ret;
-+ struct kfd_bo_va_list *bo_va_entry;
-+ struct amdkfd_vm *kvm = container_of(avm,
-+ struct amdkfd_vm, base);
-+ struct amdgpu_bo *pd = avm->root.base.bo;
-+ struct amdgpu_bo *bo = mem->bo;
-+ uint64_t va = mem->va;
-+ struct list_head *list_bo_va = &mem->bo_va_list;
-+ unsigned long bo_size = bo->tbo.mem.size;
-+
-+ if (!va) {
-+ pr_err("Invalid VA when adding BO to VM\n");
-+ return -EINVAL;
-+ }
-+
-+ if (is_aql)
-+ va += bo_size;
-+
-+ bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-+ if (!bo_va_entry)
-+ return -ENOMEM;
-+
-+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-+ va + bo_size, avm);
-+
-+ /* Add BO to VM internal data structures*/
-+ bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
-+ if (bo_va_entry->bo_va == NULL) {
-+ ret = -EINVAL;
-+ pr_err("Failed to add BO object to VM. ret == %d\n",
-+ ret);
-+ goto err_vmadd;
-+ }
-+
-+ bo_va_entry->va = va;
-+ bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
-+ mem->mapping_flags);
-+ bo_va_entry->kgd_dev = (void *)adev;
-+ list_add(&bo_va_entry->bo_list, list_bo_va);
-+
-+ if (p_bo_va_entry)
-+ *p_bo_va_entry = bo_va_entry;
-+
-+ /* Allocate new page tables if needed and validate
-+ * them. Clearing new page tables and validating them need to wait
-+ * on move fences. We don't want that to trigger the eviction
-+ * fence, so remove it temporarily.
-+ */
-+ amdgpu_amdkfd_remove_eviction_fence(pd,
-+ kvm->process_info->eviction_fence,
-+ NULL, NULL);
-+
-+ ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
-+ if (ret) {
-+ pr_err("Failed to allocate pts, err=%d\n", ret);
-+ goto err_alloc_pts;
-+ }
-+
-+ ret = vm_validate_pt_pd_bos(avm);
-+ if (ret != 0) {
-+ pr_err("validate_pt_pd_bos() failed\n");
-+ goto err_alloc_pts;
-+ }
-+
-+ /* Add the eviction fence back */
-+ amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-+
-+ return 0;
-+
-+err_alloc_pts:
-+ amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-+ amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
-+ list_del(&bo_va_entry->bo_list);
-+err_vmadd:
-+ kfree(bo_va_entry);
-+ return ret;
-+}
-+
-+static void remove_bo_from_vm(struct amdgpu_device *adev,
-+ struct kfd_bo_va_list *entry, unsigned long size)
-+{
-+ pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
-+ entry->va,
-+ entry->va + size, entry);
-+ amdgpu_vm_bo_rmv(adev, entry->bo_va);
-+ list_del(&entry->bo_list);
-+ kfree(entry);
-+}
-+
-+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-+ struct amdkfd_process_info *process_info,
-+ bool userptr)
-+{
-+ struct ttm_validate_buffer *entry = &mem->validate_list;
-+ struct amdgpu_bo *bo = mem->bo;
-+
-+ INIT_LIST_HEAD(&entry->head);
-+ entry->shared = true;
-+ entry->bo = &bo->tbo;
-+ mutex_lock(&process_info->lock);
-+ if (userptr)
-+ list_add_tail(&entry->head, &process_info->userptr_valid_list);
-+ else
-+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
-+ mutex_unlock(&process_info->lock);
-+}
-+
-+/* Initializes user pages. It registers the MMU notifier and validates
-+ * the userptr BO in the GTT domain.
-+ *
-+ * The BO must already be on the userptr_valid_list. Otherwise an
-+ * eviction and restore may happen that leaves the new BO unmapped
-+ * with the user mode queues running.
-+ *
-+ * Takes the process_info->lock to protect against concurrent restore
-+ * workers.
-+ *
-+ * Returns 0 for success, negative errno for errors.
-+ */
-+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
-+ uint64_t user_addr)
-+{
-+ struct amdkfd_process_info *process_info = mem->process_info;
-+ struct amdgpu_bo *bo = mem->bo;
-+ int ret = 0;
-+
-+ mutex_lock(&process_info->lock);
-+
-+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
-+ if (ret) {
-+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
-+ goto out;
-+ }
-+
-+ ret = amdgpu_mn_register(bo, user_addr);
-+ if (ret) {
-+ pr_err("%s: Failed to register MMU notifier: %d\n",
-+ __func__, ret);
-+ goto out;
-+ }
-+
-+ /* If no restore worker is running concurrently, user_pages
-+ * should not be allocated
-+ */
-+ WARN(mem->user_pages, "Leaking user_pages array");
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ mem->user_pages = drm_calloc_large(bo->tbo.ttm->num_pages,
-+ sizeof(struct page *));
-+#else
-+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-+ sizeof(struct page *),
-+ GFP_KERNEL | __GFP_ZERO);
-+#endif
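-+ /* (kvmalloc_array()/kvfree() replaced the drm_*_large() helpers
-+ * around kernel 4.12, hence the version guards above and below.)
-+ */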
-+ if (!mem->user_pages) {
-+ pr_err("%s: Failed to allocate pages array\n", __func__);
-+ ret = -ENOMEM;
-+ goto unregister_out;
-+ }
-+
-+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
-+ if (ret) {
-+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-+ goto free_out;
-+ }
-+
-+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-+
-+ ret = amdgpu_bo_reserve(bo, true);
-+ if (ret) {
-+ pr_err("%s: Failed to reserve BO\n", __func__);
-+ goto release_out;
-+ }
-+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
-+ ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-+ true, false);
-+ if (ret)
-+ pr_err("%s: failed to validate BO\n", __func__);
-+ amdgpu_bo_unreserve(bo);
-+
-+release_out:
-+ if (ret)
-+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages, 0);
-+free_out:
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ drm_free_large(mem->user_pages);
-+#else
-+ kvfree(mem->user_pages);
-+#endif
-+ mem->user_pages = NULL;
-+unregister_out:
-+ if (ret)
-+ amdgpu_mn_unregister(bo);
-+out:
-+ mutex_unlock(&process_info->lock);
-+ return ret;
-+}
-+
-+static int __map_bo_to_kernel(struct amdgpu_bo *bo, u32 domain, void **kptr)
-+{
-+ int ret;
-+
-+ ret = amdgpu_bo_reserve(bo, true);
-+ if (ret) {
-+ pr_err("Failed to reserve bo. ret %d\n", ret);
-+ return ret;
-+ }
-+
-+ ret = amdgpu_bo_pin(bo, domain, NULL);
-+ if (ret) {
-+ pr_err("Failed to pin bo. ret %d\n", ret);
-+ goto pin_failed;
-+ }
-+
-+ ret = amdgpu_bo_kmap(bo, kptr);
-+ if (ret) {
-+ pr_err("Failed to map bo to kernel. ret %d\n", ret);
-+ goto kmap_failed;
-+ }
-+
-+ amdgpu_bo_unreserve(bo);
-+
-+ return ret;
-+
-+kmap_failed:
-+ amdgpu_bo_unpin(bo);
-+pin_failed:
-+ amdgpu_bo_unreserve(bo);
-+
-+ return ret;
-+}
-+
-+static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
-+ uint64_t size, void *vm, struct kgd_mem **mem,
-+ uint64_t *offset, u32 domain, u64 flags,
-+ struct sg_table *sg, bool aql_queue,
-+ bool readonly, bool execute, bool coherent, bool no_sub,
-+ bool userptr)
-+{
-+ struct amdgpu_device *adev;
-+ int ret;
-+ struct amdgpu_bo *bo;
-+ uint64_t user_addr = 0;
-+ int byte_align;
-+ u32 alloc_domain;
-+ uint32_t mapping_flags;
-+ struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
-+
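-+ /* AQL queue buffers are mapped twice at consecutive VAs (see the
-+ * is_aql handling in add_bo_to_vm()), so the BO itself only needs
-+ * half of the requested size.
-+ */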
-+ if (aql_queue)
-+ size = size >> 1;
-+ if (userptr) {
-+ if (!offset || !*offset)
-+ return -EINVAL;
-+ user_addr = *offset;
-+ }
-+
-+ adev = get_amdgpu_device(kgd);
-+ byte_align = (adev->family == AMDGPU_FAMILY_VI &&
-+ adev->asic_type != CHIP_FIJI &&
-+ adev->asic_type != CHIP_POLARIS10 &&
-+ adev->asic_type != CHIP_POLARIS11) ?
-+ VI_BO_SIZE_ALIGN : 1;
-+
-+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-+ if (*mem == NULL) {
-+ ret = -ENOMEM;
-+ goto err;
-+ }
-+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
-+ mutex_init(&(*mem)->lock);
-+ (*mem)->coherent = coherent;
-+ (*mem)->no_substitute = no_sub;
-+ (*mem)->aql_queue = aql_queue;
-+
-+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
-+ if (!readonly)
-+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
-+ if (execute)
-+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
-+ if (coherent)
-+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
-+ else
-+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
-+
-+ (*mem)->mapping_flags = mapping_flags;
-+
-+ alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain;
-+
-+ amdgpu_sync_create(&(*mem)->sync);
-+
-+ ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
-+ if (ret) {
-+ pr_err("Insufficient system memory\n");
-+ goto err_bo_create;
-+ }
-+
-+ pr_debug("\t create BO VA 0x%llx size 0x%llx domain %s\n",
-+ va, size, domain_string(alloc_domain));
-+
-+ /* Allocate buffer object. Userptr objects need to start out
-+ * in the CPU domain, get moved to GTT when pinned.
-+ */
-+ ret = amdgpu_bo_create(adev, size, byte_align, false,
-+ alloc_domain,
-+ flags, sg, NULL, 0, &bo);
-+ if (ret != 0) {
-+ pr_err("Failed to create BO on domain %s. ret %d\n",
-+ domain_string(alloc_domain), ret);
-+ unreserve_system_mem_limit(adev, size, alloc_domain);
-+ goto err_bo_create;
-+ }
-+ bo->kfd_bo = *mem;
-+ (*mem)->bo = bo;
-+ if (userptr)
-+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
-+
-+ (*mem)->va = va;
-+ (*mem)->domain = domain;
-+ (*mem)->mapped_to_gpu_memory = 0;
-+ (*mem)->process_info = kfd_vm->process_info;
-+ add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, userptr);
-+
-+ if (userptr) {
-+ ret = init_user_pages(*mem, current->mm, user_addr);
-+ if (ret) {
-+ mutex_lock(&kfd_vm->process_info->lock);
-+ list_del(&(*mem)->validate_list.head);
-+ mutex_unlock(&kfd_vm->process_info->lock);
-+ goto allocate_init_user_pages_failed;
-+ }
-+ }
-+
-+ if (offset)
-+ *offset = amdgpu_bo_mmap_offset(bo);
-+
-+ return 0;
-+
-+allocate_init_user_pages_failed:
-+ amdgpu_bo_unref(&bo);
-+err_bo_create:
-+ kfree(*mem);
-+err:
-+ return ret;
-+}
-+
-+/* Reserving a BO and its page table BOs must happen atomically to
-+ * avoid deadlocks. When updating userptrs we need to temporarily
-+ * back off the reservation and then reacquire it. Track all the
-+ * reservation info in a context structure. Buffers can be mapped to
-+ * multiple VMs simultaneously (buffers being restored on multiple
-+ * GPUs).
-+ */
-+struct bo_vm_reservation_context {
-+ struct amdgpu_bo_list_entry kfd_bo;
-+ unsigned int n_vms;
-+ struct amdgpu_bo_list_entry *vm_pd;
-+ struct ww_acquire_ctx ticket;
-+ struct list_head list, duplicates;
-+ struct amdgpu_sync *sync;
-+ bool reserved;
-+};
-+
-+/**
-+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
-+ * @mem: KFD BO structure.
-+ * @vm: the VM to reserve.
-+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
-+ */
-+static int reserve_bo_and_vm(struct kgd_mem *mem,
-+ struct amdgpu_vm *vm,
-+ struct bo_vm_reservation_context *ctx)
-+{
-+ struct amdgpu_bo *bo = mem->bo;
-+ int ret;
-+
-+ WARN_ON(!vm);
-+
-+ ctx->reserved = false;
-+ ctx->n_vms = 1;
-+ ctx->sync = &mem->sync;
-+
-+ INIT_LIST_HEAD(&ctx->list);
-+ INIT_LIST_HEAD(&ctx->duplicates);
-+
-+ ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry)
-+ * ctx->n_vms, GFP_KERNEL);
-+ if (ctx->vm_pd == NULL)
-+ return -ENOMEM;
-+
-+ ctx->kfd_bo.robj = bo;
-+ ctx->kfd_bo.priority = 0;
-+ ctx->kfd_bo.tv.bo = &bo->tbo;
-+ ctx->kfd_bo.tv.shared = true;
-+ ctx->kfd_bo.user_pages = NULL;
-+ list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-+
-+ amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-+
-+ ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-+ false, &ctx->duplicates);
-+ if (!ret)
-+ ctx->reserved = true;
-+ else
-+ pr_err("Failed to reserve buffers in ttm\n");
-+
-+ if (ret) {
-+ kfree(ctx->vm_pd);
-+ ctx->vm_pd = NULL;
-+ }
-+
-+ return ret;
-+}
-+
-+enum VA_TYPE {
-+ VA_NOT_MAPPED = 0,
-+ VA_MAPPED,
-+ VA_DO_NOT_CARE,
-+};
-+
-+/**
-+ * reserve_bo_and_cond_vms - reserve a BO and some VMs that the BO has been
-+ * added to, conditionally based on map_type.
-+ * @mem: KFD BO structure.
-+ * @vm: the VM to reserve. If NULL, then all VMs associated with the BO
-+ * are used. Otherwise, only the given VM is reserved.
-+ * @map_type: the mapping status that will be used to filter the VMs.
-+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
-+ */
-+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
-+ struct amdgpu_vm *vm, enum VA_TYPE map_type,
-+ struct bo_vm_reservation_context *ctx)
-+{
-+ struct amdgpu_bo *bo = mem->bo;
-+ struct kfd_bo_va_list *entry;
-+ unsigned int i;
-+ int ret;
-+
-+ ctx->reserved = false;
-+ ctx->n_vms = 0;
-+ ctx->vm_pd = NULL;
-+ ctx->sync = &mem->sync;
-+
-+ INIT_LIST_HEAD(&ctx->list);
-+ INIT_LIST_HEAD(&ctx->duplicates);
-+
-+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-+ if ((vm && vm != entry->bo_va->base.vm) ||
-+ (entry->is_mapped != map_type
-+ && map_type != VA_DO_NOT_CARE))
-+ continue;
-+
-+ ctx->n_vms++;
-+ }
-+
-+ if (ctx->n_vms != 0) {
-+ ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry)
-+ * ctx->n_vms, GFP_KERNEL);
-+ if (ctx->vm_pd == NULL)
-+ return -ENOMEM;
-+ }
-+
-+ ctx->kfd_bo.robj = bo;
-+ ctx->kfd_bo.priority = 0;
-+ ctx->kfd_bo.tv.bo = &bo->tbo;
-+ ctx->kfd_bo.tv.shared = true;
-+ ctx->kfd_bo.user_pages = NULL;
-+ list_add(&ctx->kfd_bo.tv.head, &ctx->list);
-+
-+ i = 0;
-+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-+ if ((vm && vm != entry->bo_va->base.vm) ||
-+ (entry->is_mapped != map_type
-+ && map_type != VA_DO_NOT_CARE))
-+ continue;
-+
-+ amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
-+ &ctx->vm_pd[i]);
-+ i++;
-+ }
-+
-+ ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
-+ false, &ctx->duplicates);
-+ if (!ret)
-+ ctx->reserved = true;
-+ else
-+ pr_err("Failed to reserve buffers in ttm.\n");
-+
-+ if (ret) {
-+ kfree(ctx->vm_pd);
-+ ctx->vm_pd = NULL;
-+ }
-+
-+ return ret;
-+}
-+
-+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
-+ bool wait, bool intr)
-+{
-+ int ret = 0;
-+
-+ if (wait)
-+ ret = amdgpu_sync_wait(ctx->sync, intr);
-+
-+ if (ctx->reserved)
-+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
-+ kfree(ctx->vm_pd);
-+
-+ ctx->sync = NULL;
-+
-+ ctx->reserved = false;
-+ ctx->vm_pd = NULL;
-+
-+ return ret;
-+}
-+
-+static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
-+ struct kfd_bo_va_list *entry,
-+ struct amdgpu_sync *sync)
-+{
-+ struct amdgpu_bo_va *bo_va = entry->bo_va;
-+ struct amdgpu_vm *vm = bo_va->base.vm;
-+ struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
-+ struct amdgpu_bo *pd = vm->root.base.bo;
-+
-+ /* Remove eviction fence from PD (and thereby from PTs too as they
-+ * share the resv. object). Otherwise, during the PT update job (see
-+ * amdgpu_vm_bo_update_mapping), the eviction fence would get added to
-+ * the job->sync object.
-+ */
-+ amdgpu_amdkfd_remove_eviction_fence(pd,
-+ kvm->process_info->eviction_fence,
-+ NULL, NULL);
-+ amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
-+
-+ amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
-+
-+ /* Add the eviction fence back */
-+ amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-+
-+ amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-+
-+ /* Sync objects can't handle multiple GPUs (contexts) updating
-+ * sync->last_vm_update. Fortunately we don't need it for
-+ * KFD's purposes, so we can just drop that fence.
-+ */
-+ if (sync->last_vm_update) {
-+ dma_fence_put(sync->last_vm_update);
-+ sync->last_vm_update = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+static int update_gpuvm_pte(struct amdgpu_device *adev,
-+ struct kfd_bo_va_list *entry,
-+ struct amdgpu_sync *sync)
-+{
-+ int ret;
-+ struct amdgpu_vm *vm;
-+ struct amdgpu_bo_va *bo_va;
-+ struct amdgpu_bo *bo;
-+
-+ bo_va = entry->bo_va;
-+ vm = bo_va->base.vm;
-+ bo = bo_va->base.bo;
-+
-+ /* Update the page tables */
-+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
-+ if (ret != 0) {
-+ pr_err("amdgpu_vm_bo_update failed\n");
-+ return ret;
-+ }
-+
-+ amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-+
-+ /* Sync objects can't handle multiple GPUs (contexts) updating
-+ * sync->last_vm_update. Fortunately we don't need it for
-+ * KFD's purposes, so we can just drop that fence.
-+ */
-+ if (sync->last_vm_update) {
-+ dma_fence_put(sync->last_vm_update);
-+ sync->last_vm_update = NULL;
-+ }
-+
-+ return 0;
-+}
-+
-+static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
-+ bool no_update_pte)
-+{
-+ int ret;
-+
-+ /* Set virtual address for the allocation */
-+ ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
-+ amdgpu_bo_size(entry->bo_va->base.bo), entry->pte_flags);
-+ if (ret != 0) {
-+ pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
-+ entry->va, ret);
-+ return ret;
-+ }
-+
-+ if (no_update_pte)
-+ return 0;
-+
-+ ret = update_gpuvm_pte(adev, entry, sync);
-+ if (ret != 0) {
-+ pr_err("update_gpuvm_pte() failed\n");
-+ goto update_gpuvm_pte_failed;
-+ }
-+
-+ return 0;
-+
-+update_gpuvm_pte_failed:
-+ unmap_bo_from_gpuvm(adev, entry, sync);
-+ return ret;
-+}
-+
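-+/* Build a single-entry sg table that points directly at the given
-+ * doorbell address, so the doorbell page can be wrapped in a BO without
-+ * any CPU-side backing pages.
-+ */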
-+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
-+{
-+ struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-+
-+ if (!sg)
-+ return NULL;
-+ if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
-+ kfree(sg);
-+ return NULL;
-+ }
-+ sg->sgl->dma_address = addr;
-+ sg->sgl->length = size;
-+#ifdef CONFIG_NEED_SG_DMA_LENGTH
-+ sg->sgl->dma_length = size;
-+#endif
-+ return sg;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_sync_memory(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
-+{
-+ int ret = 0;
-+ struct amdgpu_sync sync;
-+ struct amdgpu_device *adev;
-+
-+ adev = get_amdgpu_device(kgd);
-+ amdgpu_sync_create(&sync);
-+
-+ mutex_lock(&mem->lock);
-+ amdgpu_sync_clone(adev, &mem->sync, &sync);
-+ mutex_unlock(&mem->lock);
-+
-+ ret = amdgpu_sync_wait(&sync, intr);
-+ amdgpu_sync_free(&sync);
-+ return ret;
-+}
-+
-+#define BOOL_TO_STR(b) ((b) ? "true" : "false")
-+
-+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
-+ struct kgd_dev *kgd, uint64_t va, uint64_t size,
-+ void *vm, struct kgd_mem **mem,
-+ uint64_t *offset, uint32_t flags)
-+{
-+ bool aql_queue, public, readonly, execute, coherent, no_sub, userptr;
-+ u64 alloc_flag;
-+ uint32_t domain;
-+ uint64_t *temp_offset;
-+ struct sg_table *sg = NULL;
-+
-+ if (!(flags & ALLOC_MEM_FLAGS_NONPAGED)) {
-+ pr_err("current hw doesn't support paged memory\n");
-+ return -EINVAL;
-+ }
-+
-+ domain = 0;
-+ alloc_flag = 0;
-+ temp_offset = NULL;
-+
-+ aql_queue = (flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) ? true : false;
-+ public = (flags & ALLOC_MEM_FLAGS_PUBLIC) ? true : false;
-+ readonly = (flags & ALLOC_MEM_FLAGS_READONLY) ? true : false;
-+ execute = (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ? true : false;
-+ coherent = (flags & ALLOC_MEM_FLAGS_COHERENT) ? true : false;
-+ no_sub = (flags & ALLOC_MEM_FLAGS_NO_SUBSTITUTE) ? true : false;
-+ userptr = (flags & ALLOC_MEM_FLAGS_USERPTR) ? true : false;
-+
-+ /*
-+ * Check on which domain to allocate BO
-+ */
-+ if (flags & ALLOC_MEM_FLAGS_VRAM) {
-+ domain = AMDGPU_GEM_DOMAIN_VRAM;
-+ alloc_flag = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-+ if (public) {
-+ alloc_flag = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-+ temp_offset = offset;
-+ }
-+ alloc_flag |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
-+ } else if (flags & (ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_USERPTR)) {
-+ domain = AMDGPU_GEM_DOMAIN_GTT;
-+ alloc_flag = 0;
-+ temp_offset = offset;
-+ } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
-+ domain = AMDGPU_GEM_DOMAIN_GTT;
-+ alloc_flag = 0;
-+ temp_offset = offset;
-+ if (size > UINT_MAX)
-+ return -EINVAL;
-+ sg = create_doorbell_sg(*offset, size);
-+ if (!sg)
-+ return -ENOMEM;
-+ }
-+
-+ if (offset && !userptr)
-+ *offset = 0;
-+
-+ pr_debug("Allocate VA 0x%llx - 0x%llx domain %s aql %s\n",
-+ va, va + size, domain_string(domain),
-+ BOOL_TO_STR(aql_queue));
-+
-+ pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s coherent %s no_sub %s\n",
-+ alloc_flag, BOOL_TO_STR(public),
-+ BOOL_TO_STR(readonly), BOOL_TO_STR(execute),
-+ BOOL_TO_STR(coherent), BOOL_TO_STR(no_sub));
-+
-+ return __alloc_memory_of_gpu(kgd, va, size, vm, mem,
-+ temp_offset, domain,
-+ alloc_flag, sg,
-+ aql_queue, readonly, execute,
-+ coherent, no_sub, userptr);
-+}
-+
-+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
-+{
-+ struct amdgpu_device *adev;
-+ struct kfd_bo_va_list *entry, *tmp;
-+ struct bo_vm_reservation_context ctx;
-+ int ret = 0;
-+ struct ttm_validate_buffer *bo_list_entry;
-+ struct amdkfd_process_info *process_info;
-+ unsigned long bo_size;
-+
-+ adev = get_amdgpu_device(kgd);
-+ process_info = ((struct amdkfd_vm *)vm)->process_info;
-+
-+ bo_size = mem->bo->tbo.mem.size;
-+
-+ mutex_lock(&mem->lock);
-+
-+ if (mem->mapped_to_gpu_memory > 0) {
-+ pr_err("BO VA 0x%llx size 0x%lx is already mapped to vm %p.\n",
-+ mem->va, bo_size, vm);
-+ mutex_unlock(&mem->lock);
-+ return -EBUSY;
-+ }
-+
-+ mutex_unlock(&mem->lock);
-+ /* lock is not needed after this, since mem is unused and will
-+ * be freed anyway
-+ */
-+
-+ /* No more MMU notifiers */
-+ amdgpu_mn_unregister(mem->bo);
-+
-+ /* Make sure restore workers don't access the BO any more */
-+ bo_list_entry = &mem->validate_list;
-+ mutex_lock(&process_info->lock);
-+ list_del(&bo_list_entry->head);
-+ mutex_unlock(&process_info->lock);
-+
-+ /* Free user pages if necessary */
-+ if (mem->user_pages) {
-+ pr_debug("%s: Freeing user_pages array\n", __func__);
-+ if (mem->user_pages[0])
-+ release_pages(mem->user_pages,
-+ mem->bo->tbo.ttm->num_pages, 0);
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ drm_free_large(mem->user_pages);
-+#else
-+ kvfree(mem->user_pages);
-+#endif
-+ }
-+
-+ ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ /* The eviction fence should be removed by the last unmap.
-+ * TODO: Log an error condition if the bo still has the eviction fence
-+ * attached
-+ */
-+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
-+ process_info->eviction_fence,
-+ NULL, NULL);
-+ pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
-+ mem->va + bo_size * (1 + mem->aql_queue));
-+
-+ /* Remove from VM internal data structures */
-+ list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) {
-+ remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
-+ entry, bo_size);
-+ }
-+
-+ ret = unreserve_bo_and_vms(&ctx, false, false);
-+
-+ /* Free the sync object */
-+ amdgpu_sync_free(&mem->sync);
-+
-+ /* If the SG is not NULL, it's one we created for a doorbell
-+ * BO. We need to free it.
-+ */
-+ if (mem->bo->tbo.sg) {
-+ sg_free_table(mem->bo->tbo.sg);
-+ kfree(mem->bo->tbo.sg);
-+ }
-+
-+ /* Free the BO*/
-+ amdgpu_bo_unref(&mem->bo);
-+ kfree(mem);
-+
-+ return ret;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
-+{
-+ struct amdgpu_device *adev;
-+ int ret;
-+ struct amdgpu_bo *bo;
-+ uint32_t domain;
-+ struct kfd_bo_va_list *entry;
-+ struct bo_vm_reservation_context ctx;
-+ struct kfd_bo_va_list *bo_va_entry = NULL;
-+ struct kfd_bo_va_list *bo_va_entry_aql = NULL;
-+ struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
-+ unsigned long bo_size;
-+ bool is_invalid_userptr;
-+
-+ adev = get_amdgpu_device(kgd);
-+
-+ /* Make sure restore is not running concurrently. Since we
-+ * don't map invalid userptr BOs, we rely on the next restore
-+ * worker to do the mapping
-+ */
-+ mutex_lock(&mem->process_info->lock);
-+
-+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be
-+ * sure that the MMU notifier is no longer running
-+ * concurrently and the queues are actually stopped
-+ */
-+ down_read(&current->mm->mmap_sem);
-+ is_invalid_userptr = atomic_read(&mem->invalid);
-+ up_read(&current->mm->mmap_sem);
-+
-+ mutex_lock(&mem->lock);
-+
-+ bo = mem->bo;
-+
-+ if (!bo) {
-+ pr_err("Invalid BO when mapping memory to GPU\n");
-+ return -EINVAL;
-+ }
-+
-+ domain = mem->domain;
-+ bo_size = bo->tbo.mem.size;
-+
-+ pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
-+ mem->va,
-+ mem->va + bo_size * (1 + mem->aql_queue),
-+ vm, domain_string(domain));
-+
-+ ret = reserve_bo_and_vm(mem, vm, &ctx);
-+ if (unlikely(ret != 0))
-+ goto bo_reserve_failed;
-+
-+ /* Userptr can be marked as "not invalid", but not actually be
-+ * validated yet (still in the system domain). In that case
-+ * the queues are still stopped and we can leave mapping for
-+ * the next restore worker
-+ */
-+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
-+ is_invalid_userptr = true;
-+
-+ if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) {
-+ ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false,
-+ &bo_va_entry);
-+ if (ret != 0)
-+ goto add_bo_to_vm_failed;
-+ if (mem->aql_queue) {
-+ ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm,
-+ true, &bo_va_entry_aql);
-+ if (ret != 0)
-+ goto add_bo_to_vm_failed_aql;
-+ }
-+ }
-+
-+ if (mem->mapped_to_gpu_memory == 0 &&
-+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-+ /* Validate BO only once. The eviction fence gets added to BO
-+ * the first time it is mapped. Validate will wait for all
-+ * background evictions to complete.
-+ */
-+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
-+ if (ret) {
-+ pr_debug("Validate failed\n");
-+ goto map_bo_to_gpuvm_failed;
-+ }
-+ }
-+
-+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-+ if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
-+ pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
-+ entry->va, entry->va + bo_size,
-+ entry);
-+
-+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
-+ is_invalid_userptr);
-+ if (ret != 0) {
-+				pr_err("Failed to map bo to gpuvm\n");
-+ goto map_bo_to_gpuvm_failed;
-+ }
-+ entry->is_mapped = true;
-+ mem->mapped_to_gpu_memory++;
-+ pr_debug("\t INC mapping count %d\n",
-+ mem->mapped_to_gpu_memory);
-+ }
-+ }
-+
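-+
-+	/* Attach the process eviction fence to the BO, unless it is a
-+	 * userptr BO.
-+	 */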
-+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) == NULL)
-+ amdgpu_bo_fence(bo,
-+ &kfd_vm->process_info->eviction_fence->base,
-+ true);
-+ ret = unreserve_bo_and_vms(&ctx, false, false);
-+
-+ mutex_unlock(&mem->process_info->lock);
-+ mutex_unlock(&mem->lock);
-+ return ret;
-+
-+map_bo_to_gpuvm_failed:
-+ if (bo_va_entry_aql)
-+ remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
-+add_bo_to_vm_failed_aql:
-+ if (bo_va_entry)
-+ remove_bo_from_vm(adev, bo_va_entry, bo_size);
-+add_bo_to_vm_failed:
-+ unreserve_bo_and_vms(&ctx, false, false);
-+bo_reserve_failed:
-+ mutex_unlock(&mem->process_info->lock);
-+ mutex_unlock(&mem->lock);
-+ return ret;
-+}
-+
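-+/* Return the GPU address of the VM's page directory, converted to an
-+ * actual MC/PDE address on ASICs that provide get_vm_pde.
-+ */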
-+static u64 get_vm_pd_gpu_offset(void *vm)
-+{
-+ struct amdgpu_vm *avm = (struct amdgpu_vm *) vm;
-+ struct amdgpu_device *adev =
-+ amdgpu_ttm_adev(avm->root.base.bo->tbo.bdev);
-+ u64 offset;
-+
-+ BUG_ON(avm == NULL);
-+
-+ amdgpu_bo_reserve(avm->root.base.bo, false);
-+
-+ offset = amdgpu_bo_gpu_offset(avm->root.base.bo);
-+
-+ amdgpu_bo_unreserve(avm->root.base.bo);
-+
-+ /* On some ASICs the FB doesn't start at 0. Adjust FB offset
-+ * to an actual MC address.
-+ */
-+ if (adev->gart.gart_funcs->get_vm_pde)
-+ offset = amdgpu_gart_get_vm_pde(adev, offset);
-+
-+ return offset;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-+ void **process_info,
-+ struct dma_fence **ef)
-+{
-+ int ret;
-+ struct amdkfd_vm *new_vm;
-+ struct amdkfd_process_info *info;
-+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
-+
-+ new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
-+ if (new_vm == NULL)
-+ return -ENOMEM;
-+
-+ /* Initialize the VM context, allocate the page directory and zero it */
-+ ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE);
-+ if (ret != 0) {
-+ pr_err("Failed init vm ret %d\n", ret);
-+ /* Undo everything related to the new VM context */
-+ goto vm_init_fail;
-+ }
-+ new_vm->adev = adev;
-+
-+ if (!*process_info) {
-+ info = kzalloc(sizeof(*info), GFP_KERNEL);
-+ if (!info) {
-+			pr_err("Failed to create amdkfd_process_info\n");
-+ ret = -ENOMEM;
-+ goto alloc_process_info_fail;
-+ }
-+
-+ mutex_init(&info->lock);
-+ INIT_LIST_HEAD(&info->vm_list_head);
-+ INIT_LIST_HEAD(&info->kfd_bo_list);
-+ INIT_LIST_HEAD(&info->userptr_valid_list);
-+ INIT_LIST_HEAD(&info->userptr_inval_list);
-+
-+ info->eviction_fence =
-+ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-+ current->mm);
-+ if (info->eviction_fence == NULL) {
-+ pr_err("Failed to create eviction fence\n");
-+ goto create_evict_fence_fail;
-+ }
-+
-+ info->pid = get_task_pid(current->group_leader,
-+ PIDTYPE_PID);
-+ atomic_set(&info->evicted_bos, 0);
-+ INIT_DELAYED_WORK(&info->work,
-+ amdgpu_amdkfd_restore_userptr_worker);
-+
-+ *process_info = info;
-+ *ef = dma_fence_get(&info->eviction_fence->base);
-+ }
-+
-+ new_vm->process_info = *process_info;
-+
-+ mutex_lock(&new_vm->process_info->lock);
-+ list_add_tail(&new_vm->vm_list_node,
-+ &(new_vm->process_info->vm_list_head));
-+ new_vm->process_info->n_vms++;
-+ mutex_unlock(&new_vm->process_info->lock);
-+
-+ *vm = (void *) new_vm;
-+
-+ pr_debug("Created process vm %p\n", *vm);
-+
-+ return ret;
-+
-+create_evict_fence_fail:
-+ kfree(info);
-+alloc_process_info_fail:
-+ amdgpu_vm_fini(adev, &new_vm->base);
-+vm_init_fail:
-+ kfree(new_vm);
-+ return ret;
-+
-+}
-+
-+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-+ struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm;
-+ struct amdgpu_vm *avm = &kfd_vm->base;
-+ struct amdgpu_bo *pd;
-+ struct amdkfd_process_info *process_info;
-+
-+ if (WARN_ON(!kgd || !vm))
-+ return;
-+
-+ pr_debug("Destroying process vm %p\n", vm);
-+ /* Release eviction fence from PD */
-+ pd = avm->root.base.bo;
-+ amdgpu_bo_reserve(pd, false);
-+ amdgpu_bo_fence(pd, NULL, false);
-+ amdgpu_bo_unreserve(pd);
-+
-+ process_info = kfd_vm->process_info;
-+
-+ mutex_lock(&process_info->lock);
-+ process_info->n_vms--;
-+ list_del(&kfd_vm->vm_list_node);
-+ mutex_unlock(&process_info->lock);
-+
-+ /* Release per-process resources */
-+ if (!process_info->n_vms) {
-+ WARN_ON(!list_empty(&process_info->kfd_bo_list));
-+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
-+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
-+
-+ dma_fence_put(&process_info->eviction_fence->base);
-+ cancel_delayed_work_sync(&process_info->work);
-+ put_pid(process_info->pid);
-+ kfree(process_info);
-+ }
-+
-+ /* Release the VM context */
-+ amdgpu_vm_fini(adev, avm);
-+ kfree(vm);
-+}
-+
-+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
-+{
-+ return get_vm_pd_gpu_offset(vm) >> AMDGPU_GPU_PAGE_SHIFT;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
-+ struct kfd_vm_fault_info *mem)
-+{
-+ struct amdgpu_device *adev;
-+
-+ adev = (struct amdgpu_device *) kgd;
-+ if (atomic_read(&adev->mc.vm_fault_info_updated) == 1) {
-+ *mem = *adev->mc.vm_fault_info;
-+ mb();
-+ atomic_set(&adev->mc.vm_fault_info_updated, 0);
-+ }
-+ return 0;
-+}
-+
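-+/* Check whether the BO has a bo_va attached for the given device and VM */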
-+static bool is_mem_on_local_device(struct kgd_dev *kgd,
-+ struct list_head *bo_va_list, void *vm)
-+{
-+ struct kfd_bo_va_list *entry;
-+
-+ list_for_each_entry(entry, bo_va_list, bo_list) {
-+ if (entry->kgd_dev == kgd && entry->bo_va->base.vm == vm)
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
-+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
-+{
-+ struct kfd_bo_va_list *entry;
-+ struct amdgpu_device *adev;
-+ unsigned int mapped_before;
-+ int ret = 0;
-+ struct bo_vm_reservation_context ctx;
-+ struct amdkfd_process_info *process_info;
-+ unsigned long bo_size;
-+
-+ adev = (struct amdgpu_device *) kgd;
-+ process_info = ((struct amdkfd_vm *)vm)->process_info;
-+
-+ bo_size = mem->bo->tbo.mem.size;
-+
-+ mutex_lock(&mem->lock);
-+
-+	/*
-+	 * Make sure that this BO is mapped on this KGD before unmapping it
-+	 */
-+ if (!is_mem_on_local_device(kgd, &mem->bo_va_list, vm)) {
-+ ret = -EINVAL;
-+ goto out;
-+ }
-+
-+ if (mem->mapped_to_gpu_memory == 0) {
-+ pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n",
-+ mem->va, bo_size, vm);
-+ ret = -EINVAL;
-+ goto out;
-+ }
-+ mapped_before = mem->mapped_to_gpu_memory;
-+
-+ ret = reserve_bo_and_cond_vms(mem, vm, VA_MAPPED, &ctx);
-+ if (unlikely(ret != 0))
-+ goto out;
-+
-+ pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
-+ mem->va,
-+ mem->va + bo_size * (1 + mem->aql_queue),
-+ vm);
-+
-+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
-+ if (entry->bo_va->base.vm == vm && entry->is_mapped) {
-+ pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
-+ entry->va,
-+ entry->va + bo_size,
-+ entry);
-+
-+ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
-+ if (ret == 0) {
-+ entry->is_mapped = false;
-+ } else {
-+ pr_err("failed to unmap VA 0x%llx\n",
-+ mem->va);
-+ goto unreserve_out;
-+ }
-+
-+ mem->mapped_to_gpu_memory--;
-+ pr_debug("\t DEC mapping count %d\n",
-+ mem->mapped_to_gpu_memory);
-+ }
-+ }
-+
-+ /* If BO is unmapped from all VMs, unfence it. It can be evicted if
-+ * required.
-+ */
-+ if (mem->mapped_to_gpu_memory == 0 &&
-+ !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm))
-+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
-+ process_info->eviction_fence,
-+ NULL, NULL);
-+
-+ if (mapped_before == mem->mapped_to_gpu_memory) {
-+ pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n",
-+ mem->va, bo_size, vm);
-+ ret = -EINVAL;
-+ }
-+
-+unreserve_out:
-+ unreserve_bo_and_vms(&ctx, false, false);
-+out:
-+ mutex_unlock(&mem->lock);
-+ return ret;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_mmap_bo(struct kgd_dev *kgd, struct vm_area_struct *vma)
-+{
-+ struct amdgpu_device *adev;
-+
-+ adev = get_amdgpu_device(kgd);
-+ if (!adev) {
-+ pr_err("Could not get amdgpu device in %s\n", __func__);
-+ return -ENODEV;
-+ }
-+
-+ return amdgpu_bo_mmap(NULL, vma, &adev->mman.bdev);
-+}
-+
-+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, void **kptr)
-+{
-+ int ret;
-+ struct amdgpu_bo *bo = mem->bo;
-+
-+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-+ pr_err("userptr can't be mapped to kernel\n");
-+ return -EINVAL;
-+ }
-+
-+	/* Delete kgd_mem from kfd_bo_list to avoid re-validating
-+	 * this BO when it is restored after an eviction.
-+	 */
-+ mutex_lock(&mem->process_info->lock);
-+
-+ list_del_init(&mem->validate_list.head);
-+
-+ ret = __map_bo_to_kernel(bo, AMDGPU_GEM_DOMAIN_GTT, kptr);
-+ if (!ret)
-+ mem->kptr = *kptr;
-+
-+ mutex_unlock(&mem->process_info->lock);
-+
-+ return ret;
-+}
-+
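-+/* Pin a BO in mem->domain without mapping it into any GPUVM. Used
-+ * below when building an sg_table for the BO.
-+ */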
-+static int pin_bo_wo_map(struct kgd_mem *mem)
-+{
-+ struct amdgpu_bo *bo = mem->bo;
-+ int ret = 0;
-+
-+ ret = amdgpu_bo_reserve(bo, false);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ ret = amdgpu_bo_pin(bo, mem->domain, NULL);
-+ amdgpu_bo_unreserve(bo);
-+
-+ return ret;
-+}
-+
-+static void unpin_bo_wo_map(struct kgd_mem *mem)
-+{
-+ struct amdgpu_bo *bo = mem->bo;
-+ int ret = 0;
-+
-+ ret = amdgpu_bo_reserve(bo, false);
-+ if (unlikely(ret != 0))
-+ return;
-+
-+ amdgpu_bo_unpin(bo);
-+ amdgpu_bo_unreserve(bo);
-+}
-+
-+#define AMD_GPU_PAGE_SHIFT PAGE_SHIFT
-+#define AMD_GPU_PAGE_SIZE (_AC(1, UL) << AMD_GPU_PAGE_SHIFT)
-+
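-+/* Build an sg_table describing a sub-range of a pinned BO. VRAM BOs
-+ * are described by bus addresses in the PCI aperture, GTT/system BOs
-+ * by their backing pages.
-+ */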
-+static int get_sg_table(struct amdgpu_device *adev,
-+ struct kgd_mem *mem, uint64_t offset,
-+ uint64_t size, struct sg_table **ret_sg)
-+{
-+ struct amdgpu_bo *bo = mem->bo;
-+ struct sg_table *sg = NULL;
-+ unsigned long bus_addr;
-+ unsigned int chunks;
-+ unsigned int i;
-+ struct scatterlist *s;
-+ uint64_t offset_in_page;
-+ unsigned int page_size;
-+ int ret;
-+
-+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-+ if (!sg) {
-+ ret = -ENOMEM;
-+ goto out;
-+ }
-+
-+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM)
-+ page_size = AMD_GPU_PAGE_SIZE;
-+ else
-+ page_size = PAGE_SIZE;
-+
-+ offset_in_page = offset & (page_size - 1);
-+ chunks = (size + offset_in_page + page_size - 1)
-+ / page_size;
-+
-+ ret = sg_alloc_table(sg, chunks, GFP_KERNEL);
-+ if (unlikely(ret))
-+ goto out;
-+
-+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) {
-+ bus_addr = bo->tbo.offset + adev->mc.aper_base + offset;
-+
-+ for_each_sg(sg->sgl, s, sg->orig_nents, i) {
-+ uint64_t chunk_size, length;
-+
-+ chunk_size = page_size - offset_in_page;
-+ length = min(size, chunk_size);
-+
-+ sg_set_page(s, NULL, length, offset_in_page);
-+ s->dma_address = bus_addr;
-+ s->dma_length = length;
-+
-+ size -= length;
-+ offset_in_page = 0;
-+ bus_addr += length;
-+ }
-+ } else {
-+ struct page **pages;
-+ unsigned int cur_page;
-+
-+ pages = bo->tbo.ttm->pages;
-+
-+ cur_page = offset / page_size;
-+ for_each_sg(sg->sgl, s, sg->orig_nents, i) {
-+ uint64_t chunk_size, length;
-+
-+ chunk_size = page_size - offset_in_page;
-+ length = min(size, chunk_size);
-+
-+ sg_set_page(s, pages[cur_page], length, offset_in_page);
-+ s->dma_address = page_to_phys(pages[cur_page]);
-+ s->dma_length = length;
-+
-+ size -= length;
-+ offset_in_page = 0;
-+ cur_page++;
-+ }
-+ }
-+
-+ *ret_sg = sg;
-+ return 0;
-+out:
-+ kfree(sg);
-+ *ret_sg = NULL;
-+ return ret;
-+}
-+
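-+/* Pin the BO and return an sg_table for the requested range. The
-+ * caller releases it with amdgpu_amdkfd_gpuvm_unpin_put_sg_table().
-+ */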
-+int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, uint64_t offset,
-+ uint64_t size, struct sg_table **ret_sg)
-+{
-+ int ret;
-+ struct amdgpu_device *adev;
-+
-+ ret = pin_bo_wo_map(mem);
-+ if (unlikely(ret != 0))
-+ return ret;
-+
-+ adev = get_amdgpu_device(kgd);
-+
-+ ret = get_sg_table(adev, mem, offset, size, ret_sg);
-+ if (ret)
-+ unpin_bo_wo_map(mem);
-+
-+ return ret;
-+}
-+
-+void amdgpu_amdkfd_gpuvm_unpin_put_sg_table(
-+ struct kgd_mem *mem, struct sg_table *sg)
-+{
-+ sg_free_table(sg);
-+ kfree(sg);
-+
-+ unpin_bo_wo_map(mem);
-+}
-+
-+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
-+ struct dma_buf *dma_buf,
-+ uint64_t va, void *vm,
-+ struct kgd_mem **mem, uint64_t *size,
-+ uint64_t *mmap_offset)
-+{
-+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-+ struct drm_gem_object *obj;
-+ struct amdgpu_bo *bo;
-+ struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm;
-+
-+ if (dma_buf->ops != &drm_gem_prime_dmabuf_ops)
-+ /* Can't handle non-graphics buffers */
-+ return -EINVAL;
-+
-+ obj = dma_buf->priv;
-+ if (obj->dev->dev_private != adev)
-+ /* Can't handle buffers from other devices */
-+ return -EINVAL;
-+
-+ bo = gem_to_amdgpu_bo(obj);
-+ if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
-+ AMDGPU_GEM_DOMAIN_GTT |
-+ AMDGPU_GEM_DOMAIN_DGMA)))
-+ /* Only VRAM and GTT BOs are supported */
-+ return -EINVAL;
-+
-+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-+ if (*mem == NULL)
-+ return -ENOMEM;
-+
-+ if (size)
-+ *size = amdgpu_bo_size(bo);
-+
-+ if (mmap_offset)
-+ *mmap_offset = amdgpu_bo_mmap_offset(bo);
-+
-+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
-+ mutex_init(&(*mem)->lock);
-+ (*mem)->mapping_flags =
-+ AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-+ AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
-+
-+ (*mem)->bo = amdgpu_bo_ref(bo);
-+ (*mem)->va = va;
-+ if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
-+ (*mem)->domain = AMDGPU_GEM_DOMAIN_VRAM;
-+ else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT)
-+ (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT;
-+ else
-+ (*mem)->domain = AMDGPU_GEM_DOMAIN_DGMA;
-+ (*mem)->mapped_to_gpu_memory = 0;
-+ (*mem)->process_info = kfd_vm->process_info;
-+ add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, false);
-+ amdgpu_sync_create(&(*mem)->sync);
-+
-+ return 0;
-+}
-+
-+int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm,
-+ struct kgd_mem *mem,
-+ struct dma_buf **dmabuf)
-+{
-+ struct amdgpu_device *adev = NULL;
-+ struct amdgpu_bo *bo = NULL;
-+ struct drm_gem_object *gobj = NULL;
-+
-+ if (!dmabuf || !kgd || !vm || !mem)
-+ return -EINVAL;
-+
-+ adev = get_amdgpu_device(kgd);
-+ bo = mem->bo;
-+
-+ gobj = amdgpu_gem_prime_foreign_bo(adev, bo);
-+ if (gobj == NULL) {
-+ pr_err("Export BO failed. Unable to find/create GEM object\n");
-+ return -EINVAL;
-+ }
-+
-+ *dmabuf = amdgpu_gem_prime_export(adev->ddev, gobj, 0);
-+ return 0;
-+}
-+
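-+/* Validate the page directory and page table BOs of every VM that
-+ * belongs to the process.
-+ */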
-+static int process_validate_vms(struct amdkfd_process_info *process_info)
-+{
-+ struct amdkfd_vm *peer_vm;
-+ int ret;
-+
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node) {
-+ ret = vm_validate_pt_pd_bos(&peer_vm->base);
-+ if (ret)
-+ return ret;
-+ }
-+
-+ return 0;
-+}
-+
-+/* Evict a userptr BO by stopping the queues if necessary
-+ *
-+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
-+ * cannot do any memory allocations, and cannot take any locks that
-+ * are held elsewhere while allocating memory. Therefore this is as
-+ * simple as possible, using atomic counters.
-+ *
-+ * It doesn't do anything to the BO itself. The real work happens in
-+ * restore, where we get updated page addresses. This function only
-+ * ensures that GPU access to the BO is stopped.
-+ */
-+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
-+ struct mm_struct *mm)
-+{
-+ struct amdkfd_process_info *process_info = mem->process_info;
-+ int invalid, evicted_bos;
-+ int r = 0;
-+
-+ invalid = atomic_inc_return(&mem->invalid);
-+ evicted_bos = atomic_inc_return(&process_info->evicted_bos);
-+ if (evicted_bos == 1) {
-+ /* First eviction, stop the queues */
-+ r = kgd2kfd->quiesce_mm(NULL, mm);
-+ if (r != 0)
-+ pr_err("Failed to quiesce KFD\n");
-+ schedule_delayed_work(&process_info->work, 1);
-+ }
-+
-+ return r;
-+}
-+
-+/* Update invalid userptr BOs
-+ *
-+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
-+ * userptr_inval_list and updates user pages for all BOs that have
-+ * been invalidated since their last update.
-+ */
-+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
-+ struct mm_struct *mm)
-+{
-+ struct kgd_mem *mem, *tmp_mem;
-+ struct amdgpu_bo *bo;
-+ int invalid, ret;
-+
-+ /* Move all invalidated BOs to the userptr_inval_list and
-+ * release their user pages by migration to the CPU domain
-+ */
-+ list_for_each_entry_safe(mem, tmp_mem,
-+ &process_info->userptr_valid_list,
-+ validate_list.head) {
-+ if (!atomic_read(&mem->invalid))
-+ continue; /* BO is still valid */
-+
-+ bo = mem->bo;
-+
-+ if (amdgpu_bo_reserve(bo, true))
-+ return -EAGAIN;
-+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
-+ amdgpu_bo_unreserve(bo);
-+ if (ret) {
-+ pr_err("%s: Failed to invalidate userptr BO\n",
-+ __func__);
-+ return -EAGAIN;
-+ }
-+
-+ list_move_tail(&mem->validate_list.head,
-+ &process_info->userptr_inval_list);
-+ }
-+
-+ if (list_empty(&process_info->userptr_inval_list))
-+ return 0; /* All evicted userptr BOs were freed */
-+
-+ /* Go through userptr_inval_list and update any invalid user_pages */
-+ list_for_each_entry(mem, &process_info->userptr_inval_list,
-+ validate_list.head) {
-+ invalid = atomic_read(&mem->invalid);
-+ if (!invalid)
-+ /* BO hasn't been invalidated since the last
-+ * revalidation attempt. Keep its BO list.
-+ */
-+ continue;
-+
-+ bo = mem->bo;
-+
-+ if (!mem->user_pages) {
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ mem->user_pages =
-+ drm_calloc_large(bo->tbo.ttm->num_pages,
-+ sizeof(struct page *));
-+#else
-+ mem->user_pages =
-+ kvmalloc_array(bo->tbo.ttm->num_pages,
-+ sizeof(struct page *),
-+ GFP_KERNEL | __GFP_ZERO);
-+#endif
-+ if (!mem->user_pages) {
-+ pr_err("%s: Failed to allocate pages array\n",
-+ __func__);
-+ return -ENOMEM;
-+ }
-+ } else if (mem->user_pages[0]) {
-+ release_pages(mem->user_pages,
-+ bo->tbo.ttm->num_pages, 0);
-+ }
-+
-+ /* Get updated user pages */
-+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
-+ mem->user_pages);
-+ if (ret) {
-+ mem->user_pages[0] = NULL;
-+ pr_info("%s: Failed to get user pages: %d\n",
-+ __func__, ret);
-+ /* Pretend it succeeded. It will fail later
-+ * with a VM fault if the GPU tries to access
-+ * it. Better than hanging indefinitely with
-+ * stalled user mode queues.
-+ */
-+ }
-+
-+ /* Mark the BO as valid unless it was invalidated
-+ * again concurrently
-+ */
-+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-+ return -EAGAIN;
-+ }
-+ return 0;
-+}
-+
-+/* Validate invalid userptr BOs
-+ *
-+ * Validates BOs on the userptr_inval_list, and moves them back to the
-+ * userptr_valid_list. Also updates GPUVM page tables with new page
-+ * addresses and waits for the page table updates to complete.
-+ */
-+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
-+{
-+ struct amdgpu_bo_list_entry *pd_bo_list_entries;
-+ struct list_head resv_list, duplicates;
-+ struct ww_acquire_ctx ticket;
-+ struct amdgpu_sync sync;
-+
-+ struct amdkfd_vm *peer_vm;
-+ struct kgd_mem *mem, *tmp_mem;
-+ struct amdgpu_bo *bo;
-+ int i, ret;
-+
-+ pd_bo_list_entries = kcalloc(process_info->n_vms,
-+ sizeof(struct amdgpu_bo_list_entry),
-+ GFP_KERNEL);
-+ if (!pd_bo_list_entries) {
-+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
-+ return -ENOMEM;
-+ }
-+
-+ INIT_LIST_HEAD(&resv_list);
-+ INIT_LIST_HEAD(&duplicates);
-+
-+ /* Get all the page directory BOs that need to be reserved */
-+ i = 0;
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_vm_get_pd_bo(&peer_vm->base, &resv_list,
-+ &pd_bo_list_entries[i++]);
-+ /* Add the userptr_inval_list entries to resv_list */
-+ list_for_each_entry(mem, &process_info->userptr_inval_list,
-+ validate_list.head) {
-+ list_add_tail(&mem->resv_list.head, &resv_list);
-+ mem->resv_list.bo = mem->validate_list.bo;
-+ mem->resv_list.shared = mem->validate_list.shared;
-+ }
-+
-+ /* Reserve all BOs and page tables for validation */
-+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
-+ WARN(!list_empty(&duplicates), "Duplicates should be empty");
-+ if (ret)
-+ goto out;
-+
-+ amdgpu_sync_create(&sync);
-+
-+ /* Avoid triggering eviction fences when unmapping invalid
-+ * userptr BOs (waits for all fences, doesn't use
-+ * FENCE_OWNER_VM)
-+ */
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->base.root.base.bo,
-+ process_info->eviction_fence,
-+ NULL, NULL);
-+
-+ ret = process_validate_vms(process_info);
-+ if (ret)
-+ goto unreserve_out;
-+
-+ /* Validate BOs and update GPUVM page tables */
-+ list_for_each_entry_safe(mem, tmp_mem,
-+ &process_info->userptr_inval_list,
-+ validate_list.head) {
-+ struct kfd_bo_va_list *bo_va_entry;
-+
-+ bo = mem->bo;
-+
-+ /* Copy pages array and validate the BO if we got user pages */
-+ if (mem->user_pages[0]) {
-+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-+ mem->user_pages);
-+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
-+ ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-+ false, false);
-+ if (ret) {
-+ pr_err("%s: failed to validate BO\n", __func__);
-+ goto unreserve_out;
-+ }
-+ }
-+
-+		/* Validation succeeded: the BO now owns the pages. Free
-+		 * our copy of the pointer array and put the BO back on
-+		 * the userptr_valid_list. If it needs to be revalidated
-+		 * later, we start over from scratch.
-+		 */
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ drm_free_large(mem->user_pages);
-+#else
-+ kvfree(mem->user_pages);
-+#endif
-+ mem->user_pages = NULL;
-+ list_move_tail(&mem->validate_list.head,
-+ &process_info->userptr_valid_list);
-+
-+ /* Update mapping. If the BO was not validated
-+ * (because we couldn't get user pages), this will
-+ * clear the page table entries, which will result in
-+ * VM faults if the GPU tries to access the invalid
-+ * memory.
-+ */
-+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
-+ if (!bo_va_entry->is_mapped)
-+ continue;
-+
-+ ret = update_gpuvm_pte((struct amdgpu_device *)
-+ bo_va_entry->kgd_dev,
-+ bo_va_entry, &sync);
-+ if (ret) {
-+ pr_err("%s: update PTE failed\n", __func__);
-+ /* make sure this gets validated again */
-+ atomic_inc(&mem->invalid);
-+ goto unreserve_out;
-+ }
-+ }
-+ }
-+unreserve_out:
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_bo_fence(peer_vm->base.root.base.bo,
-+ &process_info->eviction_fence->base, true);
-+ ttm_eu_backoff_reservation(&ticket, &resv_list);
-+ amdgpu_sync_wait(&sync, false);
-+ amdgpu_sync_free(&sync);
-+out:
-+ kfree(pd_bo_list_entries);
-+
-+ return ret;
-+}
-+
-+/* Worker callback to restore evicted userptr BOs
-+ *
-+ * Tries to update and validate all userptr BOs. If successful and no
-+ * concurrent evictions happened, the queues are restarted. Otherwise,
-+ * reschedule for another attempt later.
-+ */
-+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
-+{
-+ struct delayed_work *dwork = to_delayed_work(work);
-+ struct amdkfd_process_info *process_info =
-+ container_of(dwork, struct amdkfd_process_info, work);
-+ struct task_struct *usertask;
-+ struct mm_struct *mm;
-+ int evicted_bos;
-+
-+ evicted_bos = atomic_read(&process_info->evicted_bos);
-+ if (!evicted_bos)
-+ return;
-+
-+ /* Reference task and mm in case of concurrent process termination */
-+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
-+ if (!usertask)
-+ return;
-+ mm = get_task_mm(usertask);
-+ if (!mm) {
-+ put_task_struct(usertask);
-+ return;
-+ }
-+
-+ mutex_lock(&process_info->lock);
-+
-+ if (update_invalid_user_pages(process_info, mm))
-+ goto unlock_out;
-+ /* userptr_inval_list can be empty if all evicted userptr BOs
-+ * have been freed. In that case there is nothing to validate
-+ * and we can just restart the queues.
-+ */
-+ if (!list_empty(&process_info->userptr_inval_list)) {
-+ if (atomic_read(&process_info->evicted_bos) != evicted_bos)
-+ goto unlock_out; /* Concurrent eviction, try again */
-+
-+ if (validate_invalid_user_pages(process_info))
-+ goto unlock_out;
-+ }
-+	/* Final check for a concurrent eviction and atomic update. If
-+	 * another eviction happens after the successful update, it will
-+	 * count as a first eviction and call quiesce_mm again. The
-+	 * eviction reference counting inside KFD handles this case.
-+	 */
-+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
-+ evicted_bos)
-+ goto unlock_out;
-+ evicted_bos = 0;
-+ if (kgd2kfd->resume_mm(NULL, mm)) {
-+ pr_err("%s: Failed to resume KFD\n", __func__);
-+ /* No recovery from this failure. Probably the CP is
-+ * hanging. No point trying again.
-+ */
-+ }
-+unlock_out:
-+ mutex_unlock(&process_info->lock);
-+ mmput(mm);
-+ put_task_struct(usertask);
-+
-+ /* If validation failed, reschedule another attempt */
-+ if (evicted_bos)
-+ schedule_delayed_work(&process_info->work, 1);
-+}
-+
-+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
-+ * KFD process identified by process_info
-+ *
-+ * @process_info: amdkfd_process_info of the KFD process
-+ *
-+ * After a memory eviction, the restore thread calls this function. It must
-+ * be called while the process is still valid. BO restore involves:
-+ *
-+ * 1.  Release the old eviction fence and create a new one
-+ * 2.  Get two copies of the PD BO list from all the VMs. Keep one copy as pd_list.
-+ * 3.  Use the second PD list and kfd_bo_list to create a list (ctx.list) of
-+ *     BOs that need to be reserved.
-+ * 4.  Reserve all the BOs
-+ * 5.  Validate the PD and PT BOs.
-+ * 6.  Validate all KFD BOs using kfd_bo_list, map them, and attach the new fence
-+ * 7.  Add the fence to all PD and PT BOs.
-+ * 8. Unreserve all BOs
-+ */
-+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
-+{
-+ struct amdgpu_bo_list_entry *pd_bo_list;
-+ struct amdkfd_process_info *process_info = info;
-+ struct amdkfd_vm *peer_vm;
-+ struct kgd_mem *mem;
-+ struct bo_vm_reservation_context ctx;
-+ struct amdgpu_amdkfd_fence *new_fence;
-+ int ret = 0, i;
-+ struct list_head duplicate_save;
-+ struct amdgpu_sync sync_obj;
-+
-+ INIT_LIST_HEAD(&duplicate_save);
-+ INIT_LIST_HEAD(&ctx.list);
-+ INIT_LIST_HEAD(&ctx.duplicates);
-+
-+ pd_bo_list = kcalloc(process_info->n_vms,
-+ sizeof(struct amdgpu_bo_list_entry),
-+ GFP_KERNEL);
-+ if (pd_bo_list == NULL)
-+ return -ENOMEM;
-+
-+ i = 0;
-+ mutex_lock(&process_info->lock);
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node)
-+ amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
-+ &pd_bo_list[i++]);
-+
-+ /* Reserve all BOs and page tables/directory. Add all BOs from
-+ * kfd_bo_list to ctx.list
-+ */
-+ list_for_each_entry(mem, &process_info->kfd_bo_list,
-+ validate_list.head) {
-+
-+ list_add_tail(&mem->resv_list.head, &ctx.list);
-+ mem->resv_list.bo = mem->validate_list.bo;
-+ mem->resv_list.shared = mem->validate_list.shared;
-+ }
-+
-+ ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
-+ false, &duplicate_save);
-+ if (ret) {
-+ pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
-+ goto ttm_reserve_fail;
-+ }
-+
-+ amdgpu_sync_create(&sync_obj);
-+ ctx.sync = &sync_obj;
-+
-+ /* Validate PDs and PTs */
-+ ret = process_validate_vms(process_info);
-+ if (ret)
-+ goto validate_map_fail;
-+
-+ /* Wait for PD/PTs validate to finish */
-+ /* FIXME: I think this isn't needed */
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node) {
-+ struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
-+
-+ ttm_bo_wait(&bo->tbo, false, false);
-+ }
-+
-+ /* Validate BOs and map them to GPUVM (update VM page tables). */
-+ list_for_each_entry(mem, &process_info->kfd_bo_list,
-+ validate_list.head) {
-+
-+ struct amdgpu_bo *bo = mem->bo;
-+ uint32_t domain = mem->domain;
-+ struct kfd_bo_va_list *bo_va_entry;
-+
-+ ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
-+ if (ret) {
-+ pr_debug("Memory eviction: Validate BOs failed. Try again\n");
-+ goto validate_map_fail;
-+ }
-+
-+ list_for_each_entry(bo_va_entry, &mem->bo_va_list,
-+ bo_list) {
-+ ret = update_gpuvm_pte((struct amdgpu_device *)
-+ bo_va_entry->kgd_dev,
-+ bo_va_entry,
-+ ctx.sync);
-+ if (ret) {
-+ pr_debug("Memory eviction: update PTE failed. Try again\n");
-+ goto validate_map_fail;
-+ }
-+ }
-+ }
-+
-+ amdgpu_sync_wait(ctx.sync, false);
-+
-+	/* Release the old eviction fence and create a new one. Because a
-+	 * fence can only go from unsignaled to signaled, it cannot be reused.
-+	 * Use the context and mm from the old fence.
-+	 */
-+ new_fence = amdgpu_amdkfd_fence_create(
-+ process_info->eviction_fence->base.context,
-+ process_info->eviction_fence->mm);
-+ if (!new_fence) {
-+ pr_err("Failed to create eviction fence\n");
-+ ret = -ENOMEM;
-+ goto validate_map_fail;
-+ }
-+ dma_fence_put(&process_info->eviction_fence->base);
-+ process_info->eviction_fence = new_fence;
-+ *ef = dma_fence_get(&new_fence->base);
-+
-+ /* Wait for validate to finish and attach new eviction fence */
-+ list_for_each_entry(mem, &process_info->kfd_bo_list,
-+ validate_list.head)
-+ ttm_bo_wait(&mem->bo->tbo, false, false);
-+ list_for_each_entry(mem, &process_info->kfd_bo_list,
-+ validate_list.head)
-+ amdgpu_bo_fence(mem->bo,
-+ &process_info->eviction_fence->base, true);
-+
-+ /* Attach eviction fence to PD / PT BOs */
-+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
-+ vm_list_node) {
-+ struct amdgpu_bo *bo = peer_vm->base.root.base.bo;
-+
-+ amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
-+ }
-+validate_map_fail:
-+ ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
-+ amdgpu_sync_free(&sync_obj);
-+ttm_reserve_fail:
-+ mutex_unlock(&process_info->lock);
-+evict_fence_fail:
-+ kfree(pd_bo_list);
-+ return ret;
-+}
-+
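-+/* Copy size bytes from src_mem at src_offset to dst_mem at dst_offset
-+ * on the SDMA buffer-funcs ring, walking scattered VRAM nodes as
-+ * needed. Returns the last fence and the number of bytes copied.
-+ */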
-+int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
-+ uint64_t src_offset, struct kgd_mem *dst_mem,
-+ uint64_t dst_offset, uint64_t size,
-+ struct dma_fence **f, uint64_t *actual_size)
-+{
-+ struct amdgpu_device *adev = NULL;
-+ struct ttm_mem_reg *src = NULL, *dst = NULL;
-+ struct ttm_buffer_object *src_ttm_bo, *dst_ttm_bo;
-+ struct drm_mm_node *src_mm, *dst_mm;
-+ struct amdgpu_ring *ring;
-+ struct ww_acquire_ctx ticket;
-+ struct list_head list;
-+ struct ttm_validate_buffer resv_list[2];
-+ uint64_t src_start, dst_start;
-+ uint64_t src_left, dst_left, cur_copy_size, total_copy_size = 0;
-+ struct dma_fence *fence = NULL;
-+ int r;
-+
-+ if (!kgd || !src_mem || !dst_mem)
-+ return -EINVAL;
-+
-+ if (actual_size)
-+ *actual_size = 0;
-+
-+ adev = get_amdgpu_device(kgd);
-+ src_ttm_bo = &src_mem->bo->tbo;
-+ dst_ttm_bo = &dst_mem->bo->tbo;
-+ src = &src_ttm_bo->mem;
-+ dst = &dst_ttm_bo->mem;
-+ src_mm = (struct drm_mm_node *)src->mm_node;
-+ dst_mm = (struct drm_mm_node *)dst->mm_node;
-+
-+ ring = adev->mman.buffer_funcs_ring;
-+
-+ INIT_LIST_HEAD(&list);
-+
-+ resv_list[0].bo = src_ttm_bo;
-+ resv_list[0].shared = true;
-+ resv_list[1].bo = dst_ttm_bo;
-+ resv_list[1].shared = true;
-+
-+ list_add_tail(&resv_list[0].head, &list);
-+ list_add_tail(&resv_list[1].head, &list);
-+
-+ if (!ring->ready) {
-+ pr_err("Trying to move memory with ring turned off.\n");
-+ return -EINVAL;
-+ }
-+
-+ r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
-+ if (r) {
-+ pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r);
-+ return r;
-+ }
-+
-+ switch (src->mem_type) {
-+ case TTM_PL_TT:
-+ r = amdgpu_ttm_bind(src_ttm_bo, src);
-+ if (r) {
-+ DRM_ERROR("Copy failed. Cannot bind to gart\n");
-+ goto copy_fail;
-+ }
-+ break;
-+ case TTM_PL_VRAM:
-+		/* VRAM could be scattered. Find the node that the given
-+		 * offset falls in.
-+		 */
-+ while (src_offset >= (src_mm->size << PAGE_SHIFT)) {
-+ src_offset -= (src_mm->size << PAGE_SHIFT);
-+ ++src_mm;
-+ }
-+ break;
-+ default:
-+ DRM_ERROR("Unknown placement %d\n", src->mem_type);
-+ r = -EINVAL;
-+ goto copy_fail;
-+ }
-+ src_start = src_mm->start << PAGE_SHIFT;
-+ src_start += src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
-+ src_start += src_offset;
-+ src_left = (src_mm->size << PAGE_SHIFT) - src_offset;
-+
-+ switch (dst->mem_type) {
-+ case TTM_PL_TT:
-+ r = amdgpu_ttm_bind(dst_ttm_bo, dst);
-+ if (r) {
-+ DRM_ERROR("Copy failed. Cannot bind to gart\n");
-+ goto copy_fail;
-+ }
-+ break;
-+ case TTM_PL_VRAM:
-+ while (dst_offset >= (dst_mm->size << PAGE_SHIFT)) {
-+ dst_offset -= (dst_mm->size << PAGE_SHIFT);
-+ ++dst_mm;
-+ }
-+ break;
-+ default:
-+ DRM_ERROR("Unknown placement %d\n", dst->mem_type);
-+ r = -EINVAL;
-+ goto copy_fail;
-+ }
-+ dst_start = dst_mm->start << PAGE_SHIFT;
-+ dst_start += dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
-+ dst_start += dst_offset;
-+ dst_left = (dst_mm->size << PAGE_SHIFT) - dst_offset;
-+
-+ do {
-+ struct dma_fence *next;
-+
-+		/* src_left/dst_left: space remaining in the current node.
-+		 * Copy the minimum of (src_left, dst_left, bytes left to copy).
-+		 */
-+ cur_copy_size = min3(src_left, dst_left,
-+ (size - total_copy_size));
-+
-+ r = amdgpu_copy_buffer(ring, src_start, dst_start,
-+ cur_copy_size, NULL, &next, false, false);
-+ if (r)
-+ break;
-+
-+ /* Just keep the last fence */
-+ dma_fence_put(fence);
-+ fence = next;
-+
-+ total_copy_size += cur_copy_size;
-+		/* The requested number of bytes has been copied. Done. */
-+ if (total_copy_size >= size)
-+ break;
-+
-+ /* If end of src or dst node is reached, move to next node */
-+ src_left -= cur_copy_size;
-+ if (!src_left) {
-+ ++src_mm;
-+ src_start = src_mm->start << PAGE_SHIFT;
-+ src_start +=
-+ src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
-+ src_left = src_mm->size << PAGE_SHIFT;
-+ } else
-+ src_start += cur_copy_size;
-+
-+ dst_left -= cur_copy_size;
-+ if (!dst_left) {
-+ ++dst_mm;
-+ dst_start = dst_mm->start << PAGE_SHIFT;
-+ dst_start +=
-+ dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
-+ dst_left = dst_mm->size << PAGE_SHIFT;
-+ } else
-+ dst_start += cur_copy_size;
-+
-+ } while (total_copy_size < size);
-+
-+	/* A failure could occur after a partial copy, so still fill in
-+	 * the amount copied and the fence.
-+	 */
-+ if (actual_size)
-+ *actual_size = total_copy_size;
-+
-+ if (fence) {
-+ amdgpu_bo_fence(src_mem->bo, fence, true);
-+ amdgpu_bo_fence(dst_mem->bo, fence, true);
-+ }
-+
-+ if (f)
-+ *f = fence;
-+
-+copy_fail:
-+ ttm_eu_backoff_reservation(&ticket, &list);
-+ return r;
-+}
-+
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-index 9c472c5..2be2e05 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
-@@ -817,11 +817,7 @@ static struct drm_driver kms_driver = {
- .driver_features =
- DRIVER_USE_AGP |
- DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 13, 0)
- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
--#else
-- DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
--#endif
- .load = amdgpu_driver_load_kms,
- .open = amdgpu_driver_open_kms,
- .postclose = amdgpu_driver_postclose_kms,
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
-old mode 100644
-new mode 100755
-index 283dc1b..f421505
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
-@@ -36,6 +36,7 @@
- #include <drm/drm_cache.h>
- #include "amdgpu.h"
- #include "amdgpu_trace.h"
-+#include "amdgpu_amdkfd.h"
-
- static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
- {
-@@ -46,6 +47,8 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
-
- if (bo->tbo.mem.mem_type == AMDGPU_PL_DGMA_IMPORT)
- kfree(tbo->mem.bus.addr);
-+ if (bo->kfd_bo)
-+ amdgpu_amdkfd_unreserve_system_memory_limit(bo);
- amdgpu_bo_kunmap(bo);
-
- if (bo->gem_base.import_attach)
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
-old mode 100644
-new mode 100755
-index 8a91658..f73dba5
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
-@@ -89,6 +89,7 @@ struct amdgpu_bo {
-
- struct ttm_bo_kmap_obj dma_buf_vmap;
- struct amdgpu_mn *mn;
-+ struct kgd_mem *kfd_bo;
-
- union {
- struct list_head mn_list;
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
-old mode 100644
-new mode 100755
-index 322d2529..af8e544
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
-@@ -36,6 +36,7 @@
- /* some special values for the owner field */
- #define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
- #define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
-+#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
-
- #define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
- #define AMDGPU_FENCE_FLAG_INT (1 << 1)
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
-old mode 100644
-new mode 100755
-index c586f44..7ee8247
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
-@@ -31,6 +31,7 @@
- #include <drm/drmP.h>
- #include "amdgpu.h"
- #include "amdgpu_trace.h"
-+#include "amdgpu_amdkfd.h"
-
- struct amdgpu_sync_entry {
- struct hlist_node node;
-@@ -84,11 +85,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
- */
- static void *amdgpu_sync_get_owner(struct dma_fence *f)
- {
-- struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
-+ struct amd_sched_fence *s_fence;
-+ struct amdgpu_amdkfd_fence *kfd_fence;
-+
-+ if (f == NULL)
-+ return AMDGPU_FENCE_OWNER_UNDEFINED;
-
-+ s_fence = to_amd_sched_fence(f);
- if (s_fence)
- return s_fence->owner;
-
-+ kfd_fence = to_amdgpu_amdkfd_fence(f);
-+ if (kfd_fence)
-+ return AMDGPU_FENCE_OWNER_KFD;
-+
- return AMDGPU_FENCE_OWNER_UNDEFINED;
- }
-
-@@ -171,7 +181,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- * @resv: reservation object with embedded fence
- * @shared: true if we should only sync to the exclusive fence
- *
-- * Sync to the fence
-+ * Sync to the fence, except if it is a KFD eviction fence and the
-+ * owner is AMDGPU_FENCE_OWNER_VM.
- */
- int amdgpu_sync_resv(struct amdgpu_device *adev,
- struct amdgpu_sync *sync,
-@@ -198,11 +209,15 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
- for (i = 0; i < flist->shared_count; ++i) {
- f = rcu_dereference_protected(flist->shared[i],
- reservation_object_held(resv));
-+ fence_owner = amdgpu_sync_get_owner(f);
-+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
-+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
-+ continue;
-+
- if (amdgpu_sync_same_dev(adev, f)) {
- /* VM updates are only interesting
- * for other VM updates and moves.
- */
-- fence_owner = amdgpu_sync_get_owner(f);
- if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
- ((owner == AMDGPU_FENCE_OWNER_VM) !=
-@@ -297,6 +312,31 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
- return NULL;
- }
-
-+int amdgpu_sync_clone(struct amdgpu_device *adev,
-+ struct amdgpu_sync *source,
-+ struct amdgpu_sync *clone)
-+{
-+ struct amdgpu_sync_entry *e;
-+ struct hlist_node *tmp;
-+ struct dma_fence *f;
-+ int i, r;
-+
-+ hash_for_each_safe(source->fences, i, tmp, e, node) {
-+
-+ f = e->fence;
-+ if (!dma_fence_is_signaled(f)) {
-+ r = amdgpu_sync_fence(adev, clone, f);
-+ if (r)
-+ return r;
-+ } else {
-+ hash_del(&e->node);
-+ dma_fence_put(f);
-+ kmem_cache_free(amdgpu_sync_slab, e);
-+ }
-+ }
-+ return 0;
-+}
-+
- int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
- {
- struct amdgpu_sync_entry *e;
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
-old mode 100644
-new mode 100755
-index dc76879..8e29bc7
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
-@@ -49,6 +49,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
- struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
- struct amdgpu_ring *ring);
- struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-+int amdgpu_sync_clone(struct amdgpu_device *adev, struct amdgpu_sync *source,
-+ struct amdgpu_sync *clone);
- int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
- void amdgpu_sync_free(struct amdgpu_sync *sync);
- int amdgpu_sync_init(void);
-diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
-old mode 100644
-new mode 100755
-index 9f34fab..f22f7a8
---- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
-+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
-@@ -272,6 +272,7 @@
- # define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
- # define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
- # define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
-+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
- #define PACKET3_SET_RESOURCES 0xA0
- /* 1. header
- * 2. CONTROL
-diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
-old mode 100644
-new mode 100755
-index 323e21c..d09592a
---- a/drivers/gpu/drm/amd/amdgpu/vid.h
-+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
-@@ -27,6 +27,8 @@
- #define SDMA1_REGISTER_OFFSET 0x200 /* not a register */
- #define SDMA_MAX_INSTANCE 2
-
-+#define KFD_VI_SDMA_QUEUE_OFFSET 0x80 /* not a register */
-+
- /* crtc instance offsets */
- #define CRTC0_REGISTER_OFFSET (0x1b9c - 0x1b9c)
- #define CRTC1_REGISTER_OFFSET (0x1d9c - 0x1b9c)
-diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
-old mode 100644
-new mode 100755
-index f55a0f8..dba08ec
---- a/drivers/gpu/drm/amd/amdkfd/Makefile
-+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
-@@ -26,5 +26,3 @@ amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o
-
- obj-$(CONFIG_HSA_AMD) += amdkfd.o
-
--AMDKFD_FULL_PATH = $(src)
--include $(AMDKFD_FULL_PATH)/backport/Makefile
-diff --git a/drivers/gpu/drm/amd/amdkfd/backport/backport.h b/drivers/gpu/drm/amd/amdkfd/backport/backport.h
-index 8b13b98..e1f8c1d 100644
---- a/drivers/gpu/drm/amd/amdkfd/backport/backport.h
-+++ b/drivers/gpu/drm/amd/amdkfd/backport/backport.h
-@@ -2,12 +2,5 @@
- #define AMDKFD_BACKPORT_H
-
- #include <linux/version.h>
--#if defined(BUILD_AS_DKMS)
--#include <kcl/kcl_amd_asic_type.h>
--#endif
--#include <kcl/kcl_compat.h>
--#include <kcl/kcl_pci.h>
--#include <kcl/kcl_mn.h>
--#include <kcl/kcl_fence.h>
-
- #endif
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-index b2795af..207a05e 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-@@ -25,9 +25,7 @@
- #include <linux/err.h>
- #include <linux/fs.h>
- #include <linux/sched.h>
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
- #include <linux/sched/mm.h>
--#endif
- #include <linux/slab.h>
- #include <linux/uaccess.h>
- #include <linux/compat.h>
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
-index 5f597a6..4e94081 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
-@@ -811,11 +811,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
- */
- pgdat = NODE_DATA(numa_node_id);
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- mem_in_bytes += pgdat->node_zones[zone_type].present_pages;
--#else
- mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
--#endif
- mem_in_bytes <<= PAGE_SHIFT;
-
- sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-index c6b447d..6b3a1fa 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
-@@ -326,11 +326,6 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
-
- static int kfd_resume(struct kfd_dev *kfd);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
--void kfd_init_processes_srcu(void);
--void kfd_cleanup_processes_srcu(void);
--#endif
--
- static const struct kfd_device_info *lookup_device_info(unsigned short did)
- {
- size_t i;
-@@ -633,10 +628,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
-
- kfd_ib_mem_init(kfd);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- kfd_init_processes_srcu();
--#endif
--
- if (kfd_resume(kfd)) {
- dev_err(kfd_device, "Error resuming kfd\n");
- goto kfd_resume_error;
-@@ -678,9 +669,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
- {
- if (kfd->init_complete) {
- kgd2kfd_suspend(kfd);
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- kfd_cleanup_processes_srcu();
--#endif
- kfd_cwsr_fini(kfd);
- device_queue_manager_uninit(kfd->dqm);
- kfd_interrupt_exit(kfd);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
-index 8debe6e..7eacf42 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
-@@ -24,10 +24,8 @@
- #include <linux/slab.h>
- #include <linux/types.h>
- #include <linux/uaccess.h>
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
- #include <linux/sched/mm.h>
- #include <linux/sched/signal.h>
--#endif
- #include <linux/mman.h>
- #include <linux/memory.h>
- #include "kfd_priv.h"
-@@ -269,13 +267,7 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
- {
- struct kfd_event *ev;
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_possible(p->events, ev, node, events, id)
--#else
- hash_for_each_possible(p->events, ev, events, id)
--#endif
- if (ev->event_id == id)
- return ev;
-
-@@ -420,13 +412,7 @@ static void destroy_events(struct kfd_process *p)
- struct hlist_node *tmp;
- unsigned int hash_bkt;
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_safe(p->events, hash_bkt, node, tmp, ev, events)
--#else
- hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
--#endif
- destroy_event(p, ev);
- }
-
-@@ -972,16 +958,9 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
- int bkt;
- bool send_signal = true;
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
-- ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
--
-- hash_for_each(p->events, bkt, node, ev, events)
--#else
- ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
-
- hash_for_each(p->events, bkt, ev, events)
--#endif
- if (ev->type == type) {
- send_signal = false;
- dev_dbg(kfd_device,
-@@ -1114,9 +1093,6 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
- int bkt;
- struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
- struct kfd_hsa_memory_exception_data memory_exception_data;
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--#endif
-
- if (!p)
- return; /* Presumably process exited. */
-@@ -1136,11 +1112,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
- }
- mutex_lock(&p->event_mutex);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- hash_for_each(p->events, bkt, node, ev, events) {
--#else
- hash_for_each(p->events, bkt, ev, events) {
--#endif
- if (ev->type == KFD_EVENT_TYPE_MEMORY) {
- ev->memory_exception_data = memory_exception_data;
- set_event(ev);
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
-index 4f4392a..47dcf4a 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
-@@ -61,11 +61,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
- return r;
- }
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- kfd->ih_wq = create_rt_workqueue("KFD IH");
--#else
- kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
--#endif
- spin_lock_init(&kfd->interrupt_lock);
-
- INIT_WORK(&kfd->interrupt_work, interrupt_wq);
-@@ -115,15 +111,9 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
- count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
- kfd->device_info->ih_ring_entry_size);
- if (count != kfd->device_info->ih_ring_entry_size) {
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- dev_err(kfd_chardev(),
-- "Interrupt ring overflow, dropping interrupt %d\n",
-- count);
--#else
- dev_err_ratelimited(kfd_chardev(),
- "Interrupt ring overflow, dropping interrupt %d\n",
- count);
--#endif
- return false;
- }
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
-index c6be3ba..e67eb9f 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_ipc.c
-@@ -192,21 +192,13 @@ int kfd_ipc_import_handle(struct kfd_dev *dev, struct kfd_process *p,
- {
- int r;
- struct kfd_ipc_obj *entry, *found = NULL;
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *tmp_node;
--#endif
-
- mutex_lock(&kfd_ipc_handles.lock);
- /* Convert the user provided handle to hash key and search only in that
- * bucket
- */
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- hlist_for_each_entry(entry, tmp_node,
-- &kfd_ipc_handles.handles[HANDLE_TO_KEY(share_handle)], node) {
--#else
- hlist_for_each_entry(entry,
- &kfd_ipc_handles.handles[HANDLE_TO_KEY(share_handle)], node) {
--#endif
- if (!memcmp(entry->share_handle, share_handle,
- sizeof(entry->share_handle))) {
- found = entry;
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
-index 64bf653..5724d33 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
-@@ -465,19 +465,15 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-
- static int debugfs_show_mqd(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct cik_mqd), false);
--#endif
- return 0;
- }
-
- static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct cik_sdma_rlc_registers), false);
--#endif
- return 0;
- }
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
-index 0713cac..6c302d2 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
-@@ -455,19 +455,15 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
-
- static int debugfs_show_mqd(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct v9_mqd), false);
--#endif
- return 0;
- }
-
- static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct v9_sdma_mqd), false);
--#endif
- return 0;
- }
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
-index a5ba6f7..5c26e5a 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
-@@ -468,19 +468,15 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
-
- static int debugfs_show_mqd(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct vi_mqd), false);
--#endif
- return 0;
- }
-
- static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
- {
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- data, sizeof(struct vi_sdma_mqd), false);
--#endif
- return 0;
- }
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-index 9fcb6fb..7cca7b4 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
-@@ -410,10 +410,8 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
- return 0;
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 0, 0) && !defined(OS_NAME_RHEL_7_2)
- seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
- pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
--#endif
-
- return 0;
- }
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-old mode 100644
-new mode 100755
-index ebe311e..88fdfc9
---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-@@ -36,11 +36,7 @@
- #include <linux/interval_tree.h>
- #include <linux/seq_file.h>
- #include <linux/kref.h>
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
--#include <linux/kfifo-new.h>
--#else
- #include <linux/kfifo.h>
--#endif
- #include <kgd_kfd_interface.h>
-
- #include <drm/amd_rdma.h>
-@@ -727,7 +723,7 @@ struct kfd_process {
- size_t signal_event_count;
- bool signal_event_limit_reached;
-
-- struct rb_root bo_interval_tree;
-+ struct rb_root_cached bo_interval_tree;
-
- /* Information used for memory eviction */
- void *process_info;
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-index b458995..c798fa3 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
-@@ -23,10 +23,8 @@
- #include <linux/mutex.h>
- #include <linux/log2.h>
- #include <linux/sched.h>
--#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
- #include <linux/sched/mm.h>
- #include <linux/sched/task.h>
--#endif
- #include <linux/slab.h>
- #if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
- #include <linux/amd-iommu.h>
-@@ -50,20 +48,7 @@ struct mm_struct;
- static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
- static DEFINE_MUTEX(kfd_processes_mutex);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
--static struct srcu_struct kfd_processes_srcu;
--void kfd_init_processes_srcu(void)
--{
-- init_srcu_struct(&kfd_processes_srcu);
--}
--
--void kfd_cleanup_processes_srcu(void)
--{
-- cleanup_srcu_struct(&kfd_processes_srcu);
--}
--#else
- DEFINE_STATIC_SRCU(kfd_processes_srcu);
--#endif
-
- static struct workqueue_struct *kfd_process_wq;
-
-@@ -81,11 +66,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
- void kfd_process_create_wq(void)
- {
- if (!kfd_process_wq)
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
-- kfd_process_wq = create_workqueue("kfd_process_wq");
--#else
- kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
--#endif
- }
-
- void kfd_process_destroy_wq(void)
-@@ -273,15 +254,8 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
- {
- struct kfd_process *process;
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_possible_rcu(kfd_processes_table, process, node,
-- kfd_processes, (uintptr_t)mm)
--#else
- hash_for_each_possible_rcu(kfd_processes_table, process,
- kfd_processes, (uintptr_t)mm)
--#endif
- if (process->mm == mm)
- return process;
-
-@@ -586,7 +560,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
- if (!process)
- goto err_alloc_process;
-
-- process->bo_interval_tree = RB_ROOT;
-+ process->bo_interval_tree = RB_ROOT_CACHED;
-
- process->pasid = kfd_pasid_alloc();
- if (process->pasid == 0)
-@@ -1026,13 +1000,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_rcu(kfd_processes_table, temp, node, p, kfd_processes) {
--#else
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
--#endif
- if (p->pasid == pasid) {
- kref_get(&p->ref);
- ret_p = p;
-@@ -1051,13 +1019,7 @@ void kfd_suspend_all_processes(void)
- unsigned int temp;
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_rcu(kfd_processes_table, temp, node, p, kfd_processes) {
--#else
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
--#endif
- if (cancel_delayed_work_sync(&p->eviction_work.dwork))
- dma_fence_put(p->eviction_work.quiesce_fence);
- cancel_delayed_work_sync(&p->restore_work);
-@@ -1077,13 +1039,7 @@ int kfd_resume_all_processes(void)
- unsigned int temp;
- int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_rcu(kfd_processes_table, temp, node, p, kfd_processes) {
--#else
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
--#endif
- if (!schedule_delayed_work(&p->restore_work, 0)) {
- pr_err("Restore process %d failed during resume\n",
- p->pasid);
-@@ -1171,13 +1127,7 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
-
- int idx = srcu_read_lock(&kfd_processes_srcu);
-
--#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 0)
-- struct hlist_node *node;
--
-- hash_for_each_rcu(kfd_processes_table, temp, node, p, kfd_processes) {
--#else
- hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
--#endif
- seq_printf(m, "Process %d PASID %d:\n",
- p->lead_thread->tgid, p->pasid);
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
-index ffd8e0f..d08e3de 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
-@@ -122,9 +122,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
- struct kfd_mem_properties *mem;
- struct kfd_cache_properties *cache;
- struct kfd_iolink_properties *iolink;
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct kfd_perf_properties *perf;
--#endif
-
- list_del(&dev->list);
-
-@@ -149,14 +147,12 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
- kfree(iolink);
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- while (dev->perf_props.next != &dev->perf_props) {
- perf = container_of(dev->perf_props.next,
- struct kfd_perf_properties, list);
- list_del(&perf->list);
- kfree(perf);
- }
--#endif
-
- kfree(dev);
- }
-@@ -192,9 +188,7 @@ struct kfd_topology_device *kfd_create_topology_device(
- INIT_LIST_HEAD(&dev->mem_props);
- INIT_LIST_HEAD(&dev->cache_props);
- INIT_LIST_HEAD(&dev->io_link_props);
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- INIT_LIST_HEAD(&dev->perf_props);
--#endif
-
- list_add_tail(&dev->list, device_list);
-
-@@ -374,7 +368,6 @@ static struct kobj_type cache_type = {
- .sysfs_ops = &cache_ops,
- };
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- /****** Sysfs of Performance Counters ******/
-
- struct kfd_perf_attr {
-@@ -407,7 +400,6 @@ static struct kfd_perf_attr perf_attr_iommu[] = {
- KFD_PERF_DESC(counter_ids, 0),
- };
- /****************************************/
--#endif
-
- static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
- char *buffer)
-@@ -546,9 +538,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
- struct kfd_iolink_properties *iolink;
- struct kfd_cache_properties *cache;
- struct kfd_mem_properties *mem;
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct kfd_perf_properties *perf;
--#endif
-
- if (dev->kobj_iolink) {
- list_for_each_entry(iolink, &dev->io_link_props, list)
-@@ -590,7 +580,6 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
- dev->kobj_mem = NULL;
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- if (dev->kobj_perf) {
- list_for_each_entry(perf, &dev->perf_props, list) {
- kfree(perf->attr_group);
-@@ -600,7 +589,6 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
- kobject_put(dev->kobj_perf);
- dev->kobj_perf = NULL;
- }
--#endif
-
- if (dev->kobj_node) {
- sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid);
-@@ -618,11 +606,9 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
- struct kfd_iolink_properties *iolink;
- struct kfd_cache_properties *cache;
- struct kfd_mem_properties *mem;
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct kfd_perf_properties *perf;
- uint32_t num_attrs;
- struct attribute **attrs;
--#endif
- int ret;
- uint32_t i;
-
-@@ -653,11 +639,9 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
- if (!dev->kobj_iolink)
- return -ENOMEM;
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
- if (!dev->kobj_perf)
- return -ENOMEM;
--#endif
-
- /*
- * Creating sysfs files for node properties
-@@ -749,7 +733,6 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
- i++;
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- /* All hardware blocks have the same number of attributes. */
- num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr);
- list_for_each_entry(perf, &dev->perf_props, list) {
-@@ -775,7 +758,6 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
- if (ret < 0)
- return ret;
- }
--#endif
-
- return 0;
- }
-@@ -942,7 +924,6 @@ static void find_system_memory(const struct dmi_header *dm,
- }
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- /*
- * Performance counters information is not part of CRAT but we would like to
- * put them in the sysfs under topology directory for Thunk to get the data.
-@@ -966,7 +947,6 @@ static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev)
-
- return 0;
- }
--#endif
-
- /* kfd_add_non_crat_information - Add information that is not currently
- * defined in CRAT but is necessary for KFD topology
-@@ -1074,11 +1054,9 @@ int kfd_topology_init(void)
- }
- }
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- kdev = list_first_entry(&temp_topology_device_list,
- struct kfd_topology_device, list);
- kfd_add_perf_to_topology(kdev);
--#endif
-
- down_write(&topology_lock);
- kfd_topology_update_device_list(&temp_topology_device_list,
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
-index b59b32c..f22d420 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
-@@ -141,14 +141,12 @@ struct kfd_iolink_properties {
- struct attribute attr;
- };
-
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct kfd_perf_properties {
- struct list_head list;
- char block_name[16];
- uint32_t max_concurrent;
- struct attribute_group *attr_group;
- };
--#endif
-
- struct kfd_topology_device {
- struct list_head list;
-@@ -160,17 +158,13 @@ struct kfd_topology_device {
- struct list_head cache_props;
- uint32_t io_link_count;
- struct list_head io_link_props;
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct list_head perf_props;
--#endif
- struct kfd_dev *gpu;
- struct kobject *kobj_node;
- struct kobject *kobj_mem;
- struct kobject *kobj_cache;
- struct kobject *kobj_iolink;
--#if LINUX_VERSION_CODE > KERNEL_VERSION(3, 10, 0)
- struct kobject *kobj_perf;
--#endif
- struct attribute attr_gpuid;
- struct attribute attr_name;
- struct attribute attr_props;
-diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-index 2780641..977b21b 100644
---- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
-@@ -707,11 +707,7 @@ static int dm_display_resume(struct drm_device *ddev)
-
- err:
- DRM_ERROR("Restoring old state failed with %i\n", ret);
--#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0)
-- drm_atomic_state_free(state);
--#else
- drm_atomic_state_put(state);
--#endif
-
- return ret;
- }
-diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-old mode 100644
-new mode 100755
-index 36f3766..b6cf2d5
---- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
-@@ -30,6 +30,7 @@
-
- #include <linux/types.h>
- #include <linux/bitmap.h>
-+#include <linux/dma-buf.h>
-
- struct pci_dev;
-
-@@ -40,6 +41,46 @@ struct kfd_dev;
- struct kgd_dev;
-
- struct kgd_mem;
-+struct kfd_process_device;
-+struct amdgpu_bo;
-+
-+enum kfd_preempt_type {
-+ KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
-+ KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
-+};
-+
-+struct kfd_vm_fault_info {
-+ uint64_t page_addr;
-+ uint32_t vmid;
-+ uint32_t mc_id;
-+ uint32_t status;
-+ bool prot_valid;
-+ bool prot_read;
-+ bool prot_write;
-+ bool prot_exec;
-+};
-+
-+struct kfd_cu_info {
-+ uint32_t num_shader_engines;
-+ uint32_t num_shader_arrays_per_engine;
-+ uint32_t num_cu_per_sh;
-+ uint32_t cu_active_number;
-+ uint32_t cu_ao_mask;
-+ uint32_t simd_per_cu;
-+ uint32_t max_waves_per_simd;
-+ uint32_t wave_front_size;
-+ uint32_t max_scratch_slots_per_cu;
-+ uint32_t lds_size;
-+ uint32_t cu_bitmap[4][4];
-+};
-+
-+/* For getting GPU local memory information from KGD */
-+struct kfd_local_mem_info {
-+ uint64_t local_mem_size_private;
-+ uint64_t local_mem_size_public;
-+ uint32_t vram_width;
-+ uint32_t mem_clk_max;
-+};
-
- enum kgd_memory_pool {
- KGD_POOL_SYSTEM_CACHEABLE = 1,
-@@ -72,6 +113,21 @@ struct kgd2kfd_shared_resources {
- /* Bit n == 1 means Queue n is available for KFD */
- DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
-
-+ /* Doorbell assignments (SOC15 and later chips only). Only
-+ * specific doorbells are routed to each SDMA engine. Others
-+ * are routed to IH and VCN. They are not usable by the CP.
-+ *
-+ * Any doorbell number D that satisfies the following condition
-+ * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
-+ *
-+ * KFD currently uses 1024 (= 0x3ff) doorbells per process. If
-+	 * doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are reserved, that means
-+ * mask would be set to 0x1f8 and val set to 0x0f0.
-+ */
-+ unsigned int sdma_doorbell[2][2];
-+ unsigned int reserved_doorbell_mask;
-+ unsigned int reserved_doorbell_val;
-+
- /* Base address of doorbell aperture. */
- phys_addr_t doorbell_physical_address;
-
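As a quick illustration of the reservation rule documented in the comment above (not part of the patch itself), the following standalone C snippet evaluates (D & reserved_doorbell_mask) == reserved_doorbell_val for the example values mask = 0x1f8 and val = 0x0f0; the helper and program names are made up for the sketch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Returns true if doorbell index d is reserved for SDMA/IH/VCN use. */
static bool doorbell_is_reserved(uint32_t d, uint32_t mask, uint32_t val)
{
        return (d & mask) == val;
}

int main(void)
{
        const uint32_t mask = 0x1f8, val = 0x0f0;
        uint32_t d;

        /* Prints 0x0f0-0x0f7 and 0x2f0-0x2f7 within the 1024-doorbell range. */
        for (d = 0; d < 0x400; d++)
                if (doorbell_is_reserved(d, mask, val))
                        printf("doorbell 0x%03x is reserved\n", d);
        return 0;
}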
-@@ -80,8 +136,41 @@ struct kgd2kfd_shared_resources {
-
- /* Number of bytes at start of aperture reserved for KGD. */
- size_t doorbell_start_offset;
-+
-+ /* GPUVM address space size in bytes */
-+ uint64_t gpuvm_size;
- };
-
-+struct tile_config {
-+ uint32_t *tile_config_ptr;
-+ uint32_t *macro_tile_config_ptr;
-+ uint32_t num_tile_configs;
-+ uint32_t num_macro_tile_configs;
-+
-+ uint32_t gb_addr_config;
-+ uint32_t num_banks;
-+ uint32_t num_ranks;
-+};
-+
-+/*
-+ * Allocation flags: memory domains. Currently only the VRAM and GTT domains are supported.
-+ */
-+#define ALLOC_MEM_FLAGS_VRAM (1 << 0)
-+#define ALLOC_MEM_FLAGS_GTT (1 << 1)
-+#define ALLOC_MEM_FLAGS_USERPTR (1 << 2)
-+#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
-+
-+/*
-+ * Allocation flags: attributes/access options.
-+ */
-+#define ALLOC_MEM_FLAGS_NONPAGED (1 << 31)
-+#define ALLOC_MEM_FLAGS_READONLY (1 << 30)
-+#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
-+#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
-+#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
-+#define ALLOC_MEM_FLAGS_EXECUTE_ACCESS (1 << 26)
-+#define ALLOC_MEM_FLAGS_COHERENT (1 << 25)
-+
- /**
- * struct kfd2kgd_calls
- *
-@@ -90,7 +179,7 @@ struct kgd2kfd_shared_resources {
- *
- * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
- *
-- * @get_vmem_size: Retrieves (physical) size of VRAM
-+ * @get_local_mem_info: Retrieves information about GPU local memory
- *
- * @get_gpu_clock_counter: Retrieves GPU clock counter
- *
-@@ -112,6 +201,12 @@ struct kgd2kfd_shared_resources {
- * @hqd_sdma_load: Loads the SDMA mqd structure to a H/W SDMA hqd slot.
- * used only for no HWS mode.
- *
-+ * @hqd_dump: Dumps CPC HQD registers to an array of address-value pairs.
-+ * The array is allocated with kmalloc and must be freed with kfree by the caller.
-+ *
-+ * @hqd_sdma_dump: Dumps SDMA HQD registers to an array of address-value pairs.
-+ * The array is allocated with kmalloc and must be freed with kfree by the caller.
-+ *
- * @hqd_is_occupied: Checks if a hqd slot is occupied.
- *
- * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
-@@ -121,8 +216,34 @@ struct kgd2kfd_shared_resources {
- * @hqd_sdma_destroy: Destructs and preempts the SDMA queue assigned to that
- * SDMA hqd slot.
- *
-+ * @map_memory_to_gpu: Allocates and pins BO, PD and all related PTs
-+ *
-+ * @unmap_memory_to_gpu: Releases and unpins BO, PD and all related PTs
-+ *
- * @get_fw_version: Returns FW versions from the header
- *
-+ * @set_num_of_requests: Sets the number of Peripheral Page Requests (PPR)
-+ * sent to the IOMMU when address translation fails
-+ *
-+ * @get_cu_info: Retrieves activated cu info
-+ *
-+ * @get_dmabuf_info: Returns information about a dmabuf if it was
-+ * created by the GPU driver
-+ *
-+ * @import_dmabuf: Imports a DMA buffer, creating a new kgd_mem object
-+ * Supports only DMA buffers created by GPU driver on the same GPU
-+ *
-+ * @export_dmabuf: Exports a KFD BO for sharing with another process
-+ *
-+ * @submit_ib: Submits an IB to the specified engine by inserting the IB into
-+ * the corresponding ring (ring type).
-+ *
-+ * @restore_process_bos: Restores all BOs that belong to the process
-+ *
-+ * @copy_mem_to_mem: Copies size bytes from source BO to destination BO
-+ *
-+ * @get_vram_usage: Returns current VRAM usage
-+ *
- * This structure contains function pointers to services that the kgd driver
- * provides to amdkfd driver.
- *
-@@ -134,11 +255,23 @@ struct kfd2kgd_calls {
-
- void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj);
-
-- uint64_t (*get_vmem_size)(struct kgd_dev *kgd);
-+ void(*get_local_mem_info)(struct kgd_dev *kgd,
-+ struct kfd_local_mem_info *mem_info);
- uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd);
-
- uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
-
-+ int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
-+ void **process_info, struct dma_fence **ef);
-+ void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
-+
-+ int (*create_process_gpumem)(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem);
-+ void (*destroy_process_gpumem)(struct kgd_dev *kgd, struct kgd_mem *mem);
-+
-+ uint32_t (*get_process_page_dir)(void *vm);
-+
-+ int (*open_graphic_handle)(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem);
-+
- /* Register access functions */
- void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid,
- uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
-@@ -151,16 +284,28 @@ struct kfd2kgd_calls {
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-
- int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
-+
-
- int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-- uint32_t queue_id, uint32_t __user *wptr);
-+ uint32_t queue_id, uint32_t __user *wptr,
-+ uint32_t wptr_shift, uint32_t wptr_mask,
-+ struct mm_struct *mm);
-+
-+ int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd,
-+ uint32_t __user *wptr, struct mm_struct *mm);
-+
-+ int (*hqd_dump)(struct kgd_dev *kgd,
-+ uint32_t pipe_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-
-- int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);
-+ int (*hqd_sdma_dump)(struct kgd_dev *kgd,
-+ uint32_t engine_id, uint32_t queue_id,
-+ uint32_t (**dump)[2], uint32_t *n_regs);
-
- bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
- uint32_t pipe_id, uint32_t queue_id);
-
-- int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
-+ int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
- unsigned int timeout, uint32_t pipe_id,
- uint32_t queue_id);
-
-@@ -168,7 +313,7 @@ struct kfd2kgd_calls {
-
- int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd,
- unsigned int timeout);
--
-+
- int (*address_watch_disable)(struct kgd_dev *kgd);
- int (*address_watch_execute)(struct kgd_dev *kgd,
- unsigned int watch_point_id,
-@@ -187,11 +332,72 @@ struct kfd2kgd_calls {
- uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
- struct kgd_dev *kgd,
- uint8_t vmid);
-+ uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
- void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
- uint8_t vmid);
-
-+ int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid);
-+
-+ int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
-+
-+ int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
-+ uint64_t size, void *vm,
-+ struct kgd_mem **mem, uint64_t *offset,
-+ uint32_t flags);
-+ int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
-+ void *vm);
-+ int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
-+ void *vm);
-+ int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem,
-+ void *vm);
-+
- uint16_t (*get_fw_version)(struct kgd_dev *kgd,
- enum kgd_engine_type type);
-+
-+ void (*set_num_of_requests)(struct kgd_dev *kgd,
-+ uint8_t num_of_requests);
-+ int (*alloc_memory_of_scratch)(struct kgd_dev *kgd,
-+ uint64_t va, uint32_t vmid);
-+ int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable,
-+ uint8_t element_size, uint8_t index_stride, uint8_t mtype);
-+ void (*get_cu_info)(struct kgd_dev *kgd,
-+ struct kfd_cu_info *cu_info);
-+ int (*mmap_bo)(struct kgd_dev *kgd, struct vm_area_struct *vma);
-+ int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, void **kptr);
-+ void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid,
-+ uint32_t page_table_base);
-+
-+ int (*pin_get_sg_table_bo)(struct kgd_dev *kgd,
-+ struct kgd_mem *mem, uint64_t offset,
-+ uint64_t size, struct sg_table **ret_sg);
-+ void (*unpin_put_sg_table_bo)(struct kgd_mem *mem,
-+ struct sg_table *sg);
-+
-+ int (*get_dmabuf_info)(struct kgd_dev *kgd, int dma_buf_fd,
-+ struct kgd_dev **dma_buf_kgd, uint64_t *bo_size,
-+ void *metadata_buffer, size_t buffer_size,
-+ uint32_t *metadata_size, uint32_t *flags);
-+ int (*import_dmabuf)(struct kgd_dev *kgd, struct dma_buf *dmabuf,
-+ uint64_t va, void *vm, struct kgd_mem **mem,
-+ uint64_t *size, uint64_t *mmap_offset);
-+ int (*export_dmabuf)(struct kgd_dev *kgd, void *vm, struct kgd_mem *mem,
-+ struct dma_buf **dmabuf);
-+
-+ int (*get_vm_fault_info)(struct kgd_dev *kgd,
-+ struct kfd_vm_fault_info *info);
-+ int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
-+ uint32_t vmid, uint64_t gpu_addr,
-+ uint32_t *ib_cmd, uint32_t ib_len);
-+ int (*get_tile_config)(struct kgd_dev *kgd,
-+ struct tile_config *config);
-+
-+ int (*restore_process_bos)(void *process_info, struct dma_fence **ef);
-+ int (*copy_mem_to_mem)(struct kgd_dev *kgd, struct kgd_mem *src_mem,
-+ uint64_t src_offset, struct kgd_mem *dst_mem,
-+ uint64_t dest_offset, uint64_t size,
-+ struct dma_fence **f, uint64_t *actual_size);
-+ uint64_t (*get_vram_usage)(struct kgd_dev *kgd);
- };
-
- /**
-@@ -210,6 +416,13 @@ struct kfd2kgd_calls {
- *
- * @resume: Notifies amdkfd about a resume action done to a kgd device
- *
-+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
-+ *
-+ * @resume_mm: Resume user queue access to specified MM address space
-+ *
-+ * @schedule_evict_and_restore_process: Schedules work queue that will prepare
-+ * for safe eviction of KFD BOs that belong to the specified process.
-+ *
- * This structure contains function callback pointers so the kgd driver
- * will notify to the amdkfd about certain status changes.
- *
-@@ -224,9 +437,13 @@ struct kgd2kfd_calls {
- void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
- void (*suspend)(struct kfd_dev *kfd);
- int (*resume)(struct kfd_dev *kfd);
-+ int (*quiesce_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
-+ int (*resume_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
-+ int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
-+ struct dma_fence *fence);
- };
-
- int kgd2kfd_init(unsigned interface_version,
- const struct kgd2kfd_calls **g2f);
-
--#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
-+#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
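To make the ownership of these callbacks concrete, here is a hedged sketch of how amdkfd-side code could call two of the kfd2kgd_calls entry points declared above. The function report_node_properties() and the way the kfd2kgd/kgd handles are obtained are assumptions; only the structure and field names come from this header.

/* Sketch only; assumes <kgd_kfd_interface.h> and the kernel printk helpers. */
static void report_node_properties(const struct kfd2kgd_calls *kfd2kgd,
                                   struct kgd_dev *kgd)
{
        struct kfd_local_mem_info mem_info;
        struct kfd_cu_info cu_info;

        kfd2kgd->get_local_mem_info(kgd, &mem_info);
        kfd2kgd->get_cu_info(kgd, &cu_info);

        pr_info("VRAM: %llu bytes private, %llu bytes public, mem clk %u MHz\n",
                (unsigned long long)mem_info.local_mem_size_private,
                (unsigned long long)mem_info.local_mem_size_public,
                mem_info.mem_clk_max);
        pr_info("Active CUs: %u, wavefront size: %u\n",
                cu_info.cu_active_number, cu_info.wave_front_size);
}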
-diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
-old mode 100644
-new mode 100755
-index 2fb25ab..ceaf493
---- a/drivers/gpu/drm/amd/include/v9_structs.h
-+++ b/drivers/gpu/drm/amd/include/v9_structs.h
-@@ -29,10 +29,10 @@ struct v9_sdma_mqd {
- uint32_t sdmax_rlcx_rb_base;
- uint32_t sdmax_rlcx_rb_base_hi;
- uint32_t sdmax_rlcx_rb_rptr;
-+ uint32_t sdmax_rlcx_rb_rptr_hi;
- uint32_t sdmax_rlcx_rb_wptr;
-+ uint32_t sdmax_rlcx_rb_wptr_hi;
- uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
-- uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
-- uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
- uint32_t sdmax_rlcx_rb_rptr_addr_hi;
- uint32_t sdmax_rlcx_rb_rptr_addr_lo;
- uint32_t sdmax_rlcx_ib_cntl;
-@@ -44,29 +44,29 @@ struct v9_sdma_mqd {
- uint32_t sdmax_rlcx_skip_cntl;
- uint32_t sdmax_rlcx_context_status;
- uint32_t sdmax_rlcx_doorbell;
-- uint32_t sdmax_rlcx_virtual_addr;
-- uint32_t sdmax_rlcx_ape1_cntl;
-+ uint32_t sdmax_rlcx_status;
- uint32_t sdmax_rlcx_doorbell_log;
-- uint32_t reserved_22;
-- uint32_t reserved_23;
-- uint32_t reserved_24;
-- uint32_t reserved_25;
-- uint32_t reserved_26;
-- uint32_t reserved_27;
-- uint32_t reserved_28;
-- uint32_t reserved_29;
-- uint32_t reserved_30;
-- uint32_t reserved_31;
-- uint32_t reserved_32;
-- uint32_t reserved_33;
-- uint32_t reserved_34;
-- uint32_t reserved_35;
-- uint32_t reserved_36;
-- uint32_t reserved_37;
-- uint32_t reserved_38;
-- uint32_t reserved_39;
-- uint32_t reserved_40;
-- uint32_t reserved_41;
-+ uint32_t sdmax_rlcx_watermark;
-+ uint32_t sdmax_rlcx_doorbell_offset;
-+ uint32_t sdmax_rlcx_csa_addr_lo;
-+ uint32_t sdmax_rlcx_csa_addr_hi;
-+ uint32_t sdmax_rlcx_ib_sub_remain;
-+ uint32_t sdmax_rlcx_preempt;
-+ uint32_t sdmax_rlcx_dummy_reg;
-+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
-+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
-+ uint32_t sdmax_rlcx_rb_aql_cntl;
-+ uint32_t sdmax_rlcx_minor_ptr_update;
-+ uint32_t sdmax_rlcx_midcmd_data0;
-+ uint32_t sdmax_rlcx_midcmd_data1;
-+ uint32_t sdmax_rlcx_midcmd_data2;
-+ uint32_t sdmax_rlcx_midcmd_data3;
-+ uint32_t sdmax_rlcx_midcmd_data4;
-+ uint32_t sdmax_rlcx_midcmd_data5;
-+ uint32_t sdmax_rlcx_midcmd_data6;
-+ uint32_t sdmax_rlcx_midcmd_data7;
-+ uint32_t sdmax_rlcx_midcmd_data8;
-+ uint32_t sdmax_rlcx_midcmd_cntl;
- uint32_t reserved_42;
- uint32_t reserved_43;
- uint32_t reserved_44;
-diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
-old mode 100644
-new mode 100755
-index 2023482..717fbae
---- a/drivers/gpu/drm/amd/include/vi_structs.h
-+++ b/drivers/gpu/drm/amd/include/vi_structs.h
-@@ -153,6 +153,8 @@ struct vi_sdma_mqd {
- uint32_t reserved_125;
- uint32_t reserved_126;
- uint32_t reserved_127;
-+ uint32_t sdma_engine_id;
-+ uint32_t sdma_queue_id;
- };
-
- struct vi_mqd {
-diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
-old mode 100644
-new mode 100755
-index 2292462..82d97f3
---- a/drivers/pci/pci.c
-+++ b/drivers/pci/pci.c
-@@ -2983,6 +2983,87 @@ bool pci_acs_path_enabled(struct pci_dev *start,
- }
-
- /**
-+ * pci_enable_atomic_ops_to_root - enable AtomicOp requests to root port
-+ * @dev: the PCI device
-+ *
-+ * Returns 0 if the device is capable of generating AtomicOp requests,
-+ * all upstream bridges support AtomicOp routing, egress blocking is disabled
-+ * on all upstream ports, and the root port supports 32-bit, 64-bit and/or
-+ * 128-bit AtomicOp completion; otherwise returns a negative error code.
-+ */
-+int pci_enable_atomic_ops_to_root(struct pci_dev *dev)
-+{
-+ struct pci_bus *bus = dev->bus;
-+
-+ if (!pci_is_pcie(dev))
-+ return -EINVAL;
-+
-+ switch (pci_pcie_type(dev)) {
-+ /*
-+ * PCIe 3.0, 6.15 specifies that endpoints and root ports are permitted
-+ * to implement AtomicOp requester capabilities.
-+ */
-+ case PCI_EXP_TYPE_ENDPOINT:
-+ case PCI_EXP_TYPE_LEG_END:
-+ case PCI_EXP_TYPE_RC_END:
-+ break;
-+ default:
-+ return -EINVAL;
-+ }
-+
-+ while (bus->parent) {
-+ struct pci_dev *bridge = bus->self;
-+ u32 cap;
-+
-+ pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &cap);
-+
-+ switch (pci_pcie_type(bridge)) {
-+ /*
-+ * Upstream, downstream and root ports may implement AtomicOp
-+ * routing capabilities. AtomicOp routing via a root port is
-+ * not considered.
-+ */
-+ case PCI_EXP_TYPE_UPSTREAM:
-+ case PCI_EXP_TYPE_DOWNSTREAM:
-+ if (!(cap & PCI_EXP_DEVCAP2_ATOMIC_ROUTE))
-+ return -EINVAL;
-+ break;
-+
-+ /*
-+ * Root ports are permitted to implement AtomicOp completion
-+ * capabilities.
-+ */
-+ case PCI_EXP_TYPE_ROOT_PORT:
-+ if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
-+ PCI_EXP_DEVCAP2_ATOMIC_COMP64 |
-+ PCI_EXP_DEVCAP2_ATOMIC_COMP128)))
-+ return -EINVAL;
-+ break;
-+ }
-+
-+ /*
-+ * Upstream ports may block AtomicOps on egress.
-+ */
-+ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
-+ u32 ctl2;
-+
-+ pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
-+ &ctl2);
-+ if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_BLOCK)
-+ return -EINVAL;
-+ }
-+
-+ bus = bus->parent;
-+ }
-+
-+ pcie_capability_set_word(dev, PCI_EXP_DEVCTL2,
-+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
-+
-+ return 0;
-+}
-+EXPORT_SYMBOL(pci_enable_atomic_ops_to_root);
-+
-+/**
- * pci_swizzle_interrupt_pin - swizzle INTx for device behind bridge
- * @dev: the PCI device
- * @pin: the INTx pin (1=INTA, 2=INTB, 3=INTC, 4=INTD)
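For context, a hedged example of how a device driver might use the helper added above at probe time; example_probe() and the fallback behaviour are hypothetical, only pci_enable_atomic_ops_to_root() comes from this patch.

#include <linux/pci.h>

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        int ret;

        /* Ask for AtomicOp routing up to the root port; if the topology
         * does not support it, continue without PCIe atomics. */
        ret = pci_enable_atomic_ops_to_root(pdev);
        if (ret < 0)
                dev_info(&pdev->dev, "PCIe atomics not available (%d)\n", ret);

        return 0;
}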
-diff --git a/include/drm/amd_rdma.h b/include/drm/amd_rdma.h
-new file mode 100644
-index 0000000..b0cab3c
---- /dev/null
-+++ b/include/drm/amd_rdma.h
-@@ -0,0 +1,70 @@
-+/*
-+ * Copyright 2015 Advanced Micro Devices, Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+/* @file This file defines the kernel interfaces used to communicate with amdkfd */
-+
-+#ifndef AMD_RDMA_H_
-+#define AMD_RDMA_H_
-+
-+
-+/**
-+ * Structure describing the information needed for P2P access from another
-+ * device to a specific location in GPU memory
-+ */
-+struct amd_p2p_info {
-+ uint64_t va; /**< Specify user virt. address
-+ * which this page table
-+ * described
-+ */
-+ uint64_t size; /**< Specify total size of
-+ * allocation
-+ */
-+ struct pid *pid; /**< Specify process pid to which
-+ * virtual address belongs
-+ */
-+ struct sg_table *pages; /**< Specify DMA/Bus addresses */
-+ void *priv; /**< Pointer set by AMD kernel
-+ * driver
-+ */
-+};
-+
-+/**
-+ * Structure providing the function pointers needed to support RDMA/P2P
-+ * access to GPU memory
-+ */
-+struct amd_rdma_interface {
-+ int (*get_pages)(uint64_t address, uint64_t length, struct pid *pid,
-+ struct amd_p2p_info **amd_p2p_data,
-+ void (*free_callback)(void *client_priv),
-+ void *client_priv);
-+ int (*put_pages)(struct amd_p2p_info **amd_p2p_data);
-+ int (*is_gpu_address)(uint64_t address, struct pid *pid);
-+ int (*get_page_size)(uint64_t address, uint64_t length, struct pid *pid,
-+ unsigned long *page_size);
-+};
-+
-+
-+int amdkfd_query_rdma_interface(const struct amd_rdma_interface **rdma);
-+
-+
-+#endif /* AMD_RDMA_H_ */
-+
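A hedged sketch of how a peer (RDMA) driver might consume the interface declared above: query the interface, pin the GPU pages backing a user virtual address range, and release them again. Error handling is trimmed, the callback body is a stub, and map_gpu_buffer() is a made-up name; only the types and the three interface calls come from amd_rdma.h.

#include <linux/types.h>
#include <drm/amd_rdma.h>

static void buffer_invalidated(void *client_priv)
{
        /* Called by amdkfd when the GPU buffer goes away; stop DMA here. */
}

static int map_gpu_buffer(uint64_t va, uint64_t size, struct pid *pid)
{
        const struct amd_rdma_interface *rdma;
        struct amd_p2p_info *p2p = NULL;
        int ret;

        ret = amdkfd_query_rdma_interface(&rdma);
        if (ret)
                return ret;

        ret = rdma->get_pages(va, size, pid, &p2p, buffer_invalidated, NULL);
        if (ret)
                return ret;

        /* p2p->pages is an sg_table of DMA/bus addresses usable for P2P. */

        return rdma->put_pages(&p2p);
}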
-diff --git a/include/linux/pci.h b/include/linux/pci.h
-old mode 100644
-new mode 100755
-index b1abbcc..3df545d
---- a/include/linux/pci.h
-+++ b/include/linux/pci.h
-@@ -2072,6 +2072,7 @@ void pci_request_acs(void);
- bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags);
- bool pci_acs_path_enabled(struct pci_dev *start,
- struct pci_dev *end, u16 acs_flags);
-+int pci_enable_atomic_ops_to_root(struct pci_dev *dev);
-
- #define PCI_VPD_LRDT 0x80 /* Large Resource Data Type */
- #define PCI_VPD_LRDT_ID(x) ((x) | PCI_VPD_LRDT)
-diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
-index 5bb2b45..de5367c 100644
---- a/include/uapi/linux/kfd_ioctl.h
-+++ b/include/uapi/linux/kfd_ioctl.h
-@@ -23,15 +23,15 @@
- #ifndef KFD_IOCTL_H_INCLUDED
- #define KFD_IOCTL_H_INCLUDED
-
--#include <drm/drm.h>
-+#include <linux/types.h>
- #include <linux/ioctl.h>
-
- #define KFD_IOCTL_MAJOR_VERSION 1
--#define KFD_IOCTL_MINOR_VERSION 1
-+#define KFD_IOCTL_MINOR_VERSION 2
-
- struct kfd_ioctl_get_version_args {
-- __u32 major_version; /* from KFD */
-- __u32 minor_version; /* from KFD */
-+ uint32_t major_version; /* from KFD */
-+ uint32_t minor_version; /* from KFD */
- };
-
- /* For kfd_ioctl_create_queue_args.queue_type. */
-@@ -43,36 +43,51 @@ struct kfd_ioctl_get_version_args {
- #define KFD_MAX_QUEUE_PRIORITY 15
-
- struct kfd_ioctl_create_queue_args {
-- __u64 ring_base_address; /* to KFD */
-- __u64 write_pointer_address; /* from KFD */
-- __u64 read_pointer_address; /* from KFD */
-- __u64 doorbell_offset; /* from KFD */
--
-- __u32 ring_size; /* to KFD */
-- __u32 gpu_id; /* to KFD */
-- __u32 queue_type; /* to KFD */
-- __u32 queue_percentage; /* to KFD */
-- __u32 queue_priority; /* to KFD */
-- __u32 queue_id; /* from KFD */
--
-- __u64 eop_buffer_address; /* to KFD */
-- __u64 eop_buffer_size; /* to KFD */
-- __u64 ctx_save_restore_address; /* to KFD */
-- __u64 ctx_save_restore_size; /* to KFD */
-+ uint64_t ring_base_address; /* to KFD */
-+ uint64_t write_pointer_address; /* from KFD */
-+ uint64_t read_pointer_address; /* from KFD */
-+ uint64_t doorbell_offset; /* from KFD */
-+
-+ uint32_t ring_size; /* to KFD */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t queue_type; /* to KFD */
-+ uint32_t queue_percentage; /* to KFD */
-+ uint32_t queue_priority; /* to KFD */
-+ uint32_t queue_id; /* from KFD */
-+
-+ uint64_t eop_buffer_address; /* to KFD */
-+ uint64_t eop_buffer_size; /* to KFD */
-+ uint64_t ctx_save_restore_address; /* to KFD */
-+ uint32_t ctx_save_restore_size; /* to KFD */
-+ uint32_t ctl_stack_size; /* to KFD */
- };
-
- struct kfd_ioctl_destroy_queue_args {
-- __u32 queue_id; /* to KFD */
-- __u32 pad;
-+ uint32_t queue_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_update_queue_args {
-- __u64 ring_base_address; /* to KFD */
-+ uint64_t ring_base_address; /* to KFD */
-+
-+ uint32_t queue_id; /* to KFD */
-+ uint32_t ring_size; /* to KFD */
-+ uint32_t queue_percentage; /* to KFD */
-+ uint32_t queue_priority; /* to KFD */
-+};
-
-- __u32 queue_id; /* to KFD */
-- __u32 ring_size; /* to KFD */
-- __u32 queue_percentage; /* to KFD */
-- __u32 queue_priority; /* to KFD */
-+struct kfd_ioctl_set_cu_mask_args {
-+ uint32_t queue_id; /* to KFD */
-+ uint32_t num_cu_mask; /* to KFD */
-+ uint64_t cu_mask_ptr; /* to KFD */
-+};
-+
-+struct kfd_ioctl_get_queue_wave_state_args {
-+ uint64_t ctl_stack_address; /* to KFD */
-+ uint32_t ctl_stack_used_size; /* from KFD */
-+ uint32_t save_area_used_size; /* from KFD */
-+ uint32_t queue_id; /* to KFD */
-+ uint32_t pad;
- };
-
- /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
-@@ -80,13 +95,20 @@ struct kfd_ioctl_update_queue_args {
- #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
-
- struct kfd_ioctl_set_memory_policy_args {
-- __u64 alternate_aperture_base; /* to KFD */
-- __u64 alternate_aperture_size; /* to KFD */
-+ uint64_t alternate_aperture_base; /* to KFD */
-+ uint64_t alternate_aperture_size; /* to KFD */
-+
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t default_policy; /* to KFD */
-+ uint32_t alternate_policy; /* to KFD */
-+ uint32_t pad;
-+};
-
-- __u32 gpu_id; /* to KFD */
-- __u32 default_policy; /* to KFD */
-- __u32 alternate_policy; /* to KFD */
-- __u32 pad;
-+struct kfd_ioctl_set_trap_handler_args {
-+ uint64_t tba_addr;
-+ uint64_t tma_addr;
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
- };
-
- /*
-@@ -97,35 +119,52 @@ struct kfd_ioctl_set_memory_policy_args {
- */
-
- struct kfd_ioctl_get_clock_counters_args {
-- __u64 gpu_clock_counter; /* from KFD */
-- __u64 cpu_clock_counter; /* from KFD */
-- __u64 system_clock_counter; /* from KFD */
-- __u64 system_clock_freq; /* from KFD */
-+ uint64_t gpu_clock_counter; /* from KFD */
-+ uint64_t cpu_clock_counter; /* from KFD */
-+ uint64_t system_clock_counter; /* from KFD */
-+ uint64_t system_clock_freq; /* from KFD */
-
-- __u32 gpu_id; /* to KFD */
-- __u32 pad;
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
- };
-
- #define NUM_OF_SUPPORTED_GPUS 7
-
- struct kfd_process_device_apertures {
-- __u64 lds_base; /* from KFD */
-- __u64 lds_limit; /* from KFD */
-- __u64 scratch_base; /* from KFD */
-- __u64 scratch_limit; /* from KFD */
-- __u64 gpuvm_base; /* from KFD */
-- __u64 gpuvm_limit; /* from KFD */
-- __u32 gpu_id; /* from KFD */
-- __u32 pad;
-+ uint64_t lds_base; /* from KFD */
-+ uint64_t lds_limit; /* from KFD */
-+ uint64_t scratch_base; /* from KFD */
-+ uint64_t scratch_limit; /* from KFD */
-+ uint64_t gpuvm_base; /* from KFD */
-+ uint64_t gpuvm_limit; /* from KFD */
-+ uint32_t gpu_id; /* from KFD */
-+ uint32_t pad;
- };
-
-+/* This IOCTL and the limited NUM_OF_SUPPORTED_GPUS are deprecated. Use
-+ * kfd_ioctl_get_process_apertures_new instead, which supports
-+ * arbitrary numbers of GPUs.
-+ */
- struct kfd_ioctl_get_process_apertures_args {
- struct kfd_process_device_apertures
- process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
-
- /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-- __u32 num_of_nodes;
-- __u32 pad;
-+ uint32_t num_of_nodes;
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_get_process_apertures_new_args {
-+ /* User allocated. Pointer to struct kfd_process_device_apertures
-+ * filled in by Kernel
-+ */
-+ uint64_t kfd_process_device_apertures_ptr;
-+ /* to KFD - indicates amount of memory present in
-+ * kfd_process_device_apertures_ptr
-+ * from KFD - Number of entries filled by KFD.
-+ */
-+ uint32_t num_of_nodes;
-+ uint32_t pad;
- };
-
- #define MAX_ALLOWED_NUM_POINTS 100
-@@ -133,103 +172,245 @@ struct kfd_ioctl_get_process_apertures_args {
- #define MAX_ALLOWED_WAC_BUFF_SIZE 128
-
- struct kfd_ioctl_dbg_register_args {
-- __u32 gpu_id; /* to KFD */
-- __u32 pad;
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_dbg_unregister_args {
-- __u32 gpu_id; /* to KFD */
-- __u32 pad;
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_dbg_address_watch_args {
-- __u64 content_ptr; /* a pointer to the actual content */
-- __u32 gpu_id; /* to KFD */
-- __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
-+ uint64_t content_ptr; /* a pointer to the actual content */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
- };
-
- struct kfd_ioctl_dbg_wave_control_args {
-- __u64 content_ptr; /* a pointer to the actual content */
-- __u32 gpu_id; /* to KFD */
-- __u32 buf_size_in_bytes; /*including gpu_id and buf_size */
-+ uint64_t content_ptr; /* a pointer to the actual content */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
- };
-
- /* Matching HSA_EVENTTYPE */
--#define KFD_IOC_EVENT_SIGNAL 0
--#define KFD_IOC_EVENT_NODECHANGE 1
--#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
--#define KFD_IOC_EVENT_HW_EXCEPTION 3
--#define KFD_IOC_EVENT_SYSTEM_EVENT 4
--#define KFD_IOC_EVENT_DEBUG_EVENT 5
--#define KFD_IOC_EVENT_PROFILE_EVENT 6
--#define KFD_IOC_EVENT_QUEUE_EVENT 7
--#define KFD_IOC_EVENT_MEMORY 8
--
--#define KFD_IOC_WAIT_RESULT_COMPLETE 0
--#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
--#define KFD_IOC_WAIT_RESULT_FAIL 2
--
--#define KFD_SIGNAL_EVENT_LIMIT 256
-+#define KFD_IOC_EVENT_SIGNAL 0
-+#define KFD_IOC_EVENT_NODECHANGE 1
-+#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
-+#define KFD_IOC_EVENT_HW_EXCEPTION 3
-+#define KFD_IOC_EVENT_SYSTEM_EVENT 4
-+#define KFD_IOC_EVENT_DEBUG_EVENT 5
-+#define KFD_IOC_EVENT_PROFILE_EVENT 6
-+#define KFD_IOC_EVENT_QUEUE_EVENT 7
-+#define KFD_IOC_EVENT_MEMORY 8
-+
-+#define KFD_IOC_WAIT_RESULT_COMPLETE 0
-+#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
-+#define KFD_IOC_WAIT_RESULT_FAIL 2
-+
-+#define KFD_SIGNAL_EVENT_LIMIT 4096
-
- struct kfd_ioctl_create_event_args {
-- __u64 event_page_offset; /* from KFD */
-- __u32 event_trigger_data; /* from KFD - signal events only */
-- __u32 event_type; /* to KFD */
-- __u32 auto_reset; /* to KFD */
-- __u32 node_id; /* to KFD - only valid for certain
-+ uint64_t event_page_offset; /* from KFD */
-+ uint32_t event_trigger_data; /* from KFD - signal events only */
-+ uint32_t event_type; /* to KFD */
-+ uint32_t auto_reset; /* to KFD */
-+ uint32_t node_id; /* to KFD - only valid for certain
- event types */
-- __u32 event_id; /* from KFD */
-- __u32 event_slot_index; /* from KFD */
-+ uint32_t event_id; /* from KFD */
-+ uint32_t event_slot_index; /* from KFD */
- };
-
- struct kfd_ioctl_destroy_event_args {
-- __u32 event_id; /* to KFD */
-- __u32 pad;
-+ uint32_t event_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_set_event_args {
-- __u32 event_id; /* to KFD */
-- __u32 pad;
-+ uint32_t event_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_reset_event_args {
-- __u32 event_id; /* to KFD */
-- __u32 pad;
-+ uint32_t event_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_memory_exception_failure {
-- __u32 NotPresent; /* Page not present or supervisor privilege */
-- __u32 ReadOnly; /* Write access to a read-only page */
-- __u32 NoExecute; /* Execute access to a page marked NX */
-- __u32 pad;
-+ uint32_t NotPresent; /* Page not present or supervisor privilege */
-+ uint32_t ReadOnly; /* Write access to a read-only page */
-+ uint32_t NoExecute; /* Execute access to a page marked NX */
-+ uint32_t imprecise; /* Can't determine the exact fault address */
- };
-
--/* memory exception data*/
-+/* memory exception data */
- struct kfd_hsa_memory_exception_data {
- struct kfd_memory_exception_failure failure;
-- __u64 va;
-- __u32 gpu_id;
-- __u32 pad;
-+ uint64_t va;
-+ uint32_t gpu_id;
-+ uint32_t pad;
- };
-
--/* Event data*/
-+/* Event data */
- struct kfd_event_data {
- union {
- struct kfd_hsa_memory_exception_data memory_exception_data;
- }; /* From KFD */
-- __u64 kfd_event_data_ext; /* pointer to an extension structure
-- for future exception types */
-- __u32 event_id; /* to KFD */
-- __u32 pad;
-+ uint64_t kfd_event_data_ext; /* pointer to an extension structure
-+ for future exception types */
-+ uint32_t event_id; /* to KFD */
-+ uint32_t pad;
- };
-
- struct kfd_ioctl_wait_events_args {
-- __u64 events_ptr; /* pointed to struct
-+ uint64_t events_ptr; /* pointed to struct
- kfd_event_data array, to KFD */
-- __u32 num_events; /* to KFD */
-- __u32 wait_for_all; /* to KFD */
-- __u32 timeout; /* to KFD */
-- __u32 wait_result; /* from KFD */
-+ uint32_t num_events; /* to KFD */
-+ uint32_t wait_for_all; /* to KFD */
-+ uint32_t timeout; /* to KFD */
-+ uint32_t wait_result; /* from KFD */
-+};
-+
-+struct kfd_ioctl_alloc_memory_of_scratch_args {
-+ uint64_t va_addr; /* to KFD */
-+ uint64_t size; /* to KFD */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
-+};
-+
-+/* Allocation flags: memory types */
-+#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
-+/* Allocation flags: attributes/access options */
-+#define KFD_IOC_ALLOC_MEM_FLAGS_NONPAGED (1 << 31)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_READONLY (1 << 30)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTE_ACCESS (1 << 26)
-+#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 25)
-+
-+struct kfd_ioctl_alloc_memory_of_gpu_args {
-+ uint64_t va_addr; /* to KFD */
-+ uint64_t size; /* to KFD */
-+ uint64_t handle; /* from KFD */
-+ uint64_t mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t flags;
-+};
-+
-+struct kfd_ioctl_free_memory_of_gpu_args {
-+ uint64_t handle; /* to KFD */
-+};
-+
-+struct kfd_ioctl_map_memory_to_gpu_args {
-+ uint64_t handle; /* to KFD */
-+ uint64_t device_ids_array_ptr; /* to KFD */
-+ uint32_t device_ids_array_size; /* to KFD */
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_unmap_memory_from_gpu_args {
-+ uint64_t handle; /* to KFD */
-+ uint64_t device_ids_array_ptr; /* to KFD */
-+ uint32_t device_ids_array_size; /* to KFD */
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_set_process_dgpu_aperture_args {
-+ uint64_t dgpu_base;
-+ uint64_t dgpu_limit;
-+ uint32_t gpu_id;
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_get_dmabuf_info_args {
-+ uint64_t size; /* from KFD */
-+ uint64_t metadata_ptr; /* to KFD */
-+ uint32_t metadata_size; /* to KFD (space allocated by user)
-+ * from KFD (actual metadata size) */
-+ uint32_t gpu_id; /* from KFD */
-+ uint32_t flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
-+ uint32_t dmabuf_fd; /* to KFD */
-+};
-+
-+struct kfd_ioctl_import_dmabuf_args {
-+ uint64_t va_addr; /* to KFD */
-+ uint64_t handle; /* from KFD */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t dmabuf_fd; /* to KFD */
-+};
-+
-+struct kfd_ioctl_ipc_export_handle_args {
-+ uint64_t handle; /* to KFD */
-+ uint32_t share_handle[4]; /* from KFD */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_ipc_import_handle_args {
-+ uint64_t handle; /* from KFD */
-+ uint64_t va_addr; /* to KFD */
-+ uint64_t mmap_offset; /* from KFD */
-+ uint32_t share_handle[4]; /* to KFD */
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t pad;
-+};
-+
-+struct kfd_ioctl_get_tile_config_args {
-+ /* to KFD: pointer to tile array */
-+ uint64_t tile_config_ptr;
-+ /* to KFD: pointer to macro tile array */
-+ uint64_t macro_tile_config_ptr;
-+ /* to KFD: array size allocated by user mode
-+ * from KFD: array size filled by kernel
-+ */
-+ uint32_t num_tile_configs;
-+ /* to KFD: array size allocated by user mode
-+ * from KFD: array size filled by kernel
-+ */
-+ uint32_t num_macro_tile_configs;
-+
-+ uint32_t gpu_id; /* to KFD */
-+ uint32_t gb_addr_config; /* from KFD */
-+ uint32_t num_banks; /* from KFD */
-+ uint32_t num_ranks; /* from KFD */
-+ /* struct size can be extended later if needed
-+ * without breaking ABI compatibility
-+ */
-+};
-+
-+struct kfd_memory_range {
-+ uint64_t va_addr;
-+ uint64_t size;
-+};
-+
-+/* Flag definitions
-+ * BIT0: 0: read operation, 1: write operation.
-+ * This also identifies whether the src or dst array belongs to the remote process
-+ */
-+#define KFD_CROSS_MEMORY_RW_BIT (1 << 0)
-+#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT)
-+#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT)
-+#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT)
-+
-+struct kfd_ioctl_cross_memory_copy_args {
-+ /* to KFD: Process ID of the remote process */
-+ uint32_t pid;
-+ /* to KFD: See above definition */
-+ uint32_t flags;
-+ /* to KFD: Source GPU VM range */
-+ uint64_t src_mem_range_array;
-+ /* to KFD: Size of above array */
-+ uint64_t src_mem_array_size;
-+ /* to KFD: Destination GPU VM range */
-+ uint64_t dst_mem_range_array;
-+ /* to KFD: Size of above array */
-+ uint64_t dst_mem_array_size;
-+ /* from KFD: Total amount of bytes copied */
-+ uint64_t bytes_copied;
- };
-
-
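A brief user-space sketch of how the cross-memory-copy arguments and the KFD_SET_CROSS_MEMORY_* helpers above appear intended to be filled in (the AMDKFD_IOC_CROSS_MEMORY_COPY number is defined further down in this header). The wrapper function, the single-range usage, and treating the array-size fields as element counts are illustrative assumptions.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/kfd_ioctl.h>

/* Write 'size' bytes from a local GPU VA into a remote process' GPU VA. */
static int cross_copy_write(int kfd_fd, pid_t remote_pid,
                            uint64_t local_va, uint64_t remote_va, uint64_t size)
{
        struct kfd_memory_range src = { .va_addr = local_va,  .size = size };
        struct kfd_memory_range dst = { .va_addr = remote_va, .size = size };
        struct kfd_ioctl_cross_memory_copy_args args;

        memset(&args, 0, sizeof(args));
        args.pid = remote_pid;
        KFD_SET_CROSS_MEMORY_WRITE(args.flags);  /* write: dst ranges are in the remote process */
        args.src_mem_range_array = (uint64_t)(uintptr_t)&src;
        args.src_mem_array_size = 1;             /* one range; assumed to be an element count */
        args.dst_mem_range_array = (uint64_t)(uintptr_t)&dst;
        args.dst_mem_array_size = 1;

        return ioctl(kfd_fd, AMDKFD_IOC_CROSS_MEMORY_COPY, &args);
}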
-@@ -287,7 +468,56 @@ struct kfd_ioctl_wait_events_args {
- #define AMDKFD_IOC_DBG_WAVE_CONTROL \
- AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
-
-+#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
-+ AMDKFD_IOWR(0x11, struct kfd_ioctl_alloc_memory_of_gpu_args)
-+
-+#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
-+ AMDKFD_IOWR(0x12, struct kfd_ioctl_free_memory_of_gpu_args)
-+
-+#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
-+ AMDKFD_IOWR(0x13, struct kfd_ioctl_map_memory_to_gpu_args)
-+
-+#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
-+ AMDKFD_IOWR(0x14, struct kfd_ioctl_unmap_memory_from_gpu_args)
-+
-+#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \
-+ AMDKFD_IOWR(0x15, struct kfd_ioctl_alloc_memory_of_scratch_args)
-+
-+#define AMDKFD_IOC_SET_CU_MASK \
-+ AMDKFD_IOW(0x16, struct kfd_ioctl_set_cu_mask_args)
-+
-+#define AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE \
-+ AMDKFD_IOW(0x17, \
-+ struct kfd_ioctl_set_process_dgpu_aperture_args)
-+
-+#define AMDKFD_IOC_SET_TRAP_HANDLER \
-+ AMDKFD_IOW(0x18, struct kfd_ioctl_set_trap_handler_args)
-+
-+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
-+ AMDKFD_IOWR(0x19, struct kfd_ioctl_get_process_apertures_new_args)
-+
-+#define AMDKFD_IOC_GET_DMABUF_INFO \
-+ AMDKFD_IOWR(0x1A, struct kfd_ioctl_get_dmabuf_info_args)
-+
-+#define AMDKFD_IOC_IMPORT_DMABUF \
-+ AMDKFD_IOWR(0x1B, struct kfd_ioctl_import_dmabuf_args)
-+
-+#define AMDKFD_IOC_GET_TILE_CONFIG \
-+ AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_tile_config_args)
-+
-+#define AMDKFD_IOC_IPC_IMPORT_HANDLE \
-+ AMDKFD_IOWR(0x1D, struct kfd_ioctl_ipc_import_handle_args)
-+
-+#define AMDKFD_IOC_IPC_EXPORT_HANDLE \
-+ AMDKFD_IOWR(0x1E, struct kfd_ioctl_ipc_export_handle_args)
-+
-+#define AMDKFD_IOC_CROSS_MEMORY_COPY \
-+ AMDKFD_IOWR(0x1F, struct kfd_ioctl_cross_memory_copy_args)
-+
-+#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
-+ AMDKFD_IOWR(0x20, struct kfd_ioctl_get_queue_wave_state_args)
-+
- #define AMDKFD_COMMAND_START 0x01
--#define AMDKFD_COMMAND_END 0x11
-+#define AMDKFD_COMMAND_END 0x21
-
- #endif
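Finally, a hedged user-space sketch of the allocate-then-map flow implied by the new memory ioctls above. The wrapper name, the flag choice, and the interpretation of device_ids_array_size as a byte count are assumptions; the struct layouts and ioctl numbers come from this header.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

/* Allocate VRAM at GPU VA 'va' on 'gpu_id' and map it into that GPU's VM. */
static int alloc_and_map(int kfd_fd, uint32_t gpu_id, uint64_t va,
                         uint64_t size, uint64_t *handle)
{
        struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {0};
        struct kfd_ioctl_map_memory_to_gpu_args map = {0};
        int err;

        alloc.va_addr = va;
        alloc.size = size;
        alloc.gpu_id = gpu_id;
        alloc.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
                      KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE;

        err = ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc);
        if (err)
                return err;

        map.handle = alloc.handle;
        map.device_ids_array_ptr = (uint64_t)(uintptr_t)&gpu_id;
        map.device_ids_array_size = sizeof(gpu_id);  /* assumed: size in bytes of the GPU ID array */

        err = ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
        if (err)
                return err;

        *handle = alloc.handle;
        return 0;
}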
-diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
-old mode 100644
-new mode 100755
-index 87c2c84..1256851
---- a/include/uapi/linux/pci_regs.h
-+++ b/include/uapi/linux/pci_regs.h
-@@ -624,7 +624,9 @@
- #define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
- #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */
- #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */
-+#define PCI_EXP_DEVCAP2_ATOMIC_COMP32 0x00000080 /* 32b AtomicOp completion */
- #define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit compare */
-+#define PCI_EXP_DEVCAP2_ATOMIC_COMP128 0x00000200 /* 128b AtomicOp completion*/
- #define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */
- #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */
- #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
-@@ -634,6 +636,7 @@
- #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */
- #define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */
- #define PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK 0x0080 /* Block atomic egress */
-+#define PCI_EXP_DEVCTL2_ATOMIC_BLOCK 0x0040 /* Block AtomicOp on egress */
- #define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */
- #define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */
- #define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */
-diff --git a/kernel/fork.c b/kernel/fork.c
-index a19ee25..70d8d5b 100644
---- a/kernel/fork.c
-+++ b/kernel/fork.c
-@@ -1082,6 +1082,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-
- return mm;
- }
-+EXPORT_SYMBOL_GPL(mm_access);
-
- static void complete_vfork_done(struct task_struct *tsk)
- {
---
-2.7.4
-