From 86922491a22c1d248435465d25697e5cfaf144a4 Mon Sep 17 00:00:00 2001 From: Sanjay R Mehta Date: Thu, 17 May 2018 17:11:08 +0530 Subject: [PATCH 3445/4131] Revert "compilation fix for raven rocm" This reverts commit b8ab947de73c88baaf2f1ae4aecdf420cdeb0181. --- drivers/gpu/drm/amd/amdgpu/Makefile | 0 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 152 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 69 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 43 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 39 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 33 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 44 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1188 ++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 21 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 28 +- .../drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 8 +- include/uapi/linux/kfd_ioctl.h | 28 +- 16 files changed, 920 insertions(+), 887 deletions(-) mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/Makefile mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu.h mode change 100644 => 100755 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c mode change 100755 => 100644 drivers/gpu/drm/amd/amdkfd/kfd_device.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile old mode 100644 new mode 100755 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h old mode 100644 new mode 100755 index 18478d4..e8017ee --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -130,7 +130,6 @@ extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; -extern int amdgpu_emu_mode; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -193,8 +192,8 @@ struct amdgpu_cs_parser; struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; -struct amdgpu_bo_va_mapping; struct kfd_vm_fault_info; +struct amdgpu_bo_va_mapping; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -412,8 +411,6 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); -//extern const struct dma_buf_ops amdgpu_dmabuf_ops; - /* sub-allocation manager, it has to be protected by another lock. * By conception this is an helper for other part of the driver * like the indirect buffer or semaphore, which both have their diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c old mode 100644 new mode 100755 index c24a2f4..fdaf5b3 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include "amdgpu_amdkfd.h" @@ -29,10 +30,12 @@ #include "amdgpu_gfx.h" #include +#define AMDKFD_SKIP_UNCOMPILED_CODE 1 + const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); -static unsigned int compute_vmid_bitmap = 0xFF00; +unsigned int global_compute_vmid_bitmap = 0xFF00; int amdgpu_amdkfd_init(void) { @@ -95,6 +98,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) break; case CHIP_VEGA10: case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) { + dev_dbg(adev->dev, "DKMS installed kfd does not support Raven for kernel < 4.16\n"); + return; + } kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; default: @@ -146,12 +153,10 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = compute_vmid_bitmap, + .compute_vmid_bitmap = global_compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, - .gpuvm_size = min(adev->vm_manager.max_pfn - << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_VA_HOLE_START), + .gpuvm_size = (uint64_t)amdgpu_vm_size << 30, .drm_render_minor = adev->ddev->render->index }; @@ -268,6 +273,61 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) amdgpu_device_gpu_recover(adev, NULL, false); } +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + u32 pool_to_domain(enum kgd_memory_pool p) { switch (p) { @@ -356,7 +416,8 @@ void get_local_mem_info(struct kgd_dev *kgd, aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; memset(mem_info, 0, sizeof(*mem_info)); - if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { + if (!(adev->gmc.aper_base & address_mask || + aper_limit & address_mask)) { mem_info->local_mem_size_public = adev->gmc.visible_vram_size; mem_info->local_mem_size_private = adev->gmc.real_vram_size - adev->gmc.visible_vram_size; @@ -371,11 +432,6 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->local_mem_size_public, mem_info->local_mem_size_private); - if (amdgpu_emu_mode == 1) { - mem_info->mem_clk_max = 100; - return; - } - if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; else @@ -396,9 +452,6 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in quantas of 10kHz */ - if (amdgpu_emu_mode == 1) - return 100; - if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; @@ -458,8 +511,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, adev = obj->dev->dev_private; bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) - /* Only VRAM and GTT BOs are supported */ + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_DGMA))) + /* Only VRAM, GTT and DGMA BOs are supported */ goto out_put; r = 0; @@ -473,9 +527,12 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, metadata_size, &metadata_flags); if (flags) { - *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; - + /* If the preferred domain is DGMA, set flags to VRAM because + * KFD doesn't support allocating DGMA memory + */ + *flags = (bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_DGMA)) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) *flags |= ALLOC_MEM_FLAGS_PUBLIC; } @@ -493,66 +550,11 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return usage; } -int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct amdgpu_job *job; - struct amdgpu_ib *ib; - struct amdgpu_ring *ring; - struct dma_fence *f = NULL; - int ret; - - switch (engine) { - case KGD_ENGINE_MEC1: - ring = &adev->gfx.compute_ring[0]; - break; - case KGD_ENGINE_SDMA1: - ring = &adev->sdma.instance[0].ring; - break; - case KGD_ENGINE_SDMA2: - ring = &adev->sdma.instance[1].ring; - break; - default: - pr_err("Invalid engine in IB submission: %d\n", engine); - ret = -EINVAL; - goto err; - } - - ret = amdgpu_job_alloc(adev, 1, &job, NULL); - if (ret) - goto err; - - ib = &job->ibs[0]; - memset(ib, 0, sizeof(struct amdgpu_ib)); - - ib->gpu_addr = gpu_addr; - ib->ptr = ib_cmd; - ib->length_dw = ib_len; - /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ - job->vmid = vmid; - - ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); - if (ret) { - DRM_ERROR("amdgpu: failed to schedule IB.\n"); - goto err_ib_sched; - } - - ret = dma_fence_wait(f, false); - -err_ib_sched: - dma_fence_put(f); - amdgpu_job_free(job); -err: - return ret; -} - bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { if (adev->kfd) { - if ((1 << vmid) & compute_vmid_bitmap) + if ((1 << vmid) & global_compute_vmid_bitmap) return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f0efde7..1fb4915 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -55,6 +55,7 @@ struct kgd_mem { struct ttm_validate_buffer resv_list; uint32_t domain; unsigned int mapped_to_gpu_memory; + void *kptr; uint64_t va; uint32_t mapping_flags; @@ -65,21 +66,24 @@ struct kgd_mem { struct amdgpu_sync sync; - bool aql_queue; + /* flags bitfied */ + bool coherent : 1; + bool no_substitute : 1; + bool aql_queue : 1; }; /* KFD Memory Eviction */ struct amdgpu_amdkfd_fence { struct dma_fence base; - struct mm_struct *mm; + void *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; }; struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); -bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); + void *mm); +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); struct amdkfd_process_info { @@ -104,6 +108,27 @@ struct amdkfd_process_info { struct pid *pid; }; +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. 
+ */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; +}; + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); @@ -119,6 +144,8 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); @@ -137,6 +164,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); /* Shared API */ +int map_bo(struct amdgpu_device *rdev, uint64_t va, void *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va); int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr); @@ -170,38 +199,31 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); }) /* GPUVM API */ -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef); -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem); + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); -int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); -int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size); -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr); int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t offset, @@ -216,9 +238,10 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm, struct kgd_mem *mem, struct dma_buf **dmabuf); +int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct 
mm_struct *mm); +int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm); void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); - #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index cf2f1e9..3961937 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2018 Advanced Micro Devices, Inc. + * Copyright 2016 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,18 +20,18 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include #include #include #include #include #include -#include #include "amdgpu_amdkfd.h" const struct dma_fence_ops amd_kfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); +static int amd_kfd_fence_signal(struct dma_fence *f); + /* Eviction Fence * Fence helper functions to deal with KFD memory eviction. * Big Idea - Since KFD submissions are done by user queues, a BO cannot be @@ -60,7 +60,7 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + void *mm) { struct amdgpu_amdkfd_fence *fence = NULL; @@ -68,8 +68,10 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, if (fence == NULL) return NULL; - /* This reference gets released in amd_kfd_fence_release */ - mmgrab(mm); + /* mm_struct mm is used as void pointer to identify the parent + * KFD process. Don't dereference it. Fence and any threads using + * mm is guranteed to be released before process termination. + */ fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); @@ -122,31 +124,45 @@ static bool amd_kfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + if (!kgd2kfd->schedule_evict_and_restore_process( + (struct mm_struct *)fence->mm, f)) return true; return false; } +static int amd_kfd_fence_signal(struct dma_fence *f) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(f->lock, flags); + /* Set enabled bit so cb will called */ + set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &f->flags); + ret = dma_fence_signal_locked(f); + spin_unlock_irqrestore(f->lock, flags); + + return ret; +} + /** * amd_kfd_fence_release - callback that fence can be freed * * @fence: fence * * This function is called when the reference count becomes zero. - * Drops the mm_struct reference and RCU schedules freeing up the fence. - */ + * It just RCU schedules freeing up the fence. +*/ static void amd_kfd_fence_release(struct dma_fence *f) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); - /* Unconditionally signal the fence. The process is getting * terminated. 
*/ if (WARN_ON(!fence)) return; /* Not an amdgpu_amdkfd_fence */ - mmdrop(fence->mm); + amd_kfd_fence_signal(f); kfree_rcu(f, rcu); } @@ -156,8 +172,8 @@ static void amd_kfd_fence_release(struct dma_fence *f) * * @f: [IN] fence * @mm: [IN] mm that needs to be verified - */ -bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +*/ +bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm) { struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); @@ -177,3 +193,4 @@ const struct dma_fence_ops amd_kfd_fence_ops = { .wait = dma_fence_default_wait, .release = amd_kfd_fence_release, }; + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index c541656..fcc1add 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -41,6 +42,8 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" +#define AMDKFD_SKIP_UNCOMPILED_CODE 1 + enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -89,6 +92,9 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -100,6 +106,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -140,6 +148,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -170,6 +179,7 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, config->num_macro_tile_configs = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + return 0; } @@ -180,13 +190,14 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -213,6 +224,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = 
amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -236,6 +248,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() return (struct kfd2kgd_calls *)&kfd2kgd; } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -319,6 +337,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -927,6 +952,18 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req) +{ + uint32_t value; + struct amdgpu_device *adev = get_amdgpu_device(dev); + + value = RREG32(mmATC_ATS_DEBUG); + value &= ~ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR_MASK; + value |= (num_of_req << ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR__SHIFT); + + WREG32(mmATC_ATS_DEBUG, value); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index dfd0026..ea8e948 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -56,10 +57,15 @@ static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { }; +struct vi_sdma_mqd; + static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -72,6 +78,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -111,6 +119,8 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -152,15 +162,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .create_process_gpumem = create_process_gpumem, .destroy_process_gpumem = destroy_process_gpumem, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -186,6 +197,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -221,6 +233,12 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -305,6 +323,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct 
amdgpu_device *adev = get_amdgpu_device(kgd); @@ -998,6 +1023,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests) +{ + pr_debug("This is a stub\n"); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index f044739..2b74a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -19,7 +19,7 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ - +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt #include @@ -80,9 +80,6 @@ #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 -#define V9_PIPE_PER_MEC (4) -#define V9_QUEUES_PER_PIPE_MEC (8) - enum hqd_dequeue_request_type { NO_ACTION = 0, DRAIN_PIPE, @@ -102,6 +99,9 @@ static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem); + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); /* @@ -114,6 +114,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -154,6 +156,8 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests); static int alloc_memory_of_scratch(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, @@ -202,15 +206,16 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_gpu_clock_counter = get_gpu_clock_counter, .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, .create_process_gpumem = create_process_gpumem, .destroy_process_gpumem = destroy_process_gpumem, .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .open_graphic_handle = open_graphic_handle, .program_sh_mem_settings = kgd_program_sh_mem_settings, .alloc_pasid = amdgpu_pasid_alloc, .free_pasid = amdgpu_pasid_free, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -236,6 +241,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, .unmap_memory_to_gpu = 
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, .get_fw_version = get_fw_version, + .set_num_of_requests = set_num_of_requests, .get_cu_info = get_cu_info, .alloc_memory_of_scratch = alloc_memory_of_scratch, .write_config_static_mem = write_config_static_mem, @@ -271,6 +277,12 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) } +static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, + int fd, uint32_t handle, struct kgd_mem **mem) +{ + return 0; +} + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -307,7 +319,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, static uint32_t get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + unsigned int bit = (pipe_id * adev->gfx.mec.num_pipe_per_mec + queue_id) & 31; return ((uint32_t)1) << bit; @@ -392,6 +404,13 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + /* amdgpu owns the per-pipe state */ + return 0; +} + /* TODO - RING0 form of field is obsolete, seems to date back to SI * but still works */ @@ -908,7 +927,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; - mutex_lock(&adev->srbm_mutex); + spin_lock(&adev->tlb_invalidation_lock); /* Use legacy mode tlb invalidation. * @@ -950,9 +969,8 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & (1 << vmid))) cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); - + + spin_unlock(&adev->tlb_invalidation_lock); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -1181,6 +1199,12 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_num_of_requests(struct kgd_dev *kgd, + uint8_t num_of_requests) +{ + pr_debug("This is a stub\n"); +} + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f42a891..8f0aa93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -20,14 +20,27 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#undef pr_fmt #define pr_fmt(fmt) "kfd2kgd: " fmt +#include +#include +#include +#include #include #include #include -#include "amdgpu_object.h" -#include "amdgpu_vm.h" +#include +#include #include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "gca/gfx_8_0_sh_mask.h" +#include "gca/gfx_8_0_d.h" +#include "gca/gfx_8_0_enum.h" +#include "oss/oss_3_0_sh_mask.h" +#include "oss/oss_3_0_d.h" +#include "gmc/gmc_8_1_sh_mask.h" +#include "gmc/gmc_8_1_d.h" /* Special VM and GART address alignment needed for VI pre-Fiji due to * a HW bug. 
@@ -38,13 +51,15 @@ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) /* Impose limit on how much memory KFD can use */ -static struct { +struct kfd_mem_usage_limit { uint64_t max_system_mem_limit; uint64_t max_userptr_mem_limit; int64_t system_mem_used; int64_t userptr_mem_used; spinlock_t mem_limit_lock; -} kfd_mem_limit; +}; + +static struct kfd_mem_usage_limit kfd_mem_limit; /* Struct used for amdgpu_amdkfd_bo_validate */ struct amdgpu_vm_parser { @@ -167,8 +182,7 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT && - !bo->tbo.sg) { + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } @@ -255,6 +269,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, /* Alloc memory for count number of eviction fence pointers. Fill the * ef_list array and ef_count */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), GFP_KERNEL); if (!fence_list) @@ -321,7 +336,6 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, bool wait) { - struct ttm_operation_ctx ctx = { false, false }; int ret; if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), @@ -357,23 +371,6 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); } -static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) -{ - struct amdgpu_device *adev = - amdgpu_ttm_adev(vm->root.base.bo->tbo.bdev); - u64 offset; - uint64_t flags = AMDGPU_PTE_VALID; - - offset = amdgpu_bo_gpu_offset(vm->root.base.bo); - - /* On some ASICs the FB doesn't start at 0. Adjust FB offset - * to an actual MC address. - */ - adev->gmc.gmc_funcs->get_vm_pde(adev, -1, &offset, &flags); - - return offset; -} - /* vm_validate_pt_pd_bos - Validate page table and directory BOs * * Page directories are not updated here because huge page handling @@ -381,17 +378,18 @@ static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) * again. Page directories are only updated after updating page * tables. 
*/ -static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) { - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *pd = vm->base.root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; int ret; param.domain = AMDGPU_GEM_DOMAIN_VRAM; param.wait = false; - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, ¶m); if (ret) { pr_err("amdgpu: failed to validate PT BOs\n"); @@ -404,9 +402,11 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - vm->pd_phys_addr = get_vm_pd_gpu_offset(vm); + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; - if (vm->use_cpu_for_update) { + if (vm->base.use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); if (ret) { pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); @@ -417,6 +417,23 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return 0; } +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) { struct amdgpu_bo *pd = vm->root.base.bo; @@ -427,7 +444,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + return sync_vm_fence(adev, sync, vm->last_update); } /* add_bo_to_vm - Add a BO to a VM @@ -443,12 +460,14 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) * 4a. Validate new page tables and directories */ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, - struct amdgpu_vm *vm, bool is_aql, + struct amdgpu_vm *avm, bool is_aql, struct kfd_bo_va_list **p_bo_va_entry) { int ret; struct kfd_bo_va_list *bo_va_entry; - struct amdgpu_bo *pd = vm->root.base.bo; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; struct amdgpu_bo *bo = mem->bo; uint64_t va = mem->va; struct list_head *list_bo_va = &mem->bo_va_list; @@ -467,11 +486,11 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, return -ENOMEM; pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, - va + bo_size, vm); + va + bo_size, avm); /* Add BO to VM internal data structures*/ - bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); - if (!bo_va_entry->bo_va) { + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (bo_va_entry->bo_va == NULL) { ret = -EINVAL; pr_err("Failed to add BO object to VM. ret == %d\n", ret); @@ -493,28 +512,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, * fence, so remove it temporarily. 
*/ amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, + kvm->process_info->eviction_fence, NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); if (ret) { pr_err("Failed to allocate pts, err=%d\n", ret); goto err_alloc_pts; } - ret = vm_validate_pt_pd_bos(vm); - if (ret) { + ret = vm_validate_pt_pd_bos(kvm); + if (ret != 0) { pr_err("validate_pt_pd_bos() failed\n"); goto err_alloc_pts; } /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); return 0; err_alloc_pts: - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); list_del(&bo_va_entry->bo_list); err_vmadd: @@ -568,7 +587,6 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; - struct ttm_operation_ctx ctx = { true, false }; int ret = 0; mutex_lock(&process_info->lock); @@ -633,25 +651,134 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, return ret; } +static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, + uint64_t size, void *vm, struct kgd_mem **mem, + uint64_t *offset, u32 domain, u64 flags, + struct sg_table *sg, bool aql_queue, + bool readonly, bool execute, bool coherent, bool no_sub, + bool userptr) +{ + struct amdgpu_device *adev; + int ret; + struct amdgpu_bo *bo; + uint64_t user_addr = 0; + int byte_align; + u32 alloc_domain; + uint32_t mapping_flags; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + + if (aql_queue) + size = size >> 1; + if (userptr) { + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; + } + + adev = get_amdgpu_device(kgd); + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? + VI_BO_SIZE_ALIGN : 1; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (*mem == NULL) { + ret = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->coherent = coherent; + (*mem)->no_substitute = no_sub; + (*mem)->aql_queue = aql_queue; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (!readonly) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (execute) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (coherent) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + + (*mem)->mapping_flags = mapping_flags; + + alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_bo_create; + } + + pr_debug("\t create BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + /* Allocate buffer object. Userptr objects need to start out + * in the CPU domain, get moved to GTT when pinned. + */ + ret = amdgpu_bo_create(adev, size, byte_align, false, + alloc_domain, + flags, sg, NULL, &bo); + if (ret != 0) { + pr_debug("Failed to create BO on domain %s. 
ret %d\n", + domain_string(alloc_domain), ret); + unreserve_system_mem_limit(adev, size, alloc_domain); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + if (userptr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + + (*mem)->va = va; + (*mem)->domain = domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, userptr); + + if (userptr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&kfd_vm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&kfd_vm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); +err_bo_create: + kfree(*mem); +err: + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to - * avoid deadlocks. Some operations update multiple VMs at once. Track - * all the reservation info in a context structure. Optionally a sync - * object can track VM updates. + * avoid deadlocks. When updating userptrs we need to temporarily + * back-off the reservation and then reacquire it. Track all the + * reservation info in a context structure. Buffers can be mapped to + * multiple VMs simultaneously (buffers being restored on multiple + * GPUs). */ struct bo_vm_reservation_context { - struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ - unsigned int n_vms; /* Number of VMs reserved */ - struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ - struct ww_acquire_ctx ticket; /* Reservation ticket */ - struct list_head list, duplicates; /* BO lists */ - struct amdgpu_sync *sync; /* Pointer to sync object */ - bool reserved; /* Whether BOs are reserved */ -}; - -enum bo_vm_match { - BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ - BO_VM_MAPPED, /* Match VMs where a BO is mapped */ - BO_VM_ALL, /* Match all VMs a BO was added to */ + struct amdgpu_bo_list_entry kfd_bo; + unsigned int n_vms; + struct amdgpu_bo_list_entry *vm_pd; + struct ww_acquire_ctx ticket; + struct list_head list, duplicates; + struct amdgpu_sync *sync; + bool reserved; }; /** @@ -676,8 +803,9 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, INIT_LIST_HEAD(&ctx->list); INIT_LIST_HEAD(&ctx->duplicates); - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); - if (!ctx->vm_pd) + ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) + * ctx->n_vms, GFP_KERNEL); + if (ctx->vm_pd == NULL) return -ENOMEM; ctx->kfd_bo.robj = bo; @@ -693,8 +821,10 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, false, &ctx->duplicates); if (!ret) ctx->reserved = true; - else { + else pr_err("Failed to reserve buffers in ttm\n"); + + if (ret) { kfree(ctx->vm_pd); ctx->vm_pd = NULL; } @@ -702,19 +832,24 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, return ret; } +enum VA_TYPE { + VA_NOT_MAPPED = 0, + VA_MAPPED, + VA_DO_NOT_CARE, +}; + /** - * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * reserve_bo_and_vm - reserve a BO and some VMs that the BO has been added + * to, conditionally based on map_type. * @mem: KFD BO structure. * @vm: the VM to reserve. If NULL, then all VMs associated with the BO * is used. Otherwise, a single VM associated with the BO. * @map_type: the mapping status that will be used to filter the VMs. * @ctx: the struct that will be used in unreserve_bo_and_vms(). 
- * - * Returns 0 for success, negative for failure. */ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, - struct amdgpu_vm *vm, enum bo_vm_match map_type, - struct bo_vm_reservation_context *ctx) + struct amdgpu_vm *vm, enum VA_TYPE map_type, + struct bo_vm_reservation_context *ctx) { struct amdgpu_bo *bo = mem->bo; struct kfd_bo_va_list *entry; @@ -732,16 +867,16 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, list_for_each_entry(entry, &mem->bo_va_list, bo_list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) + && map_type != VA_DO_NOT_CARE)) continue; ctx->n_vms++; } if (ctx->n_vms != 0) { - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), - GFP_KERNEL); - if (!ctx->vm_pd) + ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) + * ctx->n_vms, GFP_KERNEL); + if (ctx->vm_pd == NULL) return -ENOMEM; } @@ -756,7 +891,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, list_for_each_entry(entry, &mem->bo_va_list, bo_list) { if ((vm && vm != entry->bo_va->base.vm) || (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) + && map_type != VA_DO_NOT_CARE)) continue; amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, @@ -779,16 +914,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, return ret; } -/** - * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context - * @ctx: Reservation context to unreserve - * @wait: Optionally wait for a sync object representing pending VM updates - * @intr: Whether the wait is interruptible - * - * Also frees any resources allocated in - * reserve_bo_and_(cond_)vm(s). Returns the status from - * amdgpu_sync_wait. - */ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, bool wait, bool intr) { @@ -815,25 +940,25 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); struct amdgpu_bo *pd = vm->root.base.bo; - /* Remove eviction fence from PD (and thereby from PTs too as - * they share the resv. object). Otherwise during PT update - * job (see amdgpu_vm_bo_update_mapping), eviction fence would - * get added to job->sync object and job execution would - * trigger the eviction fence. + /* Remove eviction fence from PD (and thereby from PTs too as they + * share the resv. object. 
Otherwise during PT update job (see + * amdgpu_vm_bo_update_mapping), eviction fence will get added to + * job->sync object */ amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, + kvm->process_info->eviction_fence, NULL, NULL); amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + sync_vm_fence(adev, sync, bo_va->last_pt_update); return 0; } @@ -853,12 +978,12 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, /* Update the page tables */ ret = amdgpu_vm_bo_update(adev, bo_va, false); - if (ret) { + if (ret != 0) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; } - return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + return sync_vm_fence(adev, sync, bo_va->last_pt_update); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -869,9 +994,8 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, /* Set virtual address for the allocation */ ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, - amdgpu_bo_size(entry->bo_va->base.bo), - entry->pte_flags); - if (ret) { + amdgpu_bo_size(entry->bo_va->base.bo), entry->pte_flags); + if (ret != 0) { pr_err("Failed to map VA 0x%llx in vm. ret %d\n", entry->va, ret); return ret; @@ -881,7 +1005,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return 0; ret = update_gpuvm_pte(adev, entry, sync); - if (ret) { + if (ret != 0) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; } @@ -911,424 +1035,116 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) return sg; } -static int process_validate_vms(struct amdkfd_process_info *process_info) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - ret = vm_validate_pt_pd_bos(peer_vm); - if (ret) - return ret; - } - - return 0; -} - -static int process_sync_pds_resv(struct amdkfd_process_info *process_info, - struct amdgpu_sync *sync) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - struct amdgpu_bo *pd = peer_vm->root.base.bo; - - ret = amdgpu_sync_resv(NULL, - sync, pd->tbo.resv, - AMDGPU_FENCE_OWNER_UNDEFINED, false); - if (ret) - return ret; - } - - return 0; -} - -static int process_update_pds(struct amdkfd_process_info *process_info, - struct amdgpu_sync *sync) -{ - struct amdgpu_vm *peer_vm; - int ret; - - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - ret = vm_update_pds(peer_vm, sync); - if (ret) - return ret; - } - - return 0; -} - -static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, - struct dma_fence **ef) -{ - struct amdkfd_process_info *info = NULL; - int ret; - - if (!*process_info) { - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - return -ENOMEM; - - mutex_init(&info->lock); - INIT_LIST_HEAD(&info->vm_list_head); - INIT_LIST_HEAD(&info->kfd_bo_list); - INIT_LIST_HEAD(&info->userptr_valid_list); - INIT_LIST_HEAD(&info->userptr_inval_list); - - info->eviction_fence = - amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); - if (!info->eviction_fence) { - pr_err("Failed to create eviction fence\n"); - ret = -ENOMEM; - goto create_evict_fence_fail; - } - - info->pid = 
get_task_pid(current->group_leader, PIDTYPE_PID); - atomic_set(&info->evicted_bos, 0); - INIT_DELAYED_WORK(&info->work, - amdgpu_amdkfd_restore_userptr_worker); - - *process_info = info; - *ef = dma_fence_get(&info->eviction_fence->base); - } - - vm->process_info = *process_info; - - /* Validate page directory and attach eviction fence */ - ret = amdgpu_bo_reserve(vm->root.base.bo, true); - if (ret) - goto reserve_pd_fail; - ret = vm_validate_pt_pd_bos(vm); - if (ret) { - pr_err("validate_pt_pd_bos() failed\n"); - goto validate_pd_fail; - } - amdgpu_bo_fence(vm->root.base.bo, - &vm->process_info->eviction_fence->base, true); - amdgpu_bo_unreserve(vm->root.base.bo); - - /* Update process info */ - mutex_lock(&vm->process_info->lock); - list_add_tail(&vm->vm_list_node, - &(vm->process_info->vm_list_head)); - vm->process_info->n_vms++; - mutex_unlock(&vm->process_info->lock); - - return 0; - -validate_pd_fail: - amdgpu_bo_unreserve(vm->root.base.bo); -reserve_pd_fail: - vm->process_info = NULL; - if (info) { - /* Two fence references: one in info and one in *ef */ - dma_fence_put(&info->eviction_fence->base); - dma_fence_put(*ef); - *ef = NULL; - *process_info = NULL; -create_evict_fence_fail: - kfree(info); - } - return ret; -} - -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, - struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *new_vm; - int ret; - - new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); - if (!new_vm) - return -ENOMEM; - - /* Initialize AMDGPU part of the VM */ - ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); - if (ret) { - pr_err("Failed init vm ret %d\n", ret); - goto amdgpu_vm_init_fail; - } - - /* Initialize KFD part of the VM and process info */ - ret = init_kfd_vm(new_vm, process_info, ef); - if (ret) - goto init_kfd_vm_fail; + int ret = 0; + struct amdgpu_sync sync; + struct amdgpu_device *adev; - *vm = (void *) new_vm; + adev = get_amdgpu_device(kgd); + amdgpu_sync_create(&sync); - return 0; + mutex_lock(&mem->lock); + amdgpu_sync_clone(adev, &mem->sync, &sync); + mutex_unlock(&mem->lock); -init_kfd_vm_fail: - amdgpu_vm_fini(adev, new_vm); -amdgpu_vm_init_fail: - kfree(new_vm); + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); return ret; } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; - int ret; - - /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm); - if (ret) - return ret; - - /* Initialize KFD part of the VM and process info */ - ret = init_kfd_vm(avm, process_info, ef); - if (ret) - return ret; - - *vm = (void *)avm; - - return 0; -} - -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdkfd_process_info *process_info = vm->process_info; - struct amdgpu_bo *pd = vm->root.base.bo; - - if (vm->vm_context != AMDGPU_VM_CONTEXT_COMPUTE) - return; - - /* Release eviction fence from PD */ - amdgpu_bo_reserve(pd, false); - amdgpu_bo_fence(pd, NULL, false); - amdgpu_bo_unreserve(pd); - - if (!process_info) - return; - - /* Update process info */ - mutex_lock(&process_info->lock); 
- process_info->n_vms--; - list_del(&vm->vm_list_node); - mutex_unlock(&process_info->lock); - - /* Release per-process resources when last compute VM is destroyed */ - if (!process_info->n_vms) { - WARN_ON(!list_empty(&process_info->kfd_bo_list)); - WARN_ON(!list_empty(&process_info->userptr_valid_list)); - WARN_ON(!list_empty(&process_info->userptr_inval_list)); - - dma_fence_put(&process_info->eviction_fence->base); - cancel_delayed_work_sync(&process_info->work); - put_pid(process_info->pid); - kfree(process_info); - } -} - -void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) -{ - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - - if (WARN_ON(!kgd || !vm)) - return; - - pr_debug("Destroying process vm %p\n", vm); - - /* Release the VM context */ - amdgpu_vm_fini(adev, avm); - kfree(vm); -} - -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) -{ - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - - return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; -} +#define BOOL_TO_STR(b) (b == true) ? "true" : "false" int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; - uint64_t user_addr = 0; + bool aql_queue, public, readonly, execute, coherent, no_sub, userptr; + u64 alloc_flag; + uint32_t domain; struct sg_table *sg = NULL; - enum ttm_bo_type bo_type = ttm_bo_type_device; - struct amdgpu_bo *bo; - int byte_align; - u32 domain, alloc_domain; - u64 alloc_flags; - uint32_t mapping_flags; - int ret; + + if (!(flags & ALLOC_MEM_FLAGS_NONPAGED)) { + pr_debug("current hw doesn't support paged memory\n"); + return -EINVAL; + } + + domain = 0; + alloc_flag = 0; + + aql_queue = (flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) ? true : false; + public = (flags & ALLOC_MEM_FLAGS_PUBLIC) ? true : false; + readonly = (flags & ALLOC_MEM_FLAGS_READONLY) ? true : false; + execute = (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ? true : false; + coherent = (flags & ALLOC_MEM_FLAGS_COHERENT) ? true : false; + no_sub = (flags & ALLOC_MEM_FLAGS_NO_SUBSTITUTE) ? true : false; + userptr = (flags & ALLOC_MEM_FLAGS_USERPTR) ? true : false; /* * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; - alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : - AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - } else if (flags & ALLOC_MEM_FLAGS_GTT) { - domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_flags = 0; - } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flag = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + if (public) { + alloc_flag = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + } + alloc_flag |= AMDGPU_GEM_CREATE_VRAM_CLEARED; + } else if (flags & (ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_USERPTR)) { domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - alloc_flags = 0; - if (!offset || !*offset) - return -EINVAL; - user_addr = *offset; + alloc_flag = 0; } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; - alloc_flags = 0; + alloc_flag = 0; if (size > UINT_MAX) return -EINVAL; sg = create_doorbell_sg(*offset, size); if (!sg) return -ENOMEM; - bo_type = ttm_bo_type_sg; - } else { - return -EINVAL; - } - - *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) { - ret = -ENOMEM; - goto err; } - INIT_LIST_HEAD(&(*mem)->bo_va_list); - mutex_init(&(*mem)->lock); - (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); - - /* Workaround for AQL queue wraparound bug. Map the same - * memory twice. That means we only actually allocate half - * the memory. - */ - if ((*mem)->aql_queue) - size = size >> 1; - /* Workaround for TLB bug on older VI chips */ - byte_align = (adev->family == AMDGPU_FAMILY_VI && - adev->asic_type != CHIP_FIJI && - adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11) ? - VI_BO_SIZE_ALIGN : 1; + if (offset && !userptr) + *offset = 0; - mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (!(flags & ALLOC_MEM_FLAGS_READONLY)) - mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) - mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; - if (flags & ALLOC_MEM_FLAGS_COHERENT) - mapping_flags |= AMDGPU_VM_MTYPE_UC; - else - mapping_flags |= AMDGPU_VM_MTYPE_NC; - (*mem)->mapping_flags = mapping_flags; + pr_debug("Allocate VA 0x%llx - 0x%llx domain %s aql %s\n", + va, va + size, domain_string(domain), + BOOL_TO_STR(aql_queue)); - amdgpu_sync_create(&(*mem)->sync); - - if (!sg) { - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, - alloc_domain); - if (ret) { - pr_debug("Insufficient system memory\n"); - goto err_reserve_limit; - } - } - - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", - va, size, domain_string(alloc_domain)); - - /* Allocate buffer object. Userptr objects need to start out - * in the CPU domain, get moved to GTT when pinned. - */ -#if 0 - ret = amdgpu_bo_create(adev, size, byte_align, alloc_domain, - alloc_flags, bo_type, NULL, &bo); -#else - ret = amdgpu_bo_create(adev, size, byte_align, false , alloc_domain, - alloc_flags, sg , NULL, &bo); -#endif - if (ret) { - pr_debug("Failed to create BO on domain %s. 
ret %d\n", - domain_string(alloc_domain), ret); - goto err_bo_create; - } - if (bo_type == ttm_bo_type_sg) { - bo->tbo.sg = sg; - bo->tbo.ttm->sg = sg; - } - bo->kfd_bo = *mem; - (*mem)->bo = bo; - if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; - - (*mem)->va = va; - (*mem)->domain = domain; - (*mem)->mapped_to_gpu_memory = 0; - (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); - - if (user_addr) { - ret = init_user_pages(*mem, current->mm, user_addr); - if (ret) { - mutex_lock(&avm->process_info->lock); - list_del(&(*mem)->validate_list.head); - mutex_unlock(&avm->process_info->lock); - goto allocate_init_user_pages_failed; - } - } - - if (offset) - *offset = amdgpu_bo_mmap_offset(bo); - - return 0; - -allocate_init_user_pages_failed: - amdgpu_bo_unref(&bo); -err_bo_create: - if (!sg) - unreserve_system_mem_limit(adev, size, alloc_domain); -err_reserve_limit: - kfree(*mem); -err: - if (sg) { - sg_free_table(sg); - kfree(sg); - } - return ret; + pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s coherent %s no_sub %s\n", + alloc_flag, BOOL_TO_STR(public), + BOOL_TO_STR(readonly), BOOL_TO_STR(execute), + BOOL_TO_STR(coherent), BOOL_TO_STR(no_sub)); + + return __alloc_memory_of_gpu(kgd, va, size, vm, mem, + offset, domain, + alloc_flag, sg, + aql_queue, readonly, execute, + coherent, no_sub, userptr); } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem) + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdkfd_process_info *process_info = mem->process_info; - unsigned long bo_size = mem->bo->tbo.mem.size; + struct amdgpu_device *adev; struct kfd_bo_va_list *entry, *tmp; struct bo_vm_reservation_context ctx; + int ret = 0; struct ttm_validate_buffer *bo_list_entry; - int ret; + struct amdkfd_process_info *process_info; + unsigned long bo_size; + + adev = get_amdgpu_device(kgd); + process_info = ((struct amdkfd_vm *)vm)->process_info; + + bo_size = mem->bo->tbo.mem.size; mutex_lock(&mem->lock); if (mem->mapped_to_gpu_memory > 0) { - pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", - mem->va, bo_size); + pr_debug("BO VA 0x%llx size 0x%lx is already mapped to vm %p.\n", + mem->va, bo_size, vm); mutex_unlock(&mem->lock); return -EBUSY; } @@ -1356,8 +1172,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( kvfree(mem->user_pages); } - ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); - if (unlikely(ret)) + ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx); + if (unlikely(ret != 0)) return ret; /* The eviction fence should be removed by the last unmap. 
@@ -1371,9 +1187,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mem->va + bo_size * (1 + mem->aql_queue)); /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) { remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, entry, bo_size); + } ret = unreserve_bo_and_vms(&ctx, false, false); @@ -1398,8 +1215,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_device *adev; int ret; struct amdgpu_bo *bo; uint32_t domain; @@ -1407,14 +1223,11 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct bo_vm_reservation_context ctx; struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; unsigned long bo_size; - bool is_invalid_userptr = false; + bool is_invalid_userptr; - bo = mem->bo; - if (!bo) { - pr_err("Invalid BO when mapping memory to GPU\n"); - return -EINVAL; - } + adev = get_amdgpu_device(kgd); /* Make sure restore is not running concurrently. Since we * don't map invalid userptr BOs, we rely on the next restore @@ -1426,14 +1239,20 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( * sure that the MMU notifier is no longer running * concurrently and the queues are actually stopped */ - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - down_write(&current->mm->mmap_sem); - is_invalid_userptr = atomic_read(&mem->invalid); - up_write(&current->mm->mmap_sem); - } + down_read(&current->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_read(&current->mm->mmap_sem); mutex_lock(&mem->lock); + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1443,7 +1262,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( vm, domain_string(domain)); ret = reserve_bo_and_vm(mem, vm, &ctx); - if (unlikely(ret)) + if (unlikely(ret != 0)) goto out; /* Userptr can be marked as "not invalid", but not actually be * if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - if (check_if_add_bo_to_vm(avm, mem)) { - ret = add_bo_to_vm(adev, mem, avm, false, + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, &bo_va_entry); - if (ret) + if (ret != 0) goto add_bo_to_vm_failed; if (mem->aql_queue) { - ret = add_bo_to_vm(adev, mem, avm, + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, true, &bo_va_entry_aql); - if (ret) + if (ret != 0) goto add_bo_to_vm_failed_aql; } } else { - ret = vm_validate_pt_pd_bos(avm); - if (unlikely(ret)) + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret != 0)) goto add_bo_to_vm_failed; } @@ -1492,7 +1311,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( ret = map_bo_to_gpuvm(adev, entry, ctx.sync, is_invalid_userptr); - if (ret) { + if (ret != 0) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; } @@ -1510,9 +1329,15 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + if (mem->domain & AMDGPU_GEM_DOMAIN_DGMA) { + ret = amdgpu_bo_pin(bo, mem->domain, NULL); + if (ret != 0) { + pr_err("Unable to pin DGMA BO\n"); +
goto map_bo_to_gpuvm_failed; + } + } else if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) amdgpu_bo_fence(bo, - &avm->process_info->eviction_fence->base, + &kfd_vm->process_info->eviction_fence->base, true); ret = unreserve_bo_and_vms(&ctx, false, false); @@ -1533,30 +1358,200 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( return ret; } +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (new_vm == NULL) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret != 0) { + pr_err("Failed init vm ret %d\n", ret); + /* Undo everything related to the new VM context */ + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + pr_err("Failed to create amdkfd_process_info"); + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (info->eviction_fence == NULL) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + info->pid = get_task_pid(current->group_leader, + PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->work, + amdgpu_amdkfd_restore_userptr_worker); + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); + + dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->work); + put_pid(process_info->pid); + kfree(process_info); + } + + /* Release the VM context */ + 
amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *mem) +{ + struct amdgpu_device *adev; + + adev = (struct amdgpu_device *) kgd; + if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + *mem = *adev->gmc.vm_fault_info; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + } + return 0; +} + +static bool is_mem_on_local_device(struct kgd_dev *kgd, + struct list_head *bo_va_list, void *vm) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, bo_va_list, bo_list) { + if (entry->kgd_dev == kgd && entry->bo_va->base.vm == vm) + return true; + } + + return false; +} + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdkfd_process_info *process_info = - ((struct amdgpu_vm *)vm)->process_info; - unsigned long bo_size = mem->bo->tbo.mem.size; struct kfd_bo_va_list *entry; + struct amdgpu_device *adev; + unsigned int mapped_before; + int ret = 0; struct bo_vm_reservation_context ctx; - int ret; + struct amdkfd_process_info *process_info; + unsigned long bo_size; + + adev = (struct amdgpu_device *) kgd; + process_info = ((struct amdkfd_vm *)vm)->process_info; + + bo_size = mem->bo->tbo.mem.size; mutex_lock(&mem->lock); - ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); - if (unlikely(ret)) + /* + * Make sure that this BO mapped on KGD before unmappping it + */ + if (!is_mem_on_local_device(kgd, &mem->bo_va_list, vm)) { + ret = -EINVAL; goto out; - /* If no VMs were reserved, it means the BO wasn't actually mapped */ - if (ctx.n_vms == 0) { + } + + if (mem->mapped_to_gpu_memory == 0) { + pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", + mem->va, bo_size, vm); ret = -EINVAL; - goto unreserve_out; + goto out; } + mapped_before = mem->mapped_to_gpu_memory; - ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); - if (unlikely(ret)) + ret = reserve_bo_and_cond_vms(mem, vm, VA_MAPPED, &ctx); + if (unlikely(ret != 0)) + goto out; + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret != 0)) goto unreserve_out; pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", @@ -1589,11 +1584,20 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( /* If BO is unmapped from all VMs, unfence it. It can be evicted if * required. 
*/ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, + if (mem->mapped_to_gpu_memory == 0) { + if (mem->domain & AMDGPU_GEM_DOMAIN_DGMA) + amdgpu_bo_unpin(mem->bo); + else if (!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence, - NULL, NULL); + NULL, NULL); + } + + if (mapped_before == mem->mapped_to_gpu_memory) { + pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", + mem->va, bo_size, vm); + ret = -EINVAL; + } unreserve_out: unreserve_bo_and_vms(&ctx, false, false); @@ -1602,28 +1606,8 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( return ret; } -int amdgpu_amdkfd_gpuvm_sync_memory( - struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) -{ - struct amdgpu_sync sync; - int ret; - struct amdgpu_device *adev; - - adev = get_amdgpu_device(kgd); - - amdgpu_sync_create(&sync); - - mutex_lock(&mem->lock); - amdgpu_sync_clone(adev , &mem->sync, &sync); - mutex_unlock(&mem->lock); - - ret = amdgpu_sync_wait(&sync, intr); - amdgpu_sync_free(&sync); - return ret; -} - int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size) + struct kgd_mem *mem, void **kptr) { int ret; struct amdgpu_bo *bo = mem->bo; @@ -1660,11 +1644,10 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, bo, mem->process_info->eviction_fence, NULL, NULL); list_del_init(&mem->validate_list.head); - if (size) - *size = amdgpu_bo_size(bo); - amdgpu_bo_unreserve(bo); + mem->kptr = *kptr; + mutex_unlock(&mem->process_info->lock); return 0; @@ -1678,27 +1661,13 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, return ret; } -int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, - struct kfd_vm_fault_info *mem) -{ - struct amdgpu_device *adev; - - adev = (struct amdgpu_device *) kgd; - if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { - *mem = *adev->gmc.vm_fault_info; - mb(); - atomic_set(&adev->gmc.vm_fault_info_updated, 0); - } - return 0; -} - static int pin_bo_wo_map(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; int ret = 0; ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret)) + if (unlikely(ret != 0)) return ret; ret = amdgpu_bo_pin(bo, mem->domain, NULL); @@ -1713,7 +1682,7 @@ static void unpin_bo_wo_map(struct kgd_mem *mem) int ret = 0; ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret)) + if (unlikely(ret != 0)) return; amdgpu_bo_unpin(bo); @@ -1758,8 +1727,7 @@ static int get_sg_table(struct amdgpu_device *adev, goto out; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) { - bus_addr = amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start - + adev->gmc.aper_base + offset; + bus_addr = bo->tbo.offset + adev->gmc.aper_base + offset; for_each_sg(sg->sgl, s, sg->orig_nents, i) { uint64_t chunk_size, length; @@ -1814,7 +1782,7 @@ int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, struct amdgpu_device *adev; ret = pin_bo_wo_map(mem); - if (unlikely(ret)) + if (unlikely(ret != 0)) return ret; adev = get_amdgpu_device(kgd); @@ -1844,7 +1812,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct drm_gem_object *obj; struct amdgpu_bo *bo; - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; if (dma_buf->ops != &drm_gem_prime_dmabuf_ops) /* Can't handle non-graphics 
buffers */ @@ -1857,12 +1825,13 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_DGMA))) /* Only VRAM and GTT BOs are supported */ return -EINVAL; *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) + if (*mem == NULL) return -ENOMEM; if (size) @@ -1879,11 +1848,15 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, (*mem)->bo = amdgpu_bo_ref(bo); (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + (*mem)->domain = AMDGPU_GEM_DOMAIN_VRAM; + else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) + (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT; + else + (*mem)->domain = AMDGPU_GEM_DOMAIN_DGMA; (*mem)->mapped_to_gpu_memory = 0; - (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, false); amdgpu_sync_create(&(*mem)->sync); return 0; @@ -1913,6 +1886,37 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm, return 0; } +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. 
This means it @@ -1936,7 +1940,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, if (evicted_bos == 1) { /* First eviction, stop the queues */ r = kgd2kfd->quiesce_mm(NULL, mm); - if (r) + if (r != 0) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->work, 1); } @@ -1955,7 +1959,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, { struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; - struct ttm_operation_ctx ctx = { false, false }; int invalid, ret; /* Move all invalidated BOs to the userptr_inval_list and @@ -2002,8 +2005,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (!mem->user_pages) { mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); if (!mem->user_pages) { pr_err("%s: Failed to allocate pages array\n", __func__); @@ -2034,7 +2037,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) return -EAGAIN; } - return 0; } @@ -2051,10 +2053,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) struct ww_acquire_ctx ticket; struct amdgpu_sync sync; - struct amdgpu_vm *peer_vm; + struct amdkfd_vm *peer_vm; struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; - struct ttm_operation_ctx ctx = { false, false }; int i, ret; pd_bo_list_entries = kcalloc(process_info->n_vms, @@ -2072,7 +2073,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) i = 0; list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + amdgpu_vm_get_pd_bo(&peer_vm->base, &resv_list, &pd_bo_list_entries[i++]); /* Add the userptr_inval_list entries to resv_list */ list_for_each_entry(mem, &process_info->userptr_inval_list, @@ -2096,7 +2097,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + amdgpu_amdkfd_remove_eviction_fence(peer_vm->base.root.base.bo, process_info->eviction_fence, NULL, NULL); @@ -2162,7 +2163,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) unreserve_out: list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_bo_fence(peer_vm->root.base.bo, + amdgpu_bo_fence(peer_vm->base.root.base.bo, &process_info->eviction_fence->base, true); ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); @@ -2265,7 +2266,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) { struct amdgpu_bo_list_entry *pd_bo_list; struct amdkfd_process_info *process_info = info; - struct amdgpu_vm *peer_vm; + struct amdkfd_vm *peer_vm; struct kgd_mem *mem; struct bo_vm_reservation_context ctx; struct amdgpu_amdkfd_fence *new_fence; @@ -2280,14 +2281,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pd_bo_list = kcalloc(process_info->n_vms, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL); - if (!pd_bo_list) + if (pd_bo_list == NULL) return -ENOMEM; i = 0; mutex_lock(&process_info->lock); list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); /* Reserve all BOs 
and page tables/directory. Add all BOs from * kfd_bo_list to ctx.list @@ -2308,16 +2310,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } amdgpu_sync_create(&sync_obj); + ctx.sync = &sync_obj; /* Validate PDs and PTs */ ret = process_validate_vms(process_info); if (ret) goto validate_map_fail; - ret = process_sync_pds_resv(process_info, &sync_obj); - if (ret) { - pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); - goto validate_map_fail; + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); } /* Validate BOs and map them to GPUVM (update VM page tables). */ @@ -2333,17 +2339,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; - } + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { ret = update_gpuvm_pte((struct amdgpu_device *) bo_va_entry->kgd_dev, bo_va_entry, - &sync_obj); + ctx.sync); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; @@ -2352,14 +2354,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } /* Update page directories */ - ret = process_update_pds(process_info, &sync_obj); + ret = process_update_pds(process_info, ctx.sync); if (ret) { pr_debug("Memory eviction: update PDs failed. Try again\n"); goto validate_map_fail; } - /* Wait for validate and PT updates to finish */ - amdgpu_sync_wait(&sync_obj, false); + amdgpu_sync_wait(ctx.sync, false); /* Release old eviction fence and create new one, because fence only * goes from unsignaled to signaled, fence cannot be reused. 
@@ -2377,7 +2378,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Attach new eviction fence to all BOs */ + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list.head) amdgpu_bo_fence(mem->bo, @@ -2386,7 +2390,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - struct amdgpu_bo *bo = peer_vm->root.base.bo; + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7ac07a3..6414b50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -132,7 +132,6 @@ int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ -int amdgpu_emu_mode = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -291,9 +290,6 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); -MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); -module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); - #ifdef CONFIG_DRM_AMDGPU_SI int amdgpu_si_support = 1; @@ -573,7 +569,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, { struct drm_device *dev; unsigned long flags = ent->driver_data; - int ret, retry = 0; + int ret; bool supports_atomic = false; if (!amdgpu_virtual_display && @@ -618,14 +614,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); -retry_init: ret = drm_dev_register(dev, ent->driver_data); - if (ret == -EAGAIN && ++retry <= 3) { - DRM_INFO("retry init %d\n", retry); - /* Don't request EX mode too frequently which is attacking */ - msleep(5000); - goto retry_init; - } else if (ret) + if (ret) goto err_pci; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ef9a24d..00477a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -32,7 +32,6 @@ #include #include "amdgpu.h" #include "amdgpu_trace.h" -#include "amdgpu_amdkfd.h" /* * GPUVM @@ -2336,22 +2335,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, adev->vm_manager.fragment_size); } -static void amdgpu_inc_compute_vms(struct amdgpu_device *adev) -{ - /* Temporary use only the first VM manager */ - unsigned int vmhub = 0; /*ring->funcs->vmhub;*/ - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - - mutex_lock(&id_mgr->lock); - if ((adev->vm_manager.n_compute_vms++ == 0) && - (!amdgpu_sriov_vf(adev))) { - /* First Compute VM: enable compute power profile */ - if (adev->powerplay.pp_funcs->switch_power_profile) - amdgpu_dpm_switch_power_profile(adev,PP_SMC_POWER_PROFILE_COMPUTE); - } - mutex_unlock(&id_mgr->lock); -} - /** * amdgpu_vm_init - initialize a vm 
instance * @@ -2456,8 +2439,21 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->fault_credit = 16; vm->vm_context = vm_context; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) - amdgpu_inc_compute_vms(adev); + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + struct amdgpu_vmid_mgr *id_mgr = + &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + + mutex_lock(&id_mgr->lock); + + if ((adev->vm_manager.n_compute_vms++ == 0) && + (!amdgpu_sriov_vf(adev))) { + /* First Compute VM: enable compute power profile */ + if (adev->powerplay.pp_funcs->switch_power_profile) + amdgpu_dpm_switch_power_profile(adev, + AMD_PP_COMPUTE_PROFILE); + } + mutex_unlock(&id_mgr->lock); + } return 0; @@ -2476,86 +2472,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } /** - * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM - * - * This only works on GFX VMs that don't have any BOs added and no - * page tables allocated yet. - * - * Changes the following VM parameters: - * - vm_context - * - use_cpu_for_update - * - pte_supports_ats - * - pasid (old PASID is released, because compute manages its own PASIDs) - * - * Reinitializes the page directory to reflect the changed ATS - * setting. May also switch to the compute power profile if this is - * the first compute VM. May leave behind an unused shadow BO for the - * page directory when switching from SDMA updates to CPU updates. - * - * Returns 0 for success, -errno for errors. - */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) -{ - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); - int r; - - r = amdgpu_bo_reserve(vm->root.base.bo, true); - if (r) - return r; - - /* Sanity checks */ - if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { - /* Can happen if ioctl is interrupted by a signal after - * this function already completed. Just return success. - */ - r = 0; - goto error; - } - if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { - r = -EINVAL; - goto error; - } - - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, - adev->vm_manager.root_level, - pte_support_ats); - if (r) - goto error; - } - - /* Update VM state */ - vm->vm_context = AMDGPU_VM_CONTEXT_COMPUTE; - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_COMPUTE); - vm->pte_support_ats = pte_support_ats; - DRM_DEBUG_DRIVER("VM update mode is %s\n", - vm->use_cpu_for_update ? 
"CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), - "CPU update of VM recommended only for large BAR system\n"); - - if (vm->pasid) { - unsigned long flags; - - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); - - vm->pasid = 0; - } - - /* Count the new compute VM */ - amdgpu_inc_compute_vms(adev); - -error: - amdgpu_bo_unreserve(vm->root.base.bo); - return r; -} - -/** * amdgpu_vm_free_levels - free PD/PT levels * * @adev: amdgpu device structure @@ -2616,7 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { struct amdgpu_vmid_mgr *id_mgr = - &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + mutex_lock(&id_mgr->lock); WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs"); @@ -2737,9 +2654,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) adev->vm_manager.vm_update_mode = 0; #endif + adev->vm_manager.n_compute_vms = 0; idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); - adev->vm_manager.n_compute_vms = 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index beba1a5..beee443 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -199,6 +199,9 @@ struct amdgpu_vm { /* dedicated to vm */ struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; + /* Whether this is a Compute or GFX Context */ + int vm_context; + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ bool use_cpu_for_update; @@ -210,18 +213,6 @@ struct amdgpu_vm { /* Limit non-retry fault storms */ unsigned int fault_credit; - - /* Whether this is a Compute or GFX Context */ - int vm_context; - - /* Points to the KFD process VM info */ - struct amdkfd_process_info *process_info; - - /* List node in amdkfd_process_info.vm_list_head */ - struct list_head vm_list_node; - - /* Valid while the PD is reserved or fenced */ - uint64_t pd_phys_addr; }; struct amdgpu_vm_manager { @@ -254,22 +245,20 @@ struct amdgpu_vm_manager { * BIT1[= 0] Compute updated by SDMA [= 1] by CPU */ int vm_update_mode; + /* Number of Compute VMs, used for detecting Compute activity */ + unsigned n_compute_vms; /* PASID to VM mapping, will be used in interrupt context to * look up VM of a page fault */ struct idr pasid_idr; spinlock_t pasid_lock; - - /* Number of Compute VMs, used for detecting Compute activity */ - unsigned n_compute_vms; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, unsigned int pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c old mode 100755 new mode 100644 index 52f456e..47dfce9 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -366,14 +366,14 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, * 32 and 64-bit requests are possible and must be * supported. 
*/ - - if (pci_enable_atomic_ops_to_root(pdev) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } - + if (pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; + } } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index e164abb..4dcc7d0 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -29,11 +29,8 @@ #define KGD_KFD_INTERFACE_H_INCLUDED #include -#include -#include -#include -#include #include +#include struct pci_dev; @@ -200,6 +197,8 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * + * @init_pipeline: Initialized the compute pipelines. + * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * @@ -227,6 +226,9 @@ struct tile_config { * * @get_fw_version: Returns FW versions from the header * + * @set_num_of_requests: Sets number of Peripheral Page Request (PPR) sent to + * IOMMU when address translation failed + * * @get_cu_info: Retrieves activated cu info * * @get_dmabuf_info: Returns information about a dmabuf if it was @@ -261,15 +263,13 @@ struct kfd2kgd_calls { void(*get_local_mem_info)(struct kgd_dev *kgd, struct kfd_local_mem_info *mem_info); + uint64_t (*get_vmem_size)(struct kgd_dev *kgd); uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); int (*create_process_vm)(struct kgd_dev *kgd, void **vm, void **process_info, struct dma_fence **ef); - int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, - void **vm, void **process_info, - struct dma_fence **ef); void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); int (*create_process_gpumem)(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); @@ -277,6 +277,8 @@ struct kfd2kgd_calls { uint32_t (*get_process_page_dir)(void *vm); + int (*open_graphic_handle)(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); + int (*alloc_pasid)(unsigned int bits); void (*free_pasid)(unsigned int pasid); @@ -288,6 +290,9 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); + int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); @@ -337,6 +342,8 @@ struct kfd2kgd_calls { uint16_t (*get_atc_vmid_pasid_mapping_pasid)( struct kgd_dev *kgd, uint8_t vmid); + void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, + uint8_t vmid); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); @@ -348,7 +355,8 @@ struct kfd2kgd_calls { uint64_t size, void *vm, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); - int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); + int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, @@ 
-357,6 +365,8 @@ struct kfd2kgd_calls { uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); + void (*set_num_of_requests)(struct kgd_dev *kgd, + uint8_t num_of_requests); int (*alloc_memory_of_scratch)(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable, @@ -364,7 +374,7 @@ struct kfd2kgd_calls { void (*get_cu_info)(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, - struct kgd_mem *mem, void **kptr, uint64_t *size); + struct kgd_mem *mem, void **kptr); void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint32_t page_table_base); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c index 416abeb..44de087 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c @@ -166,10 +166,10 @@ void cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) cz_dpm_powerup_uvd(hwmgr); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_UVD, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cz_dpm_update_uvd_dpm(hwmgr, false); } @@ -197,11 +197,11 @@ void cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) cgs_set_clockgating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); cgs_set_powergating_state( hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_CG_STATE_UNGATE); cz_dpm_update_vce_dpm(hwmgr); cz_enable_disable_vce_dpm(hwmgr, true); } diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index ec0574e..26e0abc 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -208,7 +208,7 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_IOC_WAIT_RESULT_TIMEOUT 1 #define KFD_IOC_WAIT_RESULT_FAIL 2 -#define KFD_SIGNAL_EVENT_LIMIT 4096 +#define KFD_SIGNAL_EVENT_LIMIT (4096 + 512) struct kfd_ioctl_create_event_args { uint64_t event_page_offset; /* from KFD */ @@ -278,11 +278,6 @@ struct kfd_ioctl_alloc_memory_of_scratch_args { uint32_t pad; }; -struct kfd_ioctl_acquire_vm_args { - uint32_t drm_fd; /* to KFD */ - uint32_t gpu_id; /* to KFD */ -}; - /* Allocation flags: memory types */ #define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) @@ -366,22 +361,22 @@ struct kfd_ioctl_ipc_import_handle_args { struct kfd_ioctl_get_tile_config_args { /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; + uint64_t tile_config_ptr; /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; + uint64_t macro_tile_config_ptr; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - __u32 num_tile_configs; + uint32_t num_tile_configs; /* to KFD: array size allocated by user mode * from KFD: array size filled by kernel */ - __u32 num_macro_tile_configs; + uint32_t num_macro_tile_configs; - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ + uint32_t gpu_id; /* to KFD */ + uint32_t gb_addr_config; /* from KFD */ + uint32_t num_banks; /* from KFD */ + uint32_t num_ranks; /* from KFD */ /* struct size can be extended later if needed * without breaking ABI compatibility */ @@ -522,10 +517,7 @@ struct 
kfd_ioctl_cross_memory_copy_args { #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ AMDKFD_IOWR(0x20, struct kfd_ioctl_get_queue_wave_state_args) -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x21, struct kfd_ioctl_acquire_vm_args) - #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_END 0x21 #endif -- 2.7.4