Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3446-compilation-fix-for-raven-rocm.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3446-compilation-fix-for-raven-rocm.patch | 3302
1 file changed, 3302 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3446-compilation-fix-for-raven-rocm.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3446-compilation-fix-for-raven-rocm.patch new file mode 100644 index 00000000..1f38cdf4 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3446-compilation-fix-for-raven-rocm.patch @@ -0,0 +1,3302 @@ +From f6037fc2c073f58aa9c30ce0d039892940b6954f Mon Sep 17 00:00:00 2001 +From: Sanjay R Mehta <sanju.mehta@amd.com> +Date: Thu, 17 May 2018 17:12:55 +0530 +Subject: [PATCH 3446/4131] compilation fix for raven rocm + +Signed-off-by: Sanjay R Mehta <sanju.mehta@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/Makefile | 0 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 152 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 70 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 43 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 39 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 33 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 44 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1186 ++++++++++---------- + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 14 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 119 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 21 +- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 +- + drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 28 +- + .../drm/amd/powerplay/hwmgr/cz_clockpowergating.c | 8 +- + drivers/gpu/drm/radeon/Makefile | 3 +- + include/uapi/linux/kfd_ioctl.h | 28 +- + 17 files changed, 887 insertions(+), 922 deletions(-) + mode change 100755 => 100644 drivers/gpu/drm/amd/amdgpu/Makefile + mode change 100755 => 100644 drivers/gpu/drm/amd/amdgpu/amdgpu.h + mode change 100755 => 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c + mode change 100644 => 100755 drivers/gpu/drm/amd/amdkfd/kfd_device.c + mode change 100644 => 100755 drivers/gpu/drm/radeon/Makefile + +diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile +old mode 100755 +new mode 100644 +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +old mode 100755 +new mode 100644 +index e8017ee..18478d4 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -130,6 +130,7 @@ extern int amdgpu_job_hang_limit; + extern int amdgpu_lbpw; + extern int amdgpu_compute_multipipe; + extern int amdgpu_gpu_recovery; ++extern int amdgpu_emu_mode; + + #ifdef CONFIG_DRM_AMDGPU_SI + extern int amdgpu_si_support; +@@ -192,8 +193,8 @@ struct amdgpu_cs_parser; + struct amdgpu_job; + struct amdgpu_irq_src; + struct amdgpu_fpriv; +-struct kfd_vm_fault_info; + struct amdgpu_bo_va_mapping; ++struct kfd_vm_fault_info; + + enum amdgpu_cp_irq { + AMDGPU_CP_IRQ_GFX_EOP = 0, +@@ -411,6 +412,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); + void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); + int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + ++//extern const struct dma_buf_ops amdgpu_dmabuf_ops; ++ + /* sub-allocation manager, it has to be protected by another lock. 
+ * By conception this is an helper for other part of the driver + * like the indirect buffer or semaphore, which both have their +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +old mode 100755 +new mode 100644 +index fdaf5b3..62e3a04 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -20,7 +20,6 @@ + * OTHER DEALINGS IN THE SOFTWARE. + */ + +-#undef pr_fmt + #define pr_fmt(fmt) "kfd2kgd: " fmt + + #include "amdgpu_amdkfd.h" +@@ -30,12 +29,10 @@ + #include "amdgpu_gfx.h" + #include <linux/module.h> + +-#define AMDKFD_SKIP_UNCOMPILED_CODE 1 +- + const struct kgd2kfd_calls *kgd2kfd; + bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); + +-unsigned int global_compute_vmid_bitmap = 0xFF00; ++static unsigned int compute_vmid_bitmap = 0xFF00; + + int amdgpu_amdkfd_init(void) + { +@@ -98,10 +95,6 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) + break; + case CHIP_VEGA10: + case CHIP_RAVEN: +- if (adev->asic_type == CHIP_RAVEN) { +- dev_dbg(adev->dev, "DKMS installed kfd does not support Raven for kernel < 4.16\n"); +- return; +- } + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; + default: +@@ -153,10 +146,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) + + if (adev->kfd) { + struct kgd2kfd_shared_resources gpu_resources = { +- .compute_vmid_bitmap = global_compute_vmid_bitmap, ++ .compute_vmid_bitmap = compute_vmid_bitmap, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, +- .gpuvm_size = (uint64_t)amdgpu_vm_size << 30, ++ .gpuvm_size = min(adev->vm_manager.max_pfn ++ << AMDGPU_GPU_PAGE_SHIFT, ++ AMDGPU_VA_HOLE_START), + .drm_render_minor = adev->ddev->render->index + }; + +@@ -273,61 +268,6 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) + amdgpu_device_gpu_recover(adev, NULL, false); + } + +-int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, +- uint32_t vmid, uint64_t gpu_addr, +- uint32_t *ib_cmd, uint32_t ib_len) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +- struct amdgpu_job *job; +- struct amdgpu_ib *ib; +- struct amdgpu_ring *ring; +- struct dma_fence *f = NULL; +- int ret; +- +- switch (engine) { +- case KGD_ENGINE_MEC1: +- ring = &adev->gfx.compute_ring[0]; +- break; +- case KGD_ENGINE_SDMA1: +- ring = &adev->sdma.instance[0].ring; +- break; +- case KGD_ENGINE_SDMA2: +- ring = &adev->sdma.instance[1].ring; +- break; +- default: +- pr_err("Invalid engine in IB submission: %d\n", engine); +- ret = -EINVAL; +- goto err; +- } +- +- ret = amdgpu_job_alloc(adev, 1, &job, NULL); +- if (ret) +- goto err; +- +- ib = &job->ibs[0]; +- memset(ib, 0, sizeof(struct amdgpu_ib)); +- +- ib->gpu_addr = gpu_addr; +- ib->ptr = ib_cmd; +- ib->length_dw = ib_len; +- /* This works for NO_HWS. 
TODO: need to handle without knowing VMID */ +- job->vmid = vmid; +- +- ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); +- if (ret) { +- DRM_ERROR("amdgpu: failed to schedule IB.\n"); +- goto err_ib_sched; +- } +- +- ret = dma_fence_wait(f, false); +- +-err_ib_sched: +- dma_fence_put(f); +- amdgpu_job_free(job); +-err: +- return ret; +-} +- + u32 pool_to_domain(enum kgd_memory_pool p) + { + switch (p) { +@@ -416,8 +356,7 @@ void get_local_mem_info(struct kgd_dev *kgd, + aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; + + memset(mem_info, 0, sizeof(*mem_info)); +- if (!(adev->gmc.aper_base & address_mask || +- aper_limit & address_mask)) { ++ if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - + adev->gmc.visible_vram_size; +@@ -432,6 +371,11 @@ void get_local_mem_info(struct kgd_dev *kgd, + mem_info->local_mem_size_public, + mem_info->local_mem_size_private); + ++ if (amdgpu_emu_mode == 1) { ++ mem_info->mem_clk_max = 100; ++ return; ++ } ++ + if (amdgpu_sriov_vf(adev)) + mem_info->mem_clk_max = adev->clock.default_mclk / 100; + else +@@ -452,6 +396,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + /* the sclk is in quantas of 10kHz */ ++ if (amdgpu_emu_mode == 1) ++ return 100; ++ + + if (amdgpu_sriov_vf(adev)) + return adev->clock.default_sclk / 100; +@@ -511,9 +458,8 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + adev = obj->dev->dev_private; + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | +- AMDGPU_GEM_DOMAIN_GTT | +- AMDGPU_GEM_DOMAIN_DGMA))) +- /* Only VRAM, GTT and DGMA BOs are supported */ ++ AMDGPU_GEM_DOMAIN_GTT))) ++ /* Only VRAM and GTT BOs are supported */ + goto out_put; + + r = 0; +@@ -527,12 +473,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, + metadata_size, &metadata_flags); + if (flags) { +- /* If the preferred domain is DGMA, set flags to VRAM because +- * KFD doesn't support allocating DGMA memory +- */ +- *flags = (bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | +- AMDGPU_GEM_DOMAIN_DGMA)) ? +- ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; ++ *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
++ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; ++ + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + *flags |= ALLOC_MEM_FLAGS_PUBLIC; + } +@@ -550,11 +493,66 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) + return usage; + } + ++int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, ++ uint32_t vmid, uint64_t gpu_addr, ++ uint32_t *ib_cmd, uint32_t ib_len) ++{ ++ struct amdgpu_device *adev = (struct amdgpu_device *)kgd; ++ struct amdgpu_job *job; ++ struct amdgpu_ib *ib; ++ struct amdgpu_ring *ring; ++ struct dma_fence *f = NULL; ++ int ret; ++ ++ switch (engine) { ++ case KGD_ENGINE_MEC1: ++ ring = &adev->gfx.compute_ring[0]; ++ break; ++ case KGD_ENGINE_SDMA1: ++ ring = &adev->sdma.instance[0].ring; ++ break; ++ case KGD_ENGINE_SDMA2: ++ ring = &adev->sdma.instance[1].ring; ++ break; ++ default: ++ pr_err("Invalid engine in IB submission: %d\n", engine); ++ ret = -EINVAL; ++ goto err; ++ } ++ ++ ret = amdgpu_job_alloc(adev, 1, &job, NULL); ++ if (ret) ++ goto err; ++ ++ ib = &job->ibs[0]; ++ memset(ib, 0, sizeof(struct amdgpu_ib)); ++ ++ ib->gpu_addr = gpu_addr; ++ ib->ptr = ib_cmd; ++ ib->length_dw = ib_len; ++ /* This works for NO_HWS. TODO: need to handle without knowing VMID */ ++ job->vmid = vmid; ++ ++ ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); ++ if (ret) { ++ DRM_ERROR("amdgpu: failed to schedule IB.\n"); ++ goto err_ib_sched; ++ } ++ ++ ret = dma_fence_wait(f, false); ++ ++err_ib_sched: ++ dma_fence_put(f); ++ amdgpu_job_free(job); ++err: ++ return ret; ++} ++ + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, + u32 vmid) + { + if (adev->kfd) { +- if ((1 << vmid) & global_compute_vmid_bitmap) ++ if ((1 << vmid) & compute_vmid_bitmap) + return true; + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 1fb4915..f79b419 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -55,7 +55,6 @@ struct kgd_mem { + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; +- void *kptr; + uint64_t va; + + uint32_t mapping_flags; +@@ -65,25 +64,21 @@ struct kgd_mem { + struct page **user_pages; + + struct amdgpu_sync sync; +- +- /* flags bitfied */ +- bool coherent : 1; +- bool no_substitute : 1; +- bool aql_queue : 1; ++ bool aql_queue; + }; + + + /* KFD Memory Eviction */ + struct amdgpu_amdkfd_fence { + struct dma_fence base; +- void *mm; ++ struct mm_struct *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + }; + + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, +- void *mm); +-bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm); ++ struct mm_struct *mm); ++bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); + struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + + struct amdkfd_process_info { +@@ -108,27 +103,6 @@ struct amdkfd_process_info { + struct pid *pid; + }; + +-/* struct amdkfd_vm - +- * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs +- * belonging to a KFD process. All the VMs belonging to the same process point +- * to the same amdkfd_process_info. +- */ +-struct amdkfd_vm { +- /* Keep base as the first parameter for pointer compatibility between +- * amdkfd_vm and amdgpu_vm. 
+- */ +- struct amdgpu_vm base; +- +- /* List node in amdkfd_process_info.vm_list_head*/ +- struct list_head vm_list_node; +- +- struct amdgpu_device *adev; +- /* Points to the KFD process VM info*/ +- struct amdkfd_process_info *process_info; +- +- uint64_t pd_phys_addr; +-}; +- + int amdgpu_amdkfd_init(void); + void amdgpu_amdkfd_fini(void); + +@@ -144,8 +118,6 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); +-int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, +- struct dma_fence **ef); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +@@ -164,8 +136,6 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); + void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); + + /* Shared API */ +-int map_bo(struct amdgpu_device *rdev, uint64_t va, void *vm, +- struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va); + int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr); +@@ -199,31 +169,38 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); + }) + + /* GPUVM API */ +-int amdgpu_amdkfd_gpuvm_sync_memory( +- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); ++int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, ++ void **process_info, ++ struct dma_fence **ef); ++int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, ++ struct file *filp, ++ void **vm, void **process_info, ++ struct dma_fence **ef); ++void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, ++ struct amdgpu_vm *vm); ++void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); ++uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); + int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( +- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); ++ struct kgd_dev *kgd, struct kgd_mem *mem); + int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); ++int amdgpu_amdkfd_gpuvm_sync_memory( ++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); ++int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, ++ struct kgd_mem *mem, void **kptr, uint64_t *size); ++int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, ++ struct dma_fence **ef); + +-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, +- void **process_info, +- struct dma_fence **ef); +-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +- +-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); + + int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); + +-int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, +- struct kgd_mem *mem, void **kptr); + + int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, + struct kgd_mem *mem, uint64_t offset, +@@ -238,10 +215,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, 
void *vm, + struct kgd_mem *mem, + struct dma_buf **dmabuf); +-int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm); +-int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm); + + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); + void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); ++ + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +index 3961937..cf2f1e9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2016 Advanced Micro Devices, Inc. ++ * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), +@@ -20,18 +20,18 @@ + * OTHER DEALINGS IN THE SOFTWARE. + */ + ++#include <linux/dma-fence.h> + #include <linux/spinlock.h> + #include <linux/atomic.h> + #include <linux/stacktrace.h> + #include <linux/sched.h> + #include <linux/slab.h> ++#include <linux/sched/mm.h> + #include "amdgpu_amdkfd.h" + + const struct dma_fence_ops amd_kfd_fence_ops; + static atomic_t fence_seq = ATOMIC_INIT(0); + +-static int amd_kfd_fence_signal(struct dma_fence *f); +- + /* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be +@@ -60,7 +60,7 @@ static int amd_kfd_fence_signal(struct dma_fence *f); + */ + + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, +- void *mm) ++ struct mm_struct *mm) + { + struct amdgpu_amdkfd_fence *fence = NULL; + +@@ -68,10 +68,8 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + if (fence == NULL) + return NULL; + +- /* mm_struct mm is used as void pointer to identify the parent +- * KFD process. Don't dereference it. Fence and any threads using +- * mm is guranteed to be released before process termination. +- */ ++ /* This reference gets released in amd_kfd_fence_release */ ++ mmgrab(mm); + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); +@@ -124,45 +122,31 @@ static bool amd_kfd_fence_enable_signaling(struct dma_fence *f) + if (dma_fence_is_signaled(f)) + return true; + +- if (!kgd2kfd->schedule_evict_and_restore_process( +- (struct mm_struct *)fence->mm, f)) ++ if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + return true; + + return false; + } + +-static int amd_kfd_fence_signal(struct dma_fence *f) +-{ +- unsigned long flags; +- int ret; +- +- spin_lock_irqsave(f->lock, flags); +- /* Set enabled bit so cb will called */ +- set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &f->flags); +- ret = dma_fence_signal_locked(f); +- spin_unlock_irqrestore(f->lock, flags); +- +- return ret; +-} +- + /** + * amd_kfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. +- * It just RCU schedules freeing up the fence. +-*/ ++ * Drops the mm_struct reference and RCU schedules freeing up the fence. ++ */ + static void amd_kfd_fence_release(struct dma_fence *f) + { + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); ++ + /* Unconditionally signal the fence. The process is getting + * terminated. 
+ */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + +- amd_kfd_fence_signal(f); ++ mmdrop(fence->mm); + kfree_rcu(f, rcu); + } + +@@ -172,8 +156,8 @@ static void amd_kfd_fence_release(struct dma_fence *f) + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified +-*/ +-bool amd_kfd_fence_check_mm(struct dma_fence *f, void *mm) ++ */ ++bool amd_kfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) + { + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + +@@ -193,4 +177,3 @@ const struct dma_fence_ops amd_kfd_fence_ops = { + .wait = dma_fence_default_wait, + .release = amd_kfd_fence_release, + }; +- +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index fcc1add..c541656 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -20,7 +20,6 @@ + * OTHER DEALINGS IN THE SOFTWARE. + */ + +-#undef pr_fmt + #define pr_fmt(fmt) "kfd2kgd: " fmt + + #include <linux/fdtable.h> +@@ -42,8 +41,6 @@ + #include "gmc/gmc_7_1_sh_mask.h" + #include "cik_structs.h" + +-#define AMDKFD_SKIP_UNCOMPILED_CODE 1 +- + enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, +@@ -92,9 +89,6 @@ union TCP_WATCH_CNTL_BITS { + float f32All; + }; + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem); +- + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + + /* +@@ -106,8 +100,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, +@@ -148,7 +140,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); + static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); + static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +-static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req); + static int alloc_memory_of_scratch(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, +@@ -179,7 +170,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + +- + return 0; + } + +@@ -190,14 +180,13 @@ static const struct kfd2kgd_calls kfd2kgd = { + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, ++ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, +- .open_graphic_handle = open_graphic_handle, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, +- .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + 
.hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, +@@ -224,7 +213,6 @@ static const struct kfd2kgd_calls kfd2kgd = { + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .get_fw_version = get_fw_version, +- .set_num_of_requests = set_num_of_requests, + .get_cu_info = get_cu_info, + .alloc_memory_of_scratch = alloc_memory_of_scratch, + .write_config_static_mem = write_config_static_mem, +@@ -248,12 +236,6 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() + return (struct kfd2kgd_calls *)&kfd2kgd; + } + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem) +-{ +- return 0; +-} +- + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) + { + return (struct amdgpu_device *)kgd; +@@ -337,13 +319,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + return 0; + } + +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr) +-{ +- /* amdgpu owns the per-pipe state */ +- return 0; +-} +- + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); +@@ -952,18 +927,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + return hdr->common.ucode_version; + } + +-static void set_num_of_requests(struct kgd_dev *dev, uint8_t num_of_req) +-{ +- uint32_t value; +- struct amdgpu_device *adev = get_amdgpu_device(dev); +- +- value = RREG32(mmATC_ATS_DEBUG); +- value &= ~ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR_MASK; +- value |= (num_of_req << ATC_ATS_DEBUG__NUM_REQUESTS_AT_ERR__SHIFT); +- +- WREG32(mmATC_ATS_DEBUG, value); +-} +- + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index ea8e948..dfd0026 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -20,7 +20,6 @@ + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +-#undef pr_fmt + #define pr_fmt(fmt) "kfd2kgd: " fmt + + #include <linux/module.h> +@@ -57,15 +56,10 @@ static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + }; + + +-struct vi_sdma_mqd; +- + static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, + void *vm, struct kgd_mem **mem); + static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem); +- + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + + /* +@@ -78,8 +72,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_bases); + static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, +@@ -119,8 +111,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); + static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +-static void set_num_of_requests(struct kgd_dev *kgd, +- uint8_t num_of_requests); + static int alloc_memory_of_scratch(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, +@@ -162,16 +152,15 @@ static const struct kfd2kgd_calls kfd2kgd = { + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, ++ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .create_process_gpumem = create_process_gpumem, + .destroy_process_gpumem = destroy_process_gpumem, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, +- .open_graphic_handle = open_graphic_handle, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, +- .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, +@@ -197,7 +186,6 @@ static const struct kfd2kgd_calls kfd2kgd = { + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .get_fw_version = get_fw_version, +- .set_num_of_requests = set_num_of_requests, + .get_cu_info = get_cu_info, + .alloc_memory_of_scratch = alloc_memory_of_scratch, + .write_config_static_mem = write_config_static_mem, +@@ -233,12 +221,6 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) + + } + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem) +-{ +- return 0; +-} +- + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) + { + return (struct amdgpu_device *)kgd; +@@ -323,13 +305,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + return 0; + } + +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, 
uint64_t hpd_gpu_addr) +-{ +- /* amdgpu owns the per-pipe state */ +- return 0; +-} +- + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) + { + struct amdgpu_device *adev = get_amdgpu_device(kgd); +@@ -1023,12 +998,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + return hdr->common.ucode_version; + } + +-static void set_num_of_requests(struct kgd_dev *kgd, +- uint8_t num_of_requests) +-{ +- pr_debug("This is a stub\n"); +-} +- + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +index 2b74a65..f044739 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +@@ -19,7 +19,7 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +-#undef pr_fmt ++ + #define pr_fmt(fmt) "kfd2kgd: " fmt + + #include <linux/module.h> +@@ -80,6 +80,9 @@ + #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 + #define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + ++#define V9_PIPE_PER_MEC (4) ++#define V9_QUEUES_PER_PIPE_MEC (8) ++ + enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, +@@ -99,9 +102,6 @@ static int create_process_gpumem(struct kgd_dev *kgd, uint64_t va, size_t size, + void *vm, struct kgd_mem **mem); + static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem); + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem); +- + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); + + /* +@@ -114,8 +114,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_bases); + static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr); + static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); + static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, +@@ -156,8 +154,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); + static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +-static void set_num_of_requests(struct kgd_dev *kgd, +- uint8_t num_of_requests); + static int alloc_memory_of_scratch(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable, +@@ -206,16 +202,15 @@ static const struct kfd2kgd_calls kfd2kgd = { + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, ++ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .create_process_gpumem = create_process_gpumem, + .destroy_process_gpumem = destroy_process_gpumem, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, +- .open_graphic_handle = open_graphic_handle, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .set_pasid_vmid_mapping = 
kgd_set_pasid_vmid_mapping, +- .init_pipeline = kgd_init_pipeline, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, +@@ -241,7 +236,6 @@ static const struct kfd2kgd_calls kfd2kgd = { + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .get_fw_version = get_fw_version, +- .set_num_of_requests = set_num_of_requests, + .get_cu_info = get_cu_info, + .alloc_memory_of_scratch = alloc_memory_of_scratch, + .write_config_static_mem = write_config_static_mem, +@@ -277,12 +271,6 @@ static void destroy_process_gpumem(struct kgd_dev *kgd, struct kgd_mem *mem) + + } + +-static int open_graphic_handle(struct kgd_dev *kgd, uint64_t va, void *vm, +- int fd, uint32_t handle, struct kgd_mem **mem) +-{ +- return 0; +-} +- + static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) + { + return (struct amdgpu_device *)kgd; +@@ -319,7 +307,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) + { +- unsigned int bit = (pipe_id * adev->gfx.mec.num_pipe_per_mec + ++ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +@@ -404,13 +392,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + return 0; + } + +-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr) +-{ +- /* amdgpu owns the per-pipe state */ +- return 0; +-} +- + /* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ +@@ -927,7 +908,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + +- spin_lock(&adev->tlb_invalidation_lock); ++ mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * +@@ -969,8 +950,9 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); +- +- spin_unlock(&adev->tlb_invalidation_lock); ++ ++ mutex_unlock(&adev->srbm_mutex); ++ + } + + static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +@@ -1199,12 +1181,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) + return hdr->common.ucode_version; + } + +-static void set_num_of_requests(struct kgd_dev *kgd, +- uint8_t num_of_requests) +-{ +- pr_debug("This is a stub\n"); +-} +- + static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) + { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 8f0aa93..f42a891 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -20,27 +20,14 @@ + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +-#undef pr_fmt + #define pr_fmt(fmt) "kfd2kgd: " fmt + +-#include <linux/module.h> +-#include <linux/fdtable.h> +-#include <linux/uaccess.h> +-#include <linux/firmware.h> + #include <linux/list.h> + #include <linux/sched/mm.h> + #include <drm/drmP.h> +-#include <linux/dma-buf.h> +-#include <linux/pagemap.h> ++#include "amdgpu_object.h" ++#include "amdgpu_vm.h" + #include "amdgpu_amdkfd.h" +-#include "amdgpu_ucode.h" +-#include "gca/gfx_8_0_sh_mask.h" +-#include "gca/gfx_8_0_d.h" +-#include "gca/gfx_8_0_enum.h" +-#include "oss/oss_3_0_sh_mask.h" +-#include "oss/oss_3_0_d.h" +-#include "gmc/gmc_8_1_sh_mask.h" +-#include "gmc/gmc_8_1_d.h" + + /* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. +@@ -51,15 +38,13 @@ + #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + + /* Impose limit on how much memory KFD can use */ +-struct kfd_mem_usage_limit { ++static struct { + uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; + int64_t system_mem_used; + int64_t userptr_mem_used; + spinlock_t mem_limit_lock; +-}; +- +-static struct kfd_mem_usage_limit kfd_mem_limit; ++} kfd_mem_limit; + + /* Struct used for amdgpu_amdkfd_bo_validate */ + struct amdgpu_vm_parser { +@@ -182,7 +167,8 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); +- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { ++ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT && ++ !bo->tbo.sg) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } +@@ -269,7 +255,6 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ +- + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) +@@ -336,6 +321,7 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) + { ++ struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), +@@ -371,6 +357,23 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); + } + ++static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) ++{ ++ struct amdgpu_device *adev = ++ amdgpu_ttm_adev(vm->root.base.bo->tbo.bdev); ++ u64 offset; ++ uint64_t flags = AMDGPU_PTE_VALID; ++ ++ offset = amdgpu_bo_gpu_offset(vm->root.base.bo); ++ ++ /* On some ASICs the FB doesn't start at 0. Adjust FB offset ++ * to an actual MC address. ++ */ ++ adev->gmc.gmc_funcs->get_vm_pde(adev, -1, &offset, &flags); ++ ++ return offset; ++} ++ + /* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling +@@ -378,18 +381,17 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) + * again. Page directories are only updated after updating page + * tables. 
+ */ +-static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) ++static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) + { +- struct amdgpu_bo *pd = vm->base.root.base.bo; ++ struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; +- uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + +- ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, ++ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); +@@ -402,11 +404,9 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) + return ret; + } + +- addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); +- amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); +- vm->pd_phys_addr = addr; ++ vm->pd_phys_addr = get_vm_pd_gpu_offset(vm); + +- if (vm->base.use_cpu_for_update) { ++ if (vm->use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); +@@ -417,23 +417,6 @@ static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) + return 0; + } + +-static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, +- struct dma_fence *f) +-{ +- int ret = amdgpu_sync_fence(adev, sync, f, false); +- +- /* Sync objects can't handle multiple GPUs (contexts) updating +- * sync->last_vm_update. Fortunately we don't need it for +- * KFD's purposes, so we can just drop that fence. +- */ +- if (sync->last_vm_update) { +- dma_fence_put(sync->last_vm_update); +- sync->last_vm_update = NULL; +- } +- +- return ret; +-} +- + static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) + { + struct amdgpu_bo *pd = vm->root.base.bo; +@@ -444,7 +427,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) + if (ret) + return ret; + +- return sync_vm_fence(adev, sync, vm->last_update); ++ return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + } + + /* add_bo_to_vm - Add a BO to a VM +@@ -460,14 +443,12 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) + * 4a. Validate new page tables and directories + */ + static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, +- struct amdgpu_vm *avm, bool is_aql, ++ struct amdgpu_vm *vm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) + { + int ret; + struct kfd_bo_va_list *bo_va_entry; +- struct amdkfd_vm *kvm = container_of(avm, +- struct amdkfd_vm, base); +- struct amdgpu_bo *pd = avm->root.base.bo; ++ struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; +@@ -486,11 +467,11 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, +- va + bo_size, avm); ++ va + bo_size, vm); + + /* Add BO to VM internal data structures*/ +- bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); +- if (bo_va_entry->bo_va == NULL) { ++ bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo); ++ if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); +@@ -512,28 +493,28 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + * fence, so remove it temporarily. 
+ */ + amdgpu_amdkfd_remove_eviction_fence(pd, +- kvm->process_info->eviction_fence, ++ vm->process_info->eviction_fence, + NULL, NULL); + +- ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); ++ ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + +- ret = vm_validate_pt_pd_bos(kvm); +- if (ret != 0) { ++ ret = vm_validate_pt_pd_bos(vm); ++ if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ +- amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); ++ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + + return 0; + + err_alloc_pts: +- amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); ++ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); + err_vmadd: +@@ -587,6 +568,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + { + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; ++ struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); +@@ -651,134 +633,25 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + return ret; + } + +-static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, +- uint64_t size, void *vm, struct kgd_mem **mem, +- uint64_t *offset, u32 domain, u64 flags, +- struct sg_table *sg, bool aql_queue, +- bool readonly, bool execute, bool coherent, bool no_sub, +- bool userptr) +-{ +- struct amdgpu_device *adev; +- int ret; +- struct amdgpu_bo *bo; +- uint64_t user_addr = 0; +- int byte_align; +- u32 alloc_domain; +- uint32_t mapping_flags; +- struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; +- +- if (aql_queue) +- size = size >> 1; +- if (userptr) { +- if (!offset || !*offset) +- return -EINVAL; +- user_addr = *offset; +- } +- +- adev = get_amdgpu_device(kgd); +- byte_align = (adev->family == AMDGPU_FAMILY_VI && +- adev->asic_type != CHIP_FIJI && +- adev->asic_type != CHIP_POLARIS10 && +- adev->asic_type != CHIP_POLARIS11) ? +- VI_BO_SIZE_ALIGN : 1; +- +- *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); +- if (*mem == NULL) { +- ret = -ENOMEM; +- goto err; +- } +- INIT_LIST_HEAD(&(*mem)->bo_va_list); +- mutex_init(&(*mem)->lock); +- (*mem)->coherent = coherent; +- (*mem)->no_substitute = no_sub; +- (*mem)->aql_queue = aql_queue; +- +- mapping_flags = AMDGPU_VM_PAGE_READABLE; +- if (!readonly) +- mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; +- if (execute) +- mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; +- if (coherent) +- mapping_flags |= AMDGPU_VM_MTYPE_UC; +- else +- mapping_flags |= AMDGPU_VM_MTYPE_NC; +- +- (*mem)->mapping_flags = mapping_flags; +- +- alloc_domain = userptr ? AMDGPU_GEM_DOMAIN_CPU : domain; +- +- amdgpu_sync_create(&(*mem)->sync); +- +- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); +- if (ret) { +- pr_debug("Insufficient system memory\n"); +- goto err_bo_create; +- } +- +- pr_debug("\t create BO VA 0x%llx size 0x%llx domain %s\n", +- va, size, domain_string(alloc_domain)); +- +- /* Allocate buffer object. Userptr objects need to start out +- * in the CPU domain, get moved to GTT when pinned. +- */ +- ret = amdgpu_bo_create(adev, size, byte_align, false, +- alloc_domain, +- flags, sg, NULL, &bo); +- if (ret != 0) { +- pr_debug("Failed to create BO on domain %s. 
ret %d\n", +- domain_string(alloc_domain), ret); +- unreserve_system_mem_limit(adev, size, alloc_domain); +- goto err_bo_create; +- } +- bo->kfd_bo = *mem; +- (*mem)->bo = bo; +- if (userptr) +- bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; +- +- (*mem)->va = va; +- (*mem)->domain = domain; +- (*mem)->mapped_to_gpu_memory = 0; +- (*mem)->process_info = kfd_vm->process_info; +- add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, userptr); +- +- if (userptr) { +- ret = init_user_pages(*mem, current->mm, user_addr); +- if (ret) { +- mutex_lock(&kfd_vm->process_info->lock); +- list_del(&(*mem)->validate_list.head); +- mutex_unlock(&kfd_vm->process_info->lock); +- goto allocate_init_user_pages_failed; +- } +- } +- +- if (offset) +- *offset = amdgpu_bo_mmap_offset(bo); +- +- return 0; +- +-allocate_init_user_pages_failed: +- amdgpu_bo_unref(&bo); +-err_bo_create: +- kfree(*mem); +-err: +- return ret; +-} +- + /* Reserving a BO and its page table BOs must happen atomically to +- * avoid deadlocks. When updating userptrs we need to temporarily +- * back-off the reservation and then reacquire it. Track all the +- * reservation info in a context structure. Buffers can be mapped to +- * multiple VMs simultaneously (buffers being restored on multiple +- * GPUs). ++ * avoid deadlocks. Some operations update multiple VMs at once. Track ++ * all the reservation info in a context structure. Optionally a sync ++ * object can track VM updates. + */ + struct bo_vm_reservation_context { +- struct amdgpu_bo_list_entry kfd_bo; +- unsigned int n_vms; +- struct amdgpu_bo_list_entry *vm_pd; +- struct ww_acquire_ctx ticket; +- struct list_head list, duplicates; +- struct amdgpu_sync *sync; +- bool reserved; ++ struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ ++ unsigned int n_vms; /* Number of VMs reserved */ ++ struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ ++ struct ww_acquire_ctx ticket; /* Reservation ticket */ ++ struct list_head list, duplicates; /* BO lists */ ++ struct amdgpu_sync *sync; /* Pointer to sync object */ ++ bool reserved; /* Whether BOs are reserved */ ++}; ++ ++enum bo_vm_match { ++ BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ ++ BO_VM_MAPPED, /* Match VMs where a BO is mapped */ ++ BO_VM_ALL, /* Match all VMs a BO was added to */ + }; + + /** +@@ -803,9 +676,8 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + +- ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) +- * ctx->n_vms, GFP_KERNEL); +- if (ctx->vm_pd == NULL) ++ ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); ++ if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; +@@ -821,10 +693,8 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; +- else ++ else { + pr_err("Failed to reserve buffers in ttm\n"); +- +- if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } +@@ -832,24 +702,19 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, + return ret; + } + +-enum VA_TYPE { +- VA_NOT_MAPPED = 0, +- VA_MAPPED, +- VA_DO_NOT_CARE, +-}; +- + /** +- * reserve_bo_and_vm - reserve a BO and some VMs that the BO has been added +- * to, conditionally based on map_type. ++ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. 
+ * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). ++ * ++ * Returns 0 for success, negative for failure. + */ + static int reserve_bo_and_cond_vms(struct kgd_mem *mem, +- struct amdgpu_vm *vm, enum VA_TYPE map_type, +- struct bo_vm_reservation_context *ctx) ++ struct amdgpu_vm *vm, enum bo_vm_match map_type, ++ struct bo_vm_reservation_context *ctx) + { + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; +@@ -867,16 +732,16 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type +- && map_type != VA_DO_NOT_CARE)) ++ && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { +- ctx->vm_pd = kzalloc(sizeof(struct amdgpu_bo_list_entry) +- * ctx->n_vms, GFP_KERNEL); +- if (ctx->vm_pd == NULL) ++ ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), ++ GFP_KERNEL); ++ if (!ctx->vm_pd) + return -ENOMEM; + } + +@@ -891,7 +756,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type +- && map_type != VA_DO_NOT_CARE)) ++ && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, +@@ -914,6 +779,16 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + return ret; + } + ++/** ++ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context ++ * @ctx: Reservation context to unreserve ++ * @wait: Optionally wait for a sync object representing pending VM updates ++ * @intr: Whether the wait is interruptible ++ * ++ * Also frees any resources allocated in ++ * reserve_bo_and_(cond_)vm(s). Returns the status from ++ * amdgpu_sync_wait. ++ */ + static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) + { +@@ -940,25 +815,25 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, + { + struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_vm *vm = bo_va->base.vm; +- struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); + struct amdgpu_bo *pd = vm->root.base.bo; + +- /* Remove eviction fence from PD (and thereby from PTs too as they +- * share the resv. object. Otherwise during PT update job (see +- * amdgpu_vm_bo_update_mapping), eviction fence will get added to +- * job->sync object ++ /* Remove eviction fence from PD (and thereby from PTs too as ++ * they share the resv. object). Otherwise during PT update ++ * job (see amdgpu_vm_bo_update_mapping), eviction fence would ++ * get added to job->sync object and job execution would ++ * trigger the eviction fence. 
+ */ + amdgpu_amdkfd_remove_eviction_fence(pd, +- kvm->process_info->eviction_fence, ++ vm->process_info->eviction_fence, + NULL, NULL); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + + amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + + /* Add the eviction fence back */ +- amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); ++ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); + +- sync_vm_fence(adev, sync, bo_va->last_pt_update); ++ amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + + return 0; + } +@@ -978,12 +853,12 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, + + /* Update the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); +- if (ret != 0) { ++ if (ret) { + pr_err("amdgpu_vm_bo_update failed\n"); + return ret; + } + +- return sync_vm_fence(adev, sync, bo_va->last_pt_update); ++ return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + } + + static int map_bo_to_gpuvm(struct amdgpu_device *adev, +@@ -994,8 +869,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, + + /* Set virtual address for the allocation */ + ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, +- amdgpu_bo_size(entry->bo_va->base.bo), entry->pte_flags); +- if (ret != 0) { ++ amdgpu_bo_size(entry->bo_va->base.bo), ++ entry->pte_flags); ++ if (ret) { + pr_err("Failed to map VA 0x%llx in vm. ret %d\n", + entry->va, ret); + return ret; +@@ -1005,7 +881,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, + return 0; + + ret = update_gpuvm_pte(adev, entry, sync); +- if (ret != 0) { ++ if (ret) { + pr_err("update_gpuvm_pte() failed\n"); + goto update_gpuvm_pte_failed; + } +@@ -1035,116 +911,424 @@ static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) + return sg; + } + +-int amdgpu_amdkfd_gpuvm_sync_memory( +- struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) ++static int process_validate_vms(struct amdkfd_process_info *process_info) + { +- int ret = 0; +- struct amdgpu_sync sync; +- struct amdgpu_device *adev; ++ struct amdgpu_vm *peer_vm; ++ int ret; + +- adev = get_amdgpu_device(kgd); +- amdgpu_sync_create(&sync); ++ list_for_each_entry(peer_vm, &process_info->vm_list_head, ++ vm_list_node) { ++ ret = vm_validate_pt_pd_bos(peer_vm); ++ if (ret) ++ return ret; ++ } + +- mutex_lock(&mem->lock); +- amdgpu_sync_clone(adev, &mem->sync, &sync); +- mutex_unlock(&mem->lock); ++ return 0; ++} + +- ret = amdgpu_sync_wait(&sync, intr); +- amdgpu_sync_free(&sync); ++static int process_sync_pds_resv(struct amdkfd_process_info *process_info, ++ struct amdgpu_sync *sync) ++{ ++ struct amdgpu_vm *peer_vm; ++ int ret; ++ ++ list_for_each_entry(peer_vm, &process_info->vm_list_head, ++ vm_list_node) { ++ struct amdgpu_bo *pd = peer_vm->root.base.bo; ++ ++ ret = amdgpu_sync_resv(NULL, ++ sync, pd->tbo.resv, ++ AMDGPU_FENCE_OWNER_UNDEFINED, false); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int process_update_pds(struct amdkfd_process_info *process_info, ++ struct amdgpu_sync *sync) ++{ ++ struct amdgpu_vm *peer_vm; ++ int ret; ++ ++ list_for_each_entry(peer_vm, &process_info->vm_list_head, ++ vm_list_node) { ++ ret = vm_update_pds(peer_vm, sync); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ++ struct dma_fence **ef) ++{ ++ struct amdkfd_process_info *info = NULL; ++ int ret; ++ ++ if (!*process_info) { ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return -ENOMEM; ++ ++ 
mutex_init(&info->lock); ++ INIT_LIST_HEAD(&info->vm_list_head); ++ INIT_LIST_HEAD(&info->kfd_bo_list); ++ INIT_LIST_HEAD(&info->userptr_valid_list); ++ INIT_LIST_HEAD(&info->userptr_inval_list); ++ ++ info->eviction_fence = ++ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), ++ current->mm); ++ if (!info->eviction_fence) { ++ pr_err("Failed to create eviction fence\n"); ++ ret = -ENOMEM; ++ goto create_evict_fence_fail; ++ } ++ ++ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); ++ atomic_set(&info->evicted_bos, 0); ++ INIT_DELAYED_WORK(&info->work, ++ amdgpu_amdkfd_restore_userptr_worker); ++ ++ *process_info = info; ++ *ef = dma_fence_get(&info->eviction_fence->base); ++ } ++ ++ vm->process_info = *process_info; ++ ++ /* Validate page directory and attach eviction fence */ ++ ret = amdgpu_bo_reserve(vm->root.base.bo, true); ++ if (ret) ++ goto reserve_pd_fail; ++ ret = vm_validate_pt_pd_bos(vm); ++ if (ret) { ++ pr_err("validate_pt_pd_bos() failed\n"); ++ goto validate_pd_fail; ++ } ++ amdgpu_bo_fence(vm->root.base.bo, ++ &vm->process_info->eviction_fence->base, true); ++ amdgpu_bo_unreserve(vm->root.base.bo); ++ ++ /* Update process info */ ++ mutex_lock(&vm->process_info->lock); ++ list_add_tail(&vm->vm_list_node, ++ &(vm->process_info->vm_list_head)); ++ vm->process_info->n_vms++; ++ mutex_unlock(&vm->process_info->lock); ++ ++ return 0; ++ ++validate_pd_fail: ++ amdgpu_bo_unreserve(vm->root.base.bo); ++reserve_pd_fail: ++ vm->process_info = NULL; ++ if (info) { ++ /* Two fence references: one in info and one in *ef */ ++ dma_fence_put(&info->eviction_fence->base); ++ dma_fence_put(*ef); ++ *ef = NULL; ++ *process_info = NULL; ++create_evict_fence_fail: ++ kfree(info); ++ } ++ return ret; ++} ++ ++int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, ++ void **process_info, ++ struct dma_fence **ef) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct amdgpu_vm *new_vm; ++ int ret; ++ ++ new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); ++ if (!new_vm) ++ return -ENOMEM; ++ ++ /* Initialize AMDGPU part of the VM */ ++ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); ++ if (ret) { ++ pr_err("Failed init vm ret %d\n", ret); ++ goto amdgpu_vm_init_fail; ++ } ++ ++ /* Initialize KFD part of the VM and process info */ ++ ret = init_kfd_vm(new_vm, process_info, ef); ++ if (ret) ++ goto init_kfd_vm_fail; ++ ++ *vm = (void *) new_vm; ++ ++ return 0; ++ ++init_kfd_vm_fail: ++ amdgpu_vm_fini(adev, new_vm); ++amdgpu_vm_init_fail: ++ kfree(new_vm); + return ret; + } + +-#define BOOL_TO_STR(b) (b == true) ? 
"true" : "false" ++int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, ++ struct file *filp, ++ void **vm, void **process_info, ++ struct dma_fence **ef) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct drm_file *drm_priv = filp->private_data; ++ struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; ++ struct amdgpu_vm *avm = &drv_priv->vm; ++ int ret; ++ ++ /* Convert VM into a compute VM */ ++ ret = amdgpu_vm_make_compute(adev, avm); ++ if (ret) ++ return ret; ++ ++ /* Initialize KFD part of the VM and process info */ ++ ret = init_kfd_vm(avm, process_info, ef); ++ if (ret) ++ return ret; ++ ++ *vm = (void *)avm; ++ ++ return 0; ++} ++ ++void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, ++ struct amdgpu_vm *vm) ++{ ++ struct amdkfd_process_info *process_info = vm->process_info; ++ struct amdgpu_bo *pd = vm->root.base.bo; ++ ++ if (vm->vm_context != AMDGPU_VM_CONTEXT_COMPUTE) ++ return; ++ ++ /* Release eviction fence from PD */ ++ amdgpu_bo_reserve(pd, false); ++ amdgpu_bo_fence(pd, NULL, false); ++ amdgpu_bo_unreserve(pd); ++ ++ if (!process_info) ++ return; ++ ++ /* Update process info */ ++ mutex_lock(&process_info->lock); ++ process_info->n_vms--; ++ list_del(&vm->vm_list_node); ++ mutex_unlock(&process_info->lock); ++ ++ /* Release per-process resources when last compute VM is destroyed */ ++ if (!process_info->n_vms) { ++ WARN_ON(!list_empty(&process_info->kfd_bo_list)); ++ WARN_ON(!list_empty(&process_info->userptr_valid_list)); ++ WARN_ON(!list_empty(&process_info->userptr_inval_list)); ++ ++ dma_fence_put(&process_info->eviction_fence->base); ++ cancel_delayed_work_sync(&process_info->work); ++ put_pid(process_info->pid); ++ kfree(process_info); ++ } ++} ++ ++void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) ++{ ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; ++ ++ if (WARN_ON(!kgd || !vm)) ++ return; ++ ++ pr_debug("Destroying process vm %p\n", vm); ++ ++ /* Release the VM context */ ++ amdgpu_vm_fini(adev, avm); ++ kfree(vm); ++} ++ ++uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) ++{ ++ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; ++ ++ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; ++} + + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) + { +- bool aql_queue, public, readonly, execute, coherent, no_sub, userptr; +- u64 alloc_flag; +- uint32_t domain; ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; ++ uint64_t user_addr = 0; + struct sg_table *sg = NULL; +- +- if (!(flags & ALLOC_MEM_FLAGS_NONPAGED)) { +- pr_debug("current hw doesn't support paged memory\n"); +- return -EINVAL; +- } +- +- domain = 0; +- alloc_flag = 0; +- +- aql_queue = (flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM) ? true : false; +- public = (flags & ALLOC_MEM_FLAGS_PUBLIC) ? true : false; +- readonly = (flags & ALLOC_MEM_FLAGS_READONLY) ? true : false; +- execute = (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ? true : false; +- coherent = (flags & ALLOC_MEM_FLAGS_COHERENT) ? true : false; +- no_sub = (flags & ALLOC_MEM_FLAGS_NO_SUBSTITUTE) ? true : false; +- userptr = (flags & ALLOC_MEM_FLAGS_USERPTR) ? 
true : false; ++ enum ttm_bo_type bo_type = ttm_bo_type_device; ++ struct amdgpu_bo *bo; ++ int byte_align; ++ u32 domain, alloc_domain; ++ u64 alloc_flags; ++ uint32_t mapping_flags; ++ int ret; + + /* + * Check on which domain to allocate BO + */ + if (flags & ALLOC_MEM_FLAGS_VRAM) { +- domain = AMDGPU_GEM_DOMAIN_VRAM; +- alloc_flag = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; +- if (public) { +- alloc_flag = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; +- } +- alloc_flag |= AMDGPU_GEM_CREATE_VRAM_CLEARED; +- } else if (flags & (ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_USERPTR)) { ++ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; ++ alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; ++ alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? ++ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : ++ AMDGPU_GEM_CREATE_NO_CPU_ACCESS; ++ } else if (flags & ALLOC_MEM_FLAGS_GTT) { ++ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; ++ alloc_flags = 0; ++ } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; +- alloc_flag = 0; ++ alloc_domain = AMDGPU_GEM_DOMAIN_CPU; ++ alloc_flags = 0; ++ if (!offset || !*offset) ++ return -EINVAL; ++ user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; +- alloc_flag = 0; ++ alloc_domain = AMDGPU_GEM_DOMAIN_CPU; ++ alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; ++ bo_type = ttm_bo_type_sg; ++ } else { ++ return -EINVAL; ++ } ++ ++ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); ++ if (!*mem) { ++ ret = -ENOMEM; ++ goto err; + } ++ INIT_LIST_HEAD(&(*mem)->bo_va_list); ++ mutex_init(&(*mem)->lock); ++ (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); + +- if (offset && !userptr) +- *offset = 0; ++ /* Workaround for AQL queue wraparound bug. Map the same ++ * memory twice. That means we only actually allocate half ++ * the memory. ++ */ ++ if ((*mem)->aql_queue) ++ size = size >> 1; + +- pr_debug("Allocate VA 0x%llx - 0x%llx domain %s aql %s\n", +- va, va + size, domain_string(domain), +- BOOL_TO_STR(aql_queue)); ++ /* Workaround for TLB bug on older VI chips */ ++ byte_align = (adev->family == AMDGPU_FAMILY_VI && ++ adev->asic_type != CHIP_FIJI && ++ adev->asic_type != CHIP_POLARIS10 && ++ adev->asic_type != CHIP_POLARIS11) ? ++ VI_BO_SIZE_ALIGN : 1; + +- pr_debug("\t alloc_flag 0x%llx public %s readonly %s execute %s coherent %s no_sub %s\n", +- alloc_flag, BOOL_TO_STR(public), +- BOOL_TO_STR(readonly), BOOL_TO_STR(execute), +- BOOL_TO_STR(coherent), BOOL_TO_STR(no_sub)); ++ mapping_flags = AMDGPU_VM_PAGE_READABLE; ++ if (!(flags & ALLOC_MEM_FLAGS_READONLY)) ++ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; ++ if (flags & ALLOC_MEM_FLAGS_EXECUTE_ACCESS) ++ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; ++ if (flags & ALLOC_MEM_FLAGS_COHERENT) ++ mapping_flags |= AMDGPU_VM_MTYPE_UC; ++ else ++ mapping_flags |= AMDGPU_VM_MTYPE_NC; ++ (*mem)->mapping_flags = mapping_flags; + +- return __alloc_memory_of_gpu(kgd, va, size, vm, mem, +- offset, domain, +- alloc_flag, sg, +- aql_queue, readonly, execute, +- coherent, no_sub, userptr); ++ amdgpu_sync_create(&(*mem)->sync); ++ ++ if (!sg) { ++ ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, ++ alloc_domain); ++ if (ret) { ++ pr_debug("Insufficient system memory\n"); ++ goto err_reserve_limit; ++ } ++ } ++ ++ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", ++ va, size, domain_string(alloc_domain)); ++ ++ /* Allocate buffer object. 
Userptr objects need to start out ++ * in the CPU domain, get moved to GTT when pinned. ++ */ ++#if 0 ++ ret = amdgpu_bo_create(adev, size, byte_align, alloc_domain, ++ alloc_flags, bo_type, NULL, &bo); ++#else ++ ret = amdgpu_bo_create(adev, size, byte_align, false , alloc_domain, ++ alloc_flags, sg , NULL, &bo); ++#endif ++ if (ret) { ++ pr_debug("Failed to create BO on domain %s. ret %d\n", ++ domain_string(alloc_domain), ret); ++ goto err_bo_create; ++ } ++ if (bo_type == ttm_bo_type_sg) { ++ bo->tbo.sg = sg; ++ bo->tbo.ttm->sg = sg; ++ } ++ bo->kfd_bo = *mem; ++ (*mem)->bo = bo; ++ if (user_addr) ++ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; ++ ++ (*mem)->va = va; ++ (*mem)->domain = domain; ++ (*mem)->mapped_to_gpu_memory = 0; ++ (*mem)->process_info = avm->process_info; ++ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); ++ ++ if (user_addr) { ++ ret = init_user_pages(*mem, current->mm, user_addr); ++ if (ret) { ++ mutex_lock(&avm->process_info->lock); ++ list_del(&(*mem)->validate_list.head); ++ mutex_unlock(&avm->process_info->lock); ++ goto allocate_init_user_pages_failed; ++ } ++ } ++ ++ if (offset) ++ *offset = amdgpu_bo_mmap_offset(bo); ++ ++ return 0; ++ ++allocate_init_user_pages_failed: ++ amdgpu_bo_unref(&bo); ++err_bo_create: ++ if (!sg) ++ unreserve_system_mem_limit(adev, size, alloc_domain); ++err_reserve_limit: ++ kfree(*mem); ++err: ++ if (sg) { ++ sg_free_table(sg); ++ kfree(sg); ++ } ++ return ret; + } + + int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( +- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) ++ struct kgd_dev *kgd, struct kgd_mem *mem) + { +- struct amdgpu_device *adev; ++ struct amdkfd_process_info *process_info = mem->process_info; ++ unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; +- int ret = 0; + struct ttm_validate_buffer *bo_list_entry; +- struct amdkfd_process_info *process_info; +- unsigned long bo_size; +- +- adev = get_amdgpu_device(kgd); +- process_info = ((struct amdkfd_vm *)vm)->process_info; +- +- bo_size = mem->bo->tbo.mem.size; ++ int ret; + + mutex_lock(&mem->lock); + + if (mem->mapped_to_gpu_memory > 0) { +- pr_debug("BO VA 0x%llx size 0x%lx is already mapped to vm %p.\n", +- mem->va, bo_size, vm); ++ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", ++ mem->va, bo_size); + mutex_unlock(&mem->lock); + return -EBUSY; + } +@@ -1172,8 +1356,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + kvfree(mem->user_pages); + } + +- ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx); +- if (unlikely(ret != 0)) ++ ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); ++ if (unlikely(ret)) + return ret; + + /* The eviction fence should be removed by the last unmap. 
+@@ -1187,10 +1371,9 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + mem->va + bo_size * (1 + mem->aql_queue)); + + /* Remove from VM internal data structures */ +- list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) { ++ list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, + entry, bo_size); +- } + + ret = unreserve_bo_and_vms(&ctx, false, false); + +@@ -1215,7 +1398,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + { +- struct amdgpu_device *adev; ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + int ret; + struct amdgpu_bo *bo; + uint32_t domain; +@@ -1223,11 +1407,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct bo_vm_reservation_context ctx; + struct kfd_bo_va_list *bo_va_entry = NULL; + struct kfd_bo_va_list *bo_va_entry_aql = NULL; +- struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + unsigned long bo_size; +- bool is_invalid_userptr; ++ bool is_invalid_userptr = false; + +- adev = get_amdgpu_device(kgd); ++ bo = mem->bo; ++ if (!bo) { ++ pr_err("Invalid BO when mapping memory to GPU\n"); ++ return -EINVAL; ++ } + + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore +@@ -1239,20 +1426,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ +- down_read(¤t->mm->mmap_sem); +- is_invalid_userptr = atomic_read(&mem->invalid); +- up_read(¤t->mm->mmap_sem); ++ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { ++ down_write(¤t->mm->mmap_sem); ++ is_invalid_userptr = atomic_read(&mem->invalid); ++ up_write(¤t->mm->mmap_sem); ++ } + + mutex_lock(&mem->lock); + +- bo = mem->bo; +- +- if (!bo) { +- pr_err("Invalid BO when mapping memory to GPU\n"); +- ret = -EINVAL; +- goto out; +- } +- + domain = mem->domain; + bo_size = bo->tbo.mem.size; + +@@ -1262,7 +1443,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + vm, domain_string(domain)); + + ret = reserve_bo_and_vm(mem, vm, &ctx); +- if (unlikely(ret != 0)) ++ if (unlikely(ret)) + goto out; + + /* Userptr can be marked as "not invalid", but not actually be +@@ -1273,20 +1454,20 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + is_invalid_userptr = true; + +- if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { +- ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, ++ if (check_if_add_bo_to_vm(avm, mem)) { ++ ret = add_bo_to_vm(adev, mem, avm, false, + &bo_va_entry); +- if (ret != 0) ++ if (ret) + goto add_bo_to_vm_failed; + if (mem->aql_queue) { +- ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, ++ ret = add_bo_to_vm(adev, mem, avm, + true, &bo_va_entry_aql); +- if (ret != 0) ++ if (ret) + goto add_bo_to_vm_failed_aql; + } + } else { +- ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); +- if (unlikely(ret != 0)) ++ ret = vm_validate_pt_pd_bos(avm); ++ if (unlikely(ret)) + goto add_bo_to_vm_failed; + } + +@@ -1311,7 +1492,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + + ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); +- if (ret != 0) { ++ if (ret) { + pr_err("Failed to map radeon bo to gpuvm\n"); + goto map_bo_to_gpuvm_failed; + } +@@ -1329,15 +1510,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + } + } + +- if (mem->domain & 
AMDGPU_GEM_DOMAIN_DGMA) { +- ret = amdgpu_bo_pin(bo, mem->domain, NULL); +- if (ret != 0) { +- pr_err("Unable to pin DGMA BO\n"); +- goto map_bo_to_gpuvm_failed; +- } +- } else if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) ++ if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + amdgpu_bo_fence(bo, +- &kfd_vm->process_info->eviction_fence->base, ++ &avm->process_info->eviction_fence->base, + true); + ret = unreserve_bo_and_vms(&ctx, false, false); + +@@ -1358,200 +1533,30 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + return ret; + } + +-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, +- void **process_info, +- struct dma_fence **ef) +-{ +- int ret; +- struct amdkfd_vm *new_vm; +- struct amdkfd_process_info *info; +- struct amdgpu_device *adev = get_amdgpu_device(kgd); +- +- new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); +- if (new_vm == NULL) +- return -ENOMEM; +- +- /* Initialize the VM context, allocate the page directory and zero it */ +- ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); +- if (ret != 0) { +- pr_err("Failed init vm ret %d\n", ret); +- /* Undo everything related to the new VM context */ +- goto vm_init_fail; +- } +- new_vm->adev = adev; +- +- if (!*process_info) { +- info = kzalloc(sizeof(*info), GFP_KERNEL); +- if (!info) { +- pr_err("Failed to create amdkfd_process_info"); +- ret = -ENOMEM; +- goto alloc_process_info_fail; +- } +- +- mutex_init(&info->lock); +- INIT_LIST_HEAD(&info->vm_list_head); +- INIT_LIST_HEAD(&info->kfd_bo_list); +- INIT_LIST_HEAD(&info->userptr_valid_list); +- INIT_LIST_HEAD(&info->userptr_inval_list); +- +- info->eviction_fence = +- amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), +- current->mm); +- if (info->eviction_fence == NULL) { +- pr_err("Failed to create eviction fence\n"); +- goto create_evict_fence_fail; +- } +- +- info->pid = get_task_pid(current->group_leader, +- PIDTYPE_PID); +- atomic_set(&info->evicted_bos, 0); +- INIT_DELAYED_WORK(&info->work, +- amdgpu_amdkfd_restore_userptr_worker); +- +- *process_info = info; +- *ef = dma_fence_get(&info->eviction_fence->base); +- } +- +- new_vm->process_info = *process_info; +- +- mutex_lock(&new_vm->process_info->lock); +- list_add_tail(&new_vm->vm_list_node, +- &(new_vm->process_info->vm_list_head)); +- new_vm->process_info->n_vms++; +- mutex_unlock(&new_vm->process_info->lock); +- +- *vm = (void *) new_vm; +- +- pr_debug("Created process vm %p\n", *vm); +- +- return ret; +- +-create_evict_fence_fail: +- kfree(info); +-alloc_process_info_fail: +- amdgpu_vm_fini(adev, &new_vm->base); +-vm_init_fail: +- kfree(new_vm); +- return ret; +- +-} +- +-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +-{ +- struct amdgpu_device *adev = (struct amdgpu_device *) kgd; +- struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; +- struct amdgpu_vm *avm = &kfd_vm->base; +- struct amdgpu_bo *pd; +- struct amdkfd_process_info *process_info; +- +- if (WARN_ON(!kgd || !vm)) +- return; +- +- pr_debug("Destroying process vm %p\n", vm); +- /* Release eviction fence from PD */ +- pd = avm->root.base.bo; +- amdgpu_bo_reserve(pd, false); +- amdgpu_bo_fence(pd, NULL, false); +- amdgpu_bo_unreserve(pd); +- +- process_info = kfd_vm->process_info; +- +- mutex_lock(&process_info->lock); +- process_info->n_vms--; +- list_del(&kfd_vm->vm_list_node); +- mutex_unlock(&process_info->lock); +- +- /* Release per-process resources */ +- if (!process_info->n_vms) { +- WARN_ON(!list_empty(&process_info->kfd_bo_list)); +- 
WARN_ON(!list_empty(&process_info->userptr_valid_list)); +- WARN_ON(!list_empty(&process_info->userptr_inval_list)); +- +- dma_fence_put(&process_info->eviction_fence->base); +- cancel_delayed_work_sync(&process_info->work); +- put_pid(process_info->pid); +- kfree(process_info); +- } +- +- /* Release the VM context */ +- amdgpu_vm_fini(adev, avm); +- kfree(vm); +-} +- +-uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +-{ +- struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; +- +- return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +-} +- +-int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, +- struct kfd_vm_fault_info *mem) +-{ +- struct amdgpu_device *adev; +- +- adev = (struct amdgpu_device *) kgd; +- if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { +- *mem = *adev->gmc.vm_fault_info; +- mb(); +- atomic_set(&adev->gmc.vm_fault_info_updated, 0); +- } +- return 0; +-} +- +-static bool is_mem_on_local_device(struct kgd_dev *kgd, +- struct list_head *bo_va_list, void *vm) +-{ +- struct kfd_bo_va_list *entry; +- +- list_for_each_entry(entry, bo_va_list, bo_list) { +- if (entry->kgd_dev == kgd && entry->bo_va->base.vm == vm) +- return true; +- } +- +- return false; +-} +- + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + { ++ struct amdgpu_device *adev = get_amdgpu_device(kgd); ++ struct amdkfd_process_info *process_info = ++ ((struct amdgpu_vm *)vm)->process_info; ++ unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry; +- struct amdgpu_device *adev; +- unsigned int mapped_before; +- int ret = 0; + struct bo_vm_reservation_context ctx; +- struct amdkfd_process_info *process_info; +- unsigned long bo_size; +- +- adev = (struct amdgpu_device *) kgd; +- process_info = ((struct amdkfd_vm *)vm)->process_info; +- +- bo_size = mem->bo->tbo.mem.size; ++ int ret; + + mutex_lock(&mem->lock); + +- /* +- * Make sure that this BO mapped on KGD before unmappping it +- */ +- if (!is_mem_on_local_device(kgd, &mem->bo_va_list, vm)) { +- ret = -EINVAL; ++ ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); ++ if (unlikely(ret)) + goto out; +- } +- +- if (mem->mapped_to_gpu_memory == 0) { +- pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", +- mem->va, bo_size, vm); ++ /* If no VMs were reserved, it means the BO wasn't actually mapped */ ++ if (ctx.n_vms == 0) { + ret = -EINVAL; +- goto out; ++ goto unreserve_out; + } +- mapped_before = mem->mapped_to_gpu_memory; + +- ret = reserve_bo_and_cond_vms(mem, vm, VA_MAPPED, &ctx); +- if (unlikely(ret != 0)) +- goto out; +- +- ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); +- if (unlikely(ret != 0)) ++ ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); ++ if (unlikely(ret)) + goto unreserve_out; + + pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", +@@ -1584,20 +1589,11 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + /* If BO is unmapped from all VMs, unfence it. It can be evicted if + * required. 
+ */ +- if (mem->mapped_to_gpu_memory == 0) { +- if (mem->domain & AMDGPU_GEM_DOMAIN_DGMA) +- amdgpu_bo_unpin(mem->bo); +- else if (!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) +- amdgpu_amdkfd_remove_eviction_fence(mem->bo, ++ if (mem->mapped_to_gpu_memory == 0 && ++ !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) ++ amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, +- NULL, NULL); +- } +- +- if (mapped_before == mem->mapped_to_gpu_memory) { +- pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", +- mem->va, bo_size, vm); +- ret = -EINVAL; +- } ++ NULL, NULL); + + unreserve_out: + unreserve_bo_and_vms(&ctx, false, false); +@@ -1606,8 +1602,28 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + return ret; + } + ++int amdgpu_amdkfd_gpuvm_sync_memory( ++ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) ++{ ++ struct amdgpu_sync sync; ++ int ret; ++ struct amdgpu_device *adev; ++ ++ adev = get_amdgpu_device(kgd); ++ ++ amdgpu_sync_create(&sync); ++ ++ mutex_lock(&mem->lock); ++ amdgpu_sync_clone(adev , &mem->sync, &sync); ++ mutex_unlock(&mem->lock); ++ ++ ret = amdgpu_sync_wait(&sync, intr); ++ amdgpu_sync_free(&sync); ++ return ret; ++} ++ + int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, +- struct kgd_mem *mem, void **kptr) ++ struct kgd_mem *mem, void **kptr, uint64_t *size) + { + int ret; + struct amdgpu_bo *bo = mem->bo; +@@ -1644,9 +1660,10 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + bo, mem->process_info->eviction_fence, NULL, NULL); + list_del_init(&mem->validate_list.head); + +- amdgpu_bo_unreserve(bo); ++ if (size) ++ *size = amdgpu_bo_size(bo); + +- mem->kptr = *kptr; ++ amdgpu_bo_unreserve(bo); + + mutex_unlock(&mem->process_info->lock); + return 0; +@@ -1661,13 +1678,27 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + return ret; + } + ++int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, ++ struct kfd_vm_fault_info *mem) ++{ ++ struct amdgpu_device *adev; ++ ++ adev = (struct amdgpu_device *) kgd; ++ if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { ++ *mem = *adev->gmc.vm_fault_info; ++ mb(); ++ atomic_set(&adev->gmc.vm_fault_info_updated, 0); ++ } ++ return 0; ++} ++ + static int pin_bo_wo_map(struct kgd_mem *mem) + { + struct amdgpu_bo *bo = mem->bo; + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); +- if (unlikely(ret != 0)) ++ if (unlikely(ret)) + return ret; + + ret = amdgpu_bo_pin(bo, mem->domain, NULL); +@@ -1682,7 +1713,7 @@ static void unpin_bo_wo_map(struct kgd_mem *mem) + int ret = 0; + + ret = amdgpu_bo_reserve(bo, false); +- if (unlikely(ret != 0)) ++ if (unlikely(ret)) + return; + + amdgpu_bo_unpin(bo); +@@ -1727,7 +1758,8 @@ static int get_sg_table(struct amdgpu_device *adev, + goto out; + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) { +- bus_addr = bo->tbo.offset + adev->gmc.aper_base + offset; ++ bus_addr = amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start ++ + adev->gmc.aper_base + offset; + + for_each_sg(sg->sgl, s, sg->orig_nents, i) { + uint64_t chunk_size, length; +@@ -1782,7 +1814,7 @@ int amdgpu_amdkfd_gpuvm_pin_get_sg_table(struct kgd_dev *kgd, + struct amdgpu_device *adev; + + ret = pin_bo_wo_map(mem); +- if (unlikely(ret != 0)) ++ if (unlikely(ret)) + return ret; + + adev = get_amdgpu_device(kgd); +@@ -1812,7 +1844,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + 
struct amdgpu_bo *bo; +- struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; ++ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &drm_gem_prime_dmabuf_ops) + /* Can't handle non-graphics buffers */ +@@ -1825,13 +1857,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | +- AMDGPU_GEM_DOMAIN_GTT | +- AMDGPU_GEM_DOMAIN_DGMA))) ++ AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); +- if (*mem == NULL) ++ if (!*mem) + return -ENOMEM; + + if (size) +@@ -1848,15 +1879,11 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; +- if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) +- (*mem)->domain = AMDGPU_GEM_DOMAIN_VRAM; +- else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) +- (*mem)->domain = AMDGPU_GEM_DOMAIN_GTT; +- else +- (*mem)->domain = AMDGPU_GEM_DOMAIN_DGMA; ++ (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? ++ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; +- (*mem)->process_info = kfd_vm->process_info; +- add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, false); ++ (*mem)->process_info = avm->process_info; ++ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +@@ -1886,37 +1913,6 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm, + return 0; + } + +-static int process_validate_vms(struct amdkfd_process_info *process_info) +-{ +- struct amdkfd_vm *peer_vm; +- int ret; +- +- list_for_each_entry(peer_vm, &process_info->vm_list_head, +- vm_list_node) { +- ret = vm_validate_pt_pd_bos(peer_vm); +- if (ret) +- return ret; +- } +- +- return 0; +-} +- +-static int process_update_pds(struct amdkfd_process_info *process_info, +- struct amdgpu_sync *sync) +-{ +- struct amdkfd_vm *peer_vm; +- int ret; +- +- list_for_each_entry(peer_vm, &process_info->vm_list_head, +- vm_list_node) { +- ret = vm_update_pds(&peer_vm->base, sync); +- if (ret) +- return ret; +- } +- +- return 0; +-} +- + /* Evict a userptr BO by stopping the queues if necessary + * + * Runs in MMU notifier, may be in RECLAIM_FS context. 
This means it +@@ -1940,7 +1936,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + if (evicted_bos == 1) { + /* First eviction, stop the queues */ + r = kgd2kfd->quiesce_mm(NULL, mm); +- if (r != 0) ++ if (r) + pr_err("Failed to quiesce KFD\n"); + schedule_delayed_work(&process_info->work, 1); + } +@@ -1959,6 +1955,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + { + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; ++ struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and +@@ -2005,8 +2002,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, +- sizeof(struct page *), +- GFP_KERNEL | __GFP_ZERO); ++ sizeof(struct page *), ++ GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); +@@ -2037,6 +2034,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } ++ + return 0; + } + +@@ -2053,9 +2051,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + +- struct amdkfd_vm *peer_vm; ++ struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; ++ struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, +@@ -2073,7 +2072,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) +- amdgpu_vm_get_pd_bo(&peer_vm->base, &resv_list, ++ amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, +@@ -2097,7 +2096,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) +- amdgpu_amdkfd_remove_eviction_fence(peer_vm->base.root.base.bo, ++ amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + +@@ -2163,7 +2162,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) + unreserve_out: + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) +- amdgpu_bo_fence(peer_vm->base.root.base.bo, ++ amdgpu_bo_fence(peer_vm->root.base.bo, + &process_info->eviction_fence->base, true); + ttm_eu_backoff_reservation(&ticket, &resv_list); + amdgpu_sync_wait(&sync, false); +@@ -2266,7 +2265,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + { + struct amdgpu_bo_list_entry *pd_bo_list; + struct amdkfd_process_info *process_info = info; +- struct amdkfd_vm *peer_vm; ++ struct amdgpu_vm *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *new_fence; +@@ -2281,15 +2280,14 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + pd_bo_list = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); +- if (pd_bo_list == NULL) ++ if (!pd_bo_list) + return -ENOMEM; + + i = 0; + mutex_lock(&process_info->lock); + list_for_each_entry(peer_vm, &process_info->vm_list_head, + 
vm_list_node) +- amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, +- &pd_bo_list[i++]); ++ amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); + + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list +@@ -2310,20 +2308,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + } + + amdgpu_sync_create(&sync_obj); +- ctx.sync = &sync_obj; + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); + if (ret) + goto validate_map_fail; + +- /* Wait for PD/PTs validate to finish */ +- /* FIXME: I think this isn't needed */ +- list_for_each_entry(peer_vm, &process_info->vm_list_head, +- vm_list_node) { +- struct amdgpu_bo *bo = peer_vm->base.root.base.bo; +- +- ttm_bo_wait(&bo->tbo, false, false); ++ ret = process_sync_pds_resv(process_info, &sync_obj); ++ if (ret) { ++ pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); ++ goto validate_map_fail; + } + + /* Validate BOs and map them to GPUVM (update VM page tables). */ +@@ -2339,13 +2333,17 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + pr_debug("Memory eviction: Validate BOs failed. Try again\n"); + goto validate_map_fail; + } +- ++ ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); ++ if (ret) { ++ pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); ++ goto validate_map_fail; ++ } + list_for_each_entry(bo_va_entry, &mem->bo_va_list, + bo_list) { + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, +- ctx.sync); ++ &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PTE failed. Try again\n"); + goto validate_map_fail; +@@ -2354,13 +2352,14 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + } + + /* Update page directories */ +- ret = process_update_pds(process_info, ctx.sync); ++ ret = process_update_pds(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PDs failed. Try again\n"); + goto validate_map_fail; + } + +- amdgpu_sync_wait(ctx.sync, false); ++ /* Wait for validate and PT updates to finish */ ++ amdgpu_sync_wait(&sync_obj, false); + + /* Release old eviction fence and create new one, because fence only + * goes from unsignaled to signaled, fence cannot be reused. 
+@@ -2378,10 +2377,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + process_info->eviction_fence = new_fence; + *ef = dma_fence_get(&new_fence->base); + +- /* Wait for validate to finish and attach new eviction fence */ +- list_for_each_entry(mem, &process_info->kfd_bo_list, +- validate_list.head) +- ttm_bo_wait(&mem->bo->tbo, false, false); ++ /* Attach new eviction fence to all BOs */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + amdgpu_bo_fence(mem->bo, +@@ -2390,7 +2386,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) + /* Attach eviction fence to PD / PT BOs */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { +- struct amdgpu_bo *bo = peer_vm->base.root.base.bo; ++ struct amdgpu_bo *bo = peer_vm->root.base.bo; + + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 6414b50..7ac07a3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -132,6 +132,7 @@ int amdgpu_job_hang_limit = 0; + int amdgpu_lbpw = -1; + int amdgpu_compute_multipipe = -1; + int amdgpu_gpu_recovery = -1; /* auto */ ++int amdgpu_emu_mode = 0; + + MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); + module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); +@@ -290,6 +291,9 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); + MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); + module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); + ++MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); ++module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); ++ + #ifdef CONFIG_DRM_AMDGPU_SI + + int amdgpu_si_support = 1; +@@ -569,7 +573,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + { + struct drm_device *dev; + unsigned long flags = ent->driver_data; +- int ret; ++ int ret, retry = 0; + bool supports_atomic = false; + + if (!amdgpu_virtual_display && +@@ -614,8 +618,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, + + pci_set_drvdata(pdev, dev); + ++retry_init: + ret = drm_dev_register(dev, ent->driver_data); +- if (ret) ++ if (ret == -EAGAIN && ++retry <= 3) { ++ DRM_INFO("retry init %d\n", retry); ++ /* Don't request EX mode too frequently which is attacking */ ++ msleep(5000); ++ goto retry_init; ++ } else if (ret) + goto err_pci; + + return 0; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 00477a8..ef9a24d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -32,6 +32,7 @@ + #include <drm/amdgpu_drm.h> + #include "amdgpu.h" + #include "amdgpu_trace.h" ++#include "amdgpu_amdkfd.h" + + /* + * GPUVM +@@ -2335,6 +2336,22 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, + adev->vm_manager.fragment_size); + } + ++static void amdgpu_inc_compute_vms(struct amdgpu_device *adev) ++{ ++ /* Temporary use only the first VM manager */ ++ unsigned int vmhub = 0; /*ring->funcs->vmhub;*/ ++ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; ++ ++ mutex_lock(&id_mgr->lock); ++ if ((adev->vm_manager.n_compute_vms++ == 0) && ++ (!amdgpu_sriov_vf(adev))) { ++ /* First Compute VM: enable compute power profile */ ++ if 
(adev->powerplay.pp_funcs->switch_power_profile) ++ amdgpu_dpm_switch_power_profile(adev,PP_SMC_POWER_PROFILE_COMPUTE); ++ } ++ mutex_unlock(&id_mgr->lock); ++} ++ + /** + * amdgpu_vm_init - initialize a vm instance + * +@@ -2439,21 +2456,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + vm->fault_credit = 16; + + vm->vm_context = vm_context; +- if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { +- struct amdgpu_vmid_mgr *id_mgr = +- &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; +- +- mutex_lock(&id_mgr->lock); +- +- if ((adev->vm_manager.n_compute_vms++ == 0) && +- (!amdgpu_sriov_vf(adev))) { +- /* First Compute VM: enable compute power profile */ +- if (adev->powerplay.pp_funcs->switch_power_profile) +- amdgpu_dpm_switch_power_profile(adev, +- AMD_PP_COMPUTE_PROFILE); +- } +- mutex_unlock(&id_mgr->lock); +- } ++ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) ++ amdgpu_inc_compute_vms(adev); + + return 0; + +@@ -2472,6 +2476,86 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + } + + /** ++ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM ++ * ++ * This only works on GFX VMs that don't have any BOs added and no ++ * page tables allocated yet. ++ * ++ * Changes the following VM parameters: ++ * - vm_context ++ * - use_cpu_for_update ++ * - pte_supports_ats ++ * - pasid (old PASID is released, because compute manages its own PASIDs) ++ * ++ * Reinitializes the page directory to reflect the changed ATS ++ * setting. May also switch to the compute power profile if this is ++ * the first compute VM. May leave behind an unused shadow BO for the ++ * page directory when switching from SDMA updates to CPU updates. ++ * ++ * Returns 0 for success, -errno for errors. ++ */ ++int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) ++{ ++ bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); ++ int r; ++ ++ r = amdgpu_bo_reserve(vm->root.base.bo, true); ++ if (r) ++ return r; ++ ++ /* Sanity checks */ ++ if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { ++ /* Can happen if ioctl is interrupted by a signal after ++ * this function already completed. Just return success. ++ */ ++ r = 0; ++ goto error; ++ } ++ if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { ++ r = -EINVAL; ++ goto error; ++ } ++ ++ /* Check if PD needs to be reinitialized and do it before ++ * changing any other state, in case it fails. ++ */ ++ if (pte_support_ats != vm->pte_support_ats) { ++ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, ++ adev->vm_manager.root_level, ++ pte_support_ats); ++ if (r) ++ goto error; ++ } ++ ++ /* Update VM state */ ++ vm->vm_context = AMDGPU_VM_CONTEXT_COMPUTE; ++ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & ++ AMDGPU_VM_USE_CPU_FOR_COMPUTE); ++ vm->pte_support_ats = pte_support_ats; ++ DRM_DEBUG_DRIVER("VM update mode is %s\n", ++ vm->use_cpu_for_update ? 
"CPU" : "SDMA"); ++ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), ++ "CPU update of VM recommended only for large BAR system\n"); ++ ++ if (vm->pasid) { ++ unsigned long flags; ++ ++ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); ++ idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); ++ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); ++ ++ vm->pasid = 0; ++ } ++ ++ /* Count the new compute VM */ ++ amdgpu_inc_compute_vms(adev); ++ ++error: ++ amdgpu_bo_unreserve(vm->root.base.bo); ++ return r; ++} ++ ++/** + * amdgpu_vm_free_levels - free PD/PT levels + * + * @adev: amdgpu device structure +@@ -2532,8 +2616,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + + if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + struct amdgpu_vmid_mgr *id_mgr = +- &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; +- ++ &adev->vm_manager.id_mgr[AMDGPU_GFXHUB]; + mutex_lock(&id_mgr->lock); + + WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs"); +@@ -2654,9 +2737,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) + adev->vm_manager.vm_update_mode = 0; + #endif + +- adev->vm_manager.n_compute_vms = 0; + idr_init(&adev->vm_manager.pasid_idr); + spin_lock_init(&adev->vm_manager.pasid_lock); ++ adev->vm_manager.n_compute_vms = 0; + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +index beee443..beba1a5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +@@ -199,9 +199,6 @@ struct amdgpu_vm { + /* dedicated to vm */ + struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; + +- /* Whether this is a Compute or GFX Context */ +- int vm_context; +- + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ + bool use_cpu_for_update; + +@@ -213,6 +210,18 @@ struct amdgpu_vm { + + /* Limit non-retry fault storms */ + unsigned int fault_credit; ++ ++ /* Whether this is a Compute or GFX Context */ ++ int vm_context; ++ ++ /* Points to the KFD process VM info */ ++ struct amdkfd_process_info *process_info; ++ ++ /* List node in amdkfd_process_info.vm_list_head */ ++ struct list_head vm_list_node; ++ ++ /* Valid while the PD is reserved or fenced */ ++ uint64_t pd_phys_addr; + }; + + struct amdgpu_vm_manager { +@@ -245,20 +254,22 @@ struct amdgpu_vm_manager { + * BIT1[= 0] Compute updated by SDMA [= 1] by CPU + */ + int vm_update_mode; +- /* Number of Compute VMs, used for detecting Compute activity */ +- unsigned n_compute_vms; + + /* PASID to VM mapping, will be used in interrupt context to + * look up VM of a page fault + */ + struct idr pasid_idr; + spinlock_t pasid_lock; ++ ++ /* Number of Compute VMs, used for detecting Compute activity */ ++ unsigned n_compute_vms; + }; + + void amdgpu_vm_manager_init(struct amdgpu_device *adev); + void amdgpu_vm_manager_fini(struct amdgpu_device *adev); + int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context, unsigned int pasid); ++int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); + void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); + bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, + unsigned int pasid); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +old mode 100644 +new mode 100755 +index 47dfce9..52f456e +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -366,14 +366,14 @@ struct 
kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, + * 32 and 64-bit requests are possible and must be + * supported. + */ +- if (pci_enable_atomic_ops_to_root(pdev, +- PCI_EXP_DEVCAP2_ATOMIC_COMP32 | +- PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { +- dev_info(kfd_device, +- "skipped device %x:%x, PCI rejects atomics", +- pdev->vendor, pdev->device); +- return NULL; +- } ++ ++ if (pci_enable_atomic_ops_to_root(pdev) < 0) { ++ dev_info(kfd_device, ++ "skipped device %x:%x, PCI rejects atomics", ++ pdev->vendor, pdev->device); ++ return NULL; ++ } ++ + } + + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); +diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +index 4dcc7d0..e164abb 100644 +--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h ++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +@@ -29,8 +29,11 @@ + #define KGD_KFD_INTERFACE_H_INCLUDED + + #include <linux/types.h> +-#include <linux/bitmap.h> ++#include <linux/mm_types.h> ++#include <linux/scatterlist.h> ++#include <linux/dma-fence.h> + #include <linux/dma-buf.h> ++#include <linux/bitmap.h> + + struct pci_dev; + +@@ -197,8 +200,6 @@ struct tile_config { + * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp + * scheduling mode. Only used for no cp scheduling mode. + * +- * @init_pipeline: Initialized the compute pipelines. +- * + * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp + * sceduling mode. + * +@@ -226,9 +227,6 @@ struct tile_config { + * + * @get_fw_version: Returns FW versions from the header + * +- * @set_num_of_requests: Sets number of Peripheral Page Request (PPR) sent to +- * IOMMU when address translation failed +- * + * @get_cu_info: Retrieves activated cu info + * + * @get_dmabuf_info: Returns information about a dmabuf if it was +@@ -263,13 +261,15 @@ struct kfd2kgd_calls { + + void(*get_local_mem_info)(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); +- uint64_t (*get_vmem_size)(struct kgd_dev *kgd); + uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); + + uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); + + int (*create_process_vm)(struct kgd_dev *kgd, void **vm, + void **process_info, struct dma_fence **ef); ++ int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, ++ void **vm, void **process_info, ++ struct dma_fence **ef); + void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); + + int (*create_process_gpumem)(struct kgd_dev *kgd, uint64_t va, size_t size, void *vm, struct kgd_mem **mem); +@@ -277,8 +277,6 @@ struct kfd2kgd_calls { + + uint32_t (*get_process_page_dir)(void *vm); + +- int (*open_graphic_handle)(struct kgd_dev *kgd, uint64_t va, void *vm, int fd, uint32_t handle, struct kgd_mem **mem); +- + int (*alloc_pasid)(unsigned int bits); + void (*free_pasid)(unsigned int pasid); + +@@ -290,9 +288,6 @@ struct kfd2kgd_calls { + int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + +- int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, +- uint32_t hpd_size, uint64_t hpd_gpu_addr); +- + int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); + + +@@ -342,8 +337,6 @@ struct kfd2kgd_calls { + uint16_t (*get_atc_vmid_pasid_mapping_pasid)( + struct kgd_dev *kgd, + uint8_t vmid); +- void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, +- uint8_t vmid); + uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); + + int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); +@@ -355,8 +348,7 @@ struct 
kfd2kgd_calls { + uint64_t size, void *vm, + struct kgd_mem **mem, uint64_t *offset, + uint32_t flags); +- int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, +- void *vm); ++ int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); + int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); + int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, +@@ -365,8 +357,6 @@ struct kfd2kgd_calls { + uint16_t (*get_fw_version)(struct kgd_dev *kgd, + enum kgd_engine_type type); + +- void (*set_num_of_requests)(struct kgd_dev *kgd, +- uint8_t num_of_requests); + int (*alloc_memory_of_scratch)(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); + int (*write_config_static_mem)(struct kgd_dev *kgd, bool swizzle_enable, +@@ -374,7 +364,7 @@ struct kfd2kgd_calls { + void (*get_cu_info)(struct kgd_dev *kgd, + struct kfd_cu_info *cu_info); + int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, +- struct kgd_mem *mem, void **kptr); ++ struct kgd_mem *mem, void **kptr, uint64_t *size); + void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); + +diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +index 44de087..416abeb 100644 +--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c ++++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c +@@ -166,10 +166,10 @@ void cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate) + cz_dpm_powerup_uvd(hwmgr); + cgs_set_clockgating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_UVD, +- AMD_PG_STATE_UNGATE); ++ AMD_CG_STATE_UNGATE); + cgs_set_powergating_state(hwmgr->device, + AMD_IP_BLOCK_TYPE_UVD, +- AMD_CG_STATE_UNGATE); ++ AMD_PG_STATE_UNGATE); + cz_dpm_update_uvd_dpm(hwmgr, false); + } + +@@ -197,11 +197,11 @@ void cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) + cgs_set_clockgating_state( + hwmgr->device, + AMD_IP_BLOCK_TYPE_VCE, +- AMD_PG_STATE_UNGATE); ++ AMD_CG_STATE_UNGATE); + cgs_set_powergating_state( + hwmgr->device, + AMD_IP_BLOCK_TYPE_VCE, +- AMD_CG_STATE_UNGATE); ++ AMD_PG_STATE_UNGATE); + cz_dpm_update_vce_dpm(hwmgr); + cz_enable_disable_vce_dpm(hwmgr, true); + } +diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile +old mode 100644 +new mode 100755 +index 0ad8244..cd3a725 +--- a/drivers/gpu/drm/radeon/Makefile ++++ b/drivers/gpu/drm/radeon/Makefile +@@ -103,8 +103,7 @@ radeon-y += \ + radeon-y += \ + radeon_vce.o \ + vce_v1_0.o \ +- vce_v2_0.o \ +- radeon_kfd.o ++ vce_v2_0.o + + radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o + radeon-$(CONFIG_ACPI) += radeon_acpi.o +diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h +index 26e0abc..ec0574e 100644 +--- a/include/uapi/linux/kfd_ioctl.h ++++ b/include/uapi/linux/kfd_ioctl.h +@@ -208,7 +208,7 @@ struct kfd_ioctl_dbg_wave_control_args { + #define KFD_IOC_WAIT_RESULT_TIMEOUT 1 + #define KFD_IOC_WAIT_RESULT_FAIL 2 + +-#define KFD_SIGNAL_EVENT_LIMIT (4096 + 512) ++#define KFD_SIGNAL_EVENT_LIMIT 4096 + + struct kfd_ioctl_create_event_args { + uint64_t event_page_offset; /* from KFD */ +@@ -278,6 +278,11 @@ struct kfd_ioctl_alloc_memory_of_scratch_args { + uint32_t pad; + }; + ++struct kfd_ioctl_acquire_vm_args { ++ uint32_t drm_fd; /* to KFD */ ++ uint32_t gpu_id; /* to KFD */ ++}; ++ + /* Allocation flags: memory types */ + #define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) + #define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) +@@ -361,22 +366,22 @@ struct 
kfd_ioctl_ipc_import_handle_args { + + struct kfd_ioctl_get_tile_config_args { + /* to KFD: pointer to tile array */ +- uint64_t tile_config_ptr; ++ __u64 tile_config_ptr; + /* to KFD: pointer to macro tile array */ +- uint64_t macro_tile_config_ptr; ++ __u64 macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ +- uint32_t num_tile_configs; ++ __u32 num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ +- uint32_t num_macro_tile_configs; ++ __u32 num_macro_tile_configs; + +- uint32_t gpu_id; /* to KFD */ +- uint32_t gb_addr_config; /* from KFD */ +- uint32_t num_banks; /* from KFD */ +- uint32_t num_ranks; /* from KFD */ ++ __u32 gpu_id; /* to KFD */ ++ __u32 gb_addr_config; /* from KFD */ ++ __u32 num_banks; /* from KFD */ ++ __u32 num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ +@@ -517,7 +522,10 @@ struct kfd_ioctl_cross_memory_copy_args { + #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x20, struct kfd_ioctl_get_queue_wave_state_args) + ++#define AMDKFD_IOC_ACQUIRE_VM \ ++ AMDKFD_IOW(0x21, struct kfd_ioctl_acquire_vm_args) ++ + #define AMDKFD_COMMAND_START 0x01 +-#define AMDKFD_COMMAND_END 0x21 ++#define AMDKFD_COMMAND_END 0x22 + + #endif +-- +2.7.4 + |
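
The final hunk above introduces a new KFD ioctl, AMDKFD_IOC_ACQUIRE_VM, whose argument struct carries an already-open DRM render-node file descriptor plus a gpu_id; the kernel side wires this to amdgpu_amdkfd_gpuvm_acquire_process_vm() and amdgpu_vm_make_compute(), which convert the graphics VM behind that DRM file into a compute VM. A minimal user-space sketch of how such a call might look is given below; the device-node paths and the gpu_id value are illustrative assumptions, not part of the patch, and a real runtime would discover them from the KFD topology in sysfs.

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kfd_ioctl.h>   /* provides AMDKFD_IOC_ACQUIRE_VM once this patch is applied */

    int main(void)
    {
            /* Assumed device nodes for illustration; runtimes normally enumerate
             * them via /sys/class/kfd/kfd/topology rather than hard-coding paths.
             */
            int kfd_fd = open("/dev/kfd", O_RDWR);
            int drm_fd = open("/dev/dri/renderD128", O_RDWR);
            uint32_t gpu_id = 0;   /* placeholder; read from the topology node's gpu_id file */

            if (kfd_fd < 0 || drm_fd < 0)
                    return 1;

            struct kfd_ioctl_acquire_vm_args args = {
                    .drm_fd = (uint32_t)drm_fd,   /* DRM file whose VM KFD should take over */
                    .gpu_id = gpu_id,             /* which GPU the VM belongs to */
            };

            /* Ask KFD to convert the DRM file's GPUVM into a compute VM. */
            if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &args) < 0) {
                    perror("AMDKFD_IOC_ACQUIRE_VM");
                    return 1;
            }
            return 0;
    }

This mirrors the create/acquire split in the kfd2kgd interface changes earlier in the patch: create_process_vm allocates a fresh VM, while acquire_process_vm reuses the VM of an existing DRM render-node file.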