Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch  1270
1 file changed, 0 insertions, 1270 deletions
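
For context, the patch deleted below taught amdgpu_vm_init() and amdgpu_vm_fini() to count live KFD compute VMs (vm_manager.n_compute_vms, guarded by vm_manager.lock) and to switch the power profile when that count goes 0 to 1 (enable the compute profile) or back to 0 (restore the graphics profile); see the amdgpu_vm_init()/amdgpu_vm_fini() hunks further down. The following is a minimal, self-contained sketch of that reference-counting scheme only; the types and helpers are illustrative stand-ins, not the real amdgpu symbols.

    #include <pthread.h>
    #include <stdio.h>

    typedef enum { GFX_PROFILE, COMPUTE_PROFILE } profile_t;

    struct vm_manager {
            pthread_mutex_t lock;
            unsigned int n_compute_vms;     /* number of live KFD compute VMs */
    };

    /* Stand-in for the patch's amdgpu_dpm_switch_power_profile() /
     * pm.funcs->switch_power_profile() calls. */
    static void switch_power_profile(profile_t p)
    {
            printf("switching to %s power profile\n",
                   p == COMPUTE_PROFILE ? "compute" : "graphics");
    }

    /* Mirrors what the patch adds to amdgpu_vm_init() for compute VMs. */
    static void compute_vm_created(struct vm_manager *mgr)
    {
            pthread_mutex_lock(&mgr->lock);
            if (mgr->n_compute_vms++ == 0)
                    switch_power_profile(COMPUTE_PROFILE); /* first compute VM */
            pthread_mutex_unlock(&mgr->lock);
    }

    /* Mirrors what the patch adds to amdgpu_vm_fini() for compute VMs. */
    static void compute_vm_destroyed(struct vm_manager *mgr)
    {
            pthread_mutex_lock(&mgr->lock);
            if (--mgr->n_compute_vms == 0)
                    switch_power_profile(GFX_PROFILE); /* last compute VM gone */
            pthread_mutex_unlock(&mgr->lock);
    }

    int main(void)
    {
            struct vm_manager mgr = { PTHREAD_MUTEX_INITIALIZER, 0 };

            compute_vm_created(&mgr);   /* 0 -> 1: switch to compute */
            compute_vm_created(&mgr);   /* 1 -> 2: no switch */
            compute_vm_destroyed(&mgr); /* 2 -> 1: no switch */
            compute_vm_destroyed(&mgr); /* 1 -> 0: back to graphics */
            return 0;
    }
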
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch
deleted file mode 100644
index 7fc9f02c..00000000
--- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch
+++ /dev/null
@@ -1,1270 +0,0 @@
-From 8c8e41b130f06c684c2455d26ff4523264fdfdef Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Wed, 5 Oct 2016 16:25:45 -0400
-Subject: [PATCH 1586/4131] drm/amdgpu: Automatic power profile switching
-
-Switch between compute and graphics profiles automatically when KFD
-compute work starts and stops. It uses the number of KFD VMs as the
-criterion for the existence of KFD compute work.
-
-Change-Id: I11d34f45d901f4dd1e16e4a64c1ad1010088d9b8
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-
- Conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 625 +++++++++++------------
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +
- 3 files changed, 311 insertions(+), 321 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 3ec1ff1..155de54 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -1327,7 +1327,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- return -ENOMEM;
-
- /* Initialize the VM context, allocate the page directory and zero it */
-- ret = amdgpu_vm_init(adev, &new_vm->base);
-+ ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE);
- if (ret != 0) {
- pr_err("Failed init vm ret %d\n", ret);
- /* Undo everything related to the new VM context */
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-index bdf2d6c..c300397 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-@@ -25,8 +25,14 @@
- * Alex Deucher
- * Jerome Glisse
- */
-+#if defined(BUILD_AS_DKMS)
-+#include <kcl/kcl_fence_array.h>
-+#else
- #include <linux/dma-fence-array.h>
-+#endif
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
- #include <linux/interval_tree_generic.h>
-+#endif
- #include <drm/drmP.h>
- #include <drm/amdgpu_drm.h>
- #include "amdgpu.h"
-@@ -140,7 +146,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry)
- {
-- entry->robj = vm->root.base.bo;
-+ entry->robj = vm->root.bo;
- entry->priority = 0;
- entry->tv.bo = &entry->robj->tbo;
- entry->tv.shared = true;
-@@ -149,6 +155,61 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- }
-
- /**
-+ * amdgpu_vm_validate_level - validate a single page table level
-+ *
-+ * @parent: parent page table level
-+ * @validate: callback to do the validation
-+ * @param: parameter for the validation callback
-+ *
-+ * Validate the page table BOs on command submission if necessary.
-+ */
-+static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-+ int (*validate)(void *, struct amdgpu_bo *),
-+ void *param, bool use_cpu_for_update,
-+ struct ttm_bo_global *glob)
-+{
-+ unsigned i;
-+ int r;
-+
-+ if (use_cpu_for_update) {
-+ r = amdgpu_bo_kmap(parent->bo, NULL);
-+ if (r)
-+ return r;
-+ }
-+
-+ if (!parent->entries)
-+ return 0;
-+
-+ for (i = 0; i <= parent->last_entry_used; ++i) {
-+ struct amdgpu_vm_pt *entry = &parent->entries[i];
-+
-+ if (!entry->bo)
-+ continue;
-+
-+ r = validate(param, entry->bo);
-+ if (r)
-+ return r;
-+
-+ spin_lock(&glob->lru_lock);
-+ ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-+ if (entry->bo->shadow)
-+ ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
-+ spin_unlock(&glob->lru_lock);
-+
-+ /*
-+ * Recurse into the sub directory. This is harmless because we
-+ * have only a maximum of 5 layers.
-+ */
-+ r = amdgpu_vm_validate_level(entry, validate, param,
-+ use_cpu_for_update, glob);
-+ if (r)
-+ return r;
-+ }
-+
-+ return r;
-+}
-+
-+/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
- *
- * @adev: amdgpu device pointer
-@@ -162,47 +223,32 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*validate)(void *p, struct amdgpu_bo *bo),
- void *param)
- {
-- struct ttm_bo_global *glob = adev->mman.bdev.glob;
-- int r;
--
-- spin_lock(&vm->status_lock);
-- while (!list_empty(&vm->evicted)) {
-- struct amdgpu_vm_bo_base *bo_base;
-- struct amdgpu_bo *bo;
--
-- bo_base = list_first_entry(&vm->evicted,
-- struct amdgpu_vm_bo_base,
-- vm_status);
-- spin_unlock(&vm->status_lock);
-+ uint64_t num_evictions;
-
-- bo = bo_base->bo;
-- BUG_ON(!bo);
-- if (bo->parent) {
-- r = validate(param, bo);
-- if (r)
-- return r;
-+ /* We only need to validate the page tables
-+ * if they aren't already valid.
-+ */
-+ num_evictions = atomic64_read(&adev->num_evictions);
-+ if (num_evictions == vm->last_eviction_counter)
-+ return 0;
-
-- spin_lock(&glob->lru_lock);
-- ttm_bo_move_to_lru_tail(&bo->tbo);
-- if (bo->shadow)
-- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
-- spin_unlock(&glob->lru_lock);
-- }
-+ return amdgpu_vm_validate_level(&vm->root, validate, param,
-+ vm->use_cpu_for_update,
-+ adev->mman.bdev.glob);
-+}
-
-- if (bo->tbo.type == ttm_bo_type_kernel &&
-- vm->use_cpu_for_update) {
-- r = amdgpu_bo_kmap(bo, NULL);
-- if (r)
-- return r;
-- }
-+/**
-+ * amdgpu_vm_check - helper for amdgpu_vm_ready
-+ */
-+static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-+{
-+ /* if anything is swapped out don't swap it in here,
-+ just abort and wait for the next CS */
-+ if (!amdgpu_bo_gpu_accessible(bo))
-+ return -ERESTARTSYS;
-
-- spin_lock(&vm->status_lock);
-- if (bo->tbo.type != ttm_bo_type_kernel)
-- list_move(&bo_base->vm_status, &vm->moved);
-- else
-- list_move(&bo_base->vm_status, &vm->relocated);
-- }
-- spin_unlock(&vm->status_lock);
-+ if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-+ return -ERESTARTSYS;
-
- return 0;
- }
-@@ -210,19 +256,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- /**
- * amdgpu_vm_ready - check VM is ready for updates
- *
-+ * @adev: amdgpu device
- * @vm: VM to check
- *
- * Check if all VM PDs/PTs are ready for updates
- */
--bool amdgpu_vm_ready(struct amdgpu_vm *vm)
-+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
-- bool ready;
--
-- spin_lock(&vm->status_lock);
-- ready = list_empty(&vm->evicted);
-- spin_unlock(&vm->status_lock);
-+ if (amdgpu_vm_check(NULL, vm->root.bo))
-+ return false;
-
-- return ready;
-+ return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
- }
-
- /**
-@@ -251,9 +295,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
- if (!parent->entries) {
- unsigned num_entries = amdgpu_vm_num_entries(adev, level);
-
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ parent->entries = drm_calloc_large(num_entries,
-+ sizeof(struct amdgpu_vm_pt));
-+#else
- parent->entries = kvmalloc_array(num_entries,
- sizeof(struct amdgpu_vm_pt),
- GFP_KERNEL | __GFP_ZERO);
-+#endif
- if (!parent->entries)
- return -ENOMEM;
- memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
-@@ -288,11 +337,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
-
- /* walk over the address space and allocate the page tables */
- for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
-+ struct reservation_object *resv = vm->root.bo->tbo.resv;
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
- struct amdgpu_bo *pt;
-
-- if (!entry->base.bo) {
-+ if (!entry->bo) {
- r = amdgpu_bo_create(adev,
- amdgpu_vm_bo_size(adev, level),
- AMDGPU_GPU_PAGE_SIZE, true,
-@@ -313,14 +362,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
-- pt->parent = amdgpu_bo_ref(parent->base.bo);
--
-- entry->base.vm = vm;
-- entry->base.bo = pt;
-- list_add_tail(&entry->base.bo_list, &pt->va);
-- spin_lock(&vm->status_lock);
-- list_add(&entry->base.vm_status, &vm->relocated);
-- spin_unlock(&vm->status_lock);
-+ pt->parent = amdgpu_bo_ref(vm->root.bo);
-+
-+ entry->bo = pt;
- entry->addr = 0;
- }
-
-@@ -987,7 +1031,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int r;
-
- amdgpu_sync_create(&sync);
-- amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
-+ amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
- r = amdgpu_sync_wait(&sync, true);
- amdgpu_sync_free(&sync);
-
-@@ -1006,17 +1050,18 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- */
- static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
-- struct amdgpu_vm_pt *parent)
-+ struct amdgpu_vm_pt *parent,
-+ unsigned level)
- {
- struct amdgpu_bo *shadow;
- struct amdgpu_ring *ring = NULL;
- uint64_t pd_addr, shadow_addr = 0;
-+ uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
- uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
- unsigned count = 0, pt_idx, ndw = 0;
- struct amdgpu_job *job;
- struct amdgpu_pte_update_params params;
- struct dma_fence *fence = NULL;
-- uint32_t incr;
-
- int r;
-
-@@ -1025,10 +1070,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-
- memset(&params, 0, sizeof(params));
- params.adev = adev;
-- shadow = parent->base.bo->shadow;
-+ shadow = parent->bo->shadow;
-
- if (vm->use_cpu_for_update) {
-- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-+ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
- r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
- if (unlikely(r))
- return r;
-@@ -1044,7 +1089,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- /* assume the worst case */
- ndw += parent->last_entry_used * 6;
-
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
-
- if (shadow) {
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
-@@ -1064,17 +1109,12 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-
- /* walk over the address space and update the directory */
- for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-- struct amdgpu_bo *bo = entry->base.bo;
-+ struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
- uint64_t pde, pt;
-
- if (bo == NULL)
- continue;
-
-- spin_lock(&vm->status_lock);
-- list_del_init(&entry->base.vm_status);
-- spin_unlock(&vm->status_lock);
--
- pt = amdgpu_bo_gpu_offset(bo);
- pt = amdgpu_gart_get_vm_pde(adev, pt);
- /* Don't update huge pages here */
-@@ -1085,7 +1125,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
- pde = pd_addr + pt_idx * 8;
-- incr = amdgpu_bo_size(bo);
- if (((last_pde + 8 * count) != pde) ||
- ((last_pt + incr * count) != pt) ||
- (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-@@ -1113,7 +1152,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- }
-
- if (count) {
-- if (vm->root.base.bo->shadow)
-+ if (vm->root.bo->shadow)
- params.func(&params, last_shadow, last_pt,
- count, incr, AMDGPU_PTE_VALID);
-
-@@ -1126,8 +1165,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- amdgpu_job_free(job);
- } else {
- amdgpu_ring_pad_ib(ring, params.ib);
-- amdgpu_sync_resv(adev, &job->sync,
-- parent->base.bo->tbo.resv,
-+ amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
- AMDGPU_FENCE_OWNER_VM);
- if (shadow)
- amdgpu_sync_resv(adev, &job->sync,
-@@ -1140,11 +1178,26 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- amdgpu_bo_fence(parent->base.bo, fence, true);
-- dma_fence_put(vm->last_update);
-- vm->last_update = fence;
-+ amdgpu_bo_fence(parent->bo, fence, true);
-+ dma_fence_put(vm->last_dir_update);
-+ vm->last_dir_update = dma_fence_get(fence);
-+ dma_fence_put(fence);
- }
- }
-+ /*
-+ * Recurse into the subdirectories. This recursion is harmless because
-+ * we only have a maximum of 5 layers.
-+ */
-+ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-+ struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-+
-+ if (!entry->bo)
-+ continue;
-+
-+ r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-+ if (r)
-+ return r;
-+ }
-
- return 0;
-
-@@ -1160,8 +1213,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- *
- * Mark all PD level as invalid after an error.
- */
--static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-- struct amdgpu_vm_pt *parent)
-+static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
- {
- unsigned pt_idx;
-
-@@ -1172,15 +1224,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
- for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-- if (!entry->base.bo)
-+ if (!entry->bo)
- continue;
-
- entry->addr = ~0ULL;
-- spin_lock(&vm->status_lock);
-- if (list_empty(&entry->base.vm_status))
-- list_add(&entry->base.vm_status, &vm->relocated);
-- spin_unlock(&vm->status_lock);
-- amdgpu_vm_invalidate_level(vm, entry);
-+ amdgpu_vm_invalidate_level(entry);
- }
- }
-
-@@ -1196,40 +1244,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
- int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
- {
-- int r = 0;
--
-- spin_lock(&vm->status_lock);
-- while (!list_empty(&vm->relocated)) {
-- struct amdgpu_vm_bo_base *bo_base;
-- struct amdgpu_bo *bo;
--
-- bo_base = list_first_entry(&vm->relocated,
-- struct amdgpu_vm_bo_base,
-- vm_status);
-- spin_unlock(&vm->status_lock);
--
-- bo = bo_base->bo->parent;
-- if (bo) {
-- struct amdgpu_vm_bo_base *parent;
-- struct amdgpu_vm_pt *pt;
--
-- parent = list_first_entry(&bo->va,
-- struct amdgpu_vm_bo_base,
-- bo_list);
-- pt = container_of(parent, struct amdgpu_vm_pt, base);
-+ int r;
-
-- r = amdgpu_vm_update_level(adev, vm, pt);
-- if (r) {
-- amdgpu_vm_invalidate_level(vm, &vm->root);
-- return r;
-- }
-- spin_lock(&vm->status_lock);
-- } else {
-- spin_lock(&vm->status_lock);
-- list_del_init(&bo_base->vm_status);
-- }
-- }
-- spin_unlock(&vm->status_lock);
-+ r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-+ if (r)
-+ amdgpu_vm_invalidate_level(&vm->root);
-
- if (vm->use_cpu_for_update) {
- /* Flush HDP */
-@@ -1260,7 +1279,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
- *entry = &p->vm->root;
- while ((*entry)->entries) {
- idx = addr >> (p->adev->vm_manager.block_size * level--);
-- idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
-+ idx %= amdgpu_bo_size((*entry)->bo) / 8;
- *parent = *entry;
- *entry = &(*entry)->entries[idx];
- }
-@@ -1296,7 +1315,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- p->src ||
- !(flags & AMDGPU_PTE_VALID)) {
-
-- dst = amdgpu_bo_gpu_offset(entry->base.bo);
-+ dst = amdgpu_bo_gpu_offset(entry->bo);
- dst = amdgpu_gart_get_vm_pde(p->adev, dst);
- flags = AMDGPU_PTE_VALID;
- } else {
-@@ -1322,18 +1341,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- tmp = p->pages_addr;
- p->pages_addr = NULL;
-
-- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-+ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
-
- p->pages_addr = tmp;
- } else {
-- if (parent->base.bo->shadow) {
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-+ if (parent->bo->shadow) {
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
- }
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
- }
-@@ -1384,7 +1403,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
- if (entry->addr & AMDGPU_PDE_PTE)
- continue;
-
-- pt = entry->base.bo;
-+ pt = entry->bo;
- if (use_cpu_update) {
- pe_start = (unsigned long)amdgpu_bo_kptr(pt);
- } else {
-@@ -1420,6 +1439,8 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
- {
-+ int r;
-+
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
-@@ -1438,38 +1459,39 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- */
-- unsigned max_frag = params->adev->vm_manager.fragment_size;
-- int r;
-+ unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-+ uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-+ uint64_t frag_align = 1 << pages_per_frag;
-+
-+ uint64_t frag_start = ALIGN(start, frag_align);
-+ uint64_t frag_end = end & ~(frag_align - 1);
-
- /* system pages are non continuously */
-- if (params->src || !(flags & AMDGPU_PTE_VALID))
-+ if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-+ (frag_start >= frag_end))
- return amdgpu_vm_update_ptes(params, start, end, dst, flags);
-
-- while (start != end) {
-- uint64_t frag_flags, frag_end;
-- unsigned frag;
--
-- /* This intentionally wraps around if no bit is set */
-- frag = min((unsigned)ffs(start) - 1,
-- (unsigned)fls64(end - start) - 1);
-- if (frag >= max_frag) {
-- frag_flags = AMDGPU_PTE_FRAG(max_frag);
-- frag_end = end & ~((1ULL << max_frag) - 1);
-- } else {
-- frag_flags = AMDGPU_PTE_FRAG(frag);
-- frag_end = start + (1 << frag);
-- }
--
-- r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
-- flags | frag_flags);
-+ /* handle the 4K area at the beginning */
-+ if (start != frag_start) {
-+ r = amdgpu_vm_update_ptes(params, start, frag_start,
-+ dst, flags);
- if (r)
- return r;
--
-- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
-- start = frag_end;
-+ dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
- }
-
-- return 0;
-+ /* handle the area in the middle */
-+ r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-+ flags | frag_flags);
-+ if (r)
-+ return r;
-+
-+ /* handle the 4K area at the end */
-+ if (frag_end != end) {
-+ dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-+ r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
-+ }
-+ return r;
- }
-
- /**
-@@ -1477,6 +1499,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- *
- * @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
-+ * @src: address where to copy page table entries from
- * @pages_addr: DMA addresses to use for mapping
- * @vm: requested vm
- * @start: start of mapped range
-@@ -1490,6 +1513,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- */
- static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct dma_fence *exclusive,
-+ uint64_t src,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
- uint64_t start, uint64_t last,
-@@ -1507,6 +1531,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.vm = vm;
-+ params.src = src;
-
- /* sync to everything on unmapping */
- if (!(flags & AMDGPU_PTE_VALID))
-@@ -1535,12 +1560,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- nptes = last - start + 1;
-
- /*
-- * reserve space for two commands every (1 << BLOCK_SIZE)
-+ * reserve space for one command every (1 << BLOCK_SIZE)
- * entries or 2k dwords (whatever is smaller)
-- *
-- * The second command is for the shadow pagetables.
- */
-- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
-+ ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
-
- /* padding, etc. */
- ndw = 64;
-@@ -1548,9 +1571,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- /* one PDE write for each huge page */
- ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
-
-- if (pages_addr) {
-+ if (src) {
-+ /* only copy commands needed */
-+ ndw += ncmds * 7;
-+
-+ params.func = amdgpu_vm_do_copy_ptes;
-+
-+ } else if (pages_addr) {
- /* copy commands needed */
-- ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
-+ ndw += ncmds * 7;
-
- /* and also PTEs */
- ndw += nptes * 2;
-@@ -1559,11 +1588,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-
- } else {
- /* set page commands needed */
-- ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
-+ ndw += ncmds * 10;
-
-- /* extra commands for begin/end fragments */
-- ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-- * adev->vm_manager.fragment_size;
-+ /* two extra commands for begin/end of fragment */
-+ ndw += 2 * 10;
-
- params.func = amdgpu_vm_do_set_ptes;
- }
-@@ -1574,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-
- params.ib = &job->ibs[0];
-
-- if (pages_addr) {
-+ if (!src && pages_addr) {
- uint64_t *pte;
- unsigned i;
-
-@@ -1595,12 +1623,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-+ r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
- owner);
- if (r)
- goto error_free;
-
-- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-+ r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
- if (r)
- goto error_free;
-
-@@ -1615,14 +1643,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- amdgpu_bo_fence(vm->root.base.bo, f, true);
-+ amdgpu_bo_fence(vm->root.bo, f, true);
- dma_fence_put(*fence);
- *fence = f;
- return 0;
-
- error_free:
- amdgpu_job_free(job);
-- amdgpu_vm_invalidate_level(vm, &vm->root);
-+ amdgpu_vm_invalidate_level(&vm->root);
- return r;
- }
-
-@@ -1647,13 +1675,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va_mapping *mapping,
-- uint64_t vram_base_offset,
- uint64_t flags,
- struct ttm_mem_reg *mem,
- struct dma_fence **fence)
- {
- struct drm_mm_node *nodes = mem ? mem->mm_node : NULL;
-- uint64_t pfn, start = mapping->start;
-+ uint64_t pfn, src = 0, start = mapping->start;
- int r;
-
- /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
-@@ -1704,12 +1731,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- max_entries = min(max_entries, 16ull * 1024ull);
- break;
- case AMDGPU_PL_DGMA:
-- addr += vram_base_offset +
-+ addr += adev->vm_manager.vram_base_offset +
- adev->mman.bdev.man[mem->mem_type].gpu_offset -
- adev->mman.bdev.man[TTM_PL_VRAM].gpu_offset;
- break;
- case TTM_PL_VRAM:
-- addr += vram_base_offset;
-+ addr += adev->vm_manager.vram_base_offset;
- break;
- default:
- break;
-@@ -1722,7 +1749,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- addr += pfn << PAGE_SHIFT;
-
- last = min((uint64_t)mapping->last, start + max_entries - 1);
-- r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
-+ r = amdgpu_vm_bo_update_mapping(adev, exclusive,
-+ src, pages_addr, vm,
- start, last, flags, addr,
- fence);
- if (r)
-@@ -1760,10 +1788,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- dma_addr_t *pages_addr = NULL;
- struct ttm_mem_reg *mem;
- struct drm_mm_node *nodes;
-- struct dma_fence *exclusive, **last_update;
-+ struct dma_fence *exclusive;
- uint64_t flags;
-- uint64_t vram_base_offset = adev->vm_manager.vram_base_offset;
-- struct amdgpu_device *bo_adev;
- int r;
-
- if (clear || !bo_va->base.bo) {
-@@ -1785,54 +1811,43 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- exclusive = reservation_object_get_excl(bo->tbo.resv);
- }
-
-- if (bo) {
-+ if (bo)
- flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-- bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-- if (mem && mem->mem_type == TTM_PL_VRAM &&
-- adev != bo_adev) {
-- flags |= AMDGPU_PTE_SYSTEM;
-- vram_base_offset = bo_adev->mc.aper_base;
-- }
-- } else
-- flags = 0x0;
--
-- if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
-- last_update = &vm->last_update;
- else
-- last_update = &bo_va->last_pt_update;
--
-- if (!clear && bo_va->base.moved) {
-- bo_va->base.moved = false;
-- list_splice_init(&bo_va->valids, &bo_va->invalids);
-+ flags = 0x0;
-
-- } else if (bo_va->cleared != clear) {
-+ spin_lock(&vm->status_lock);
-+ if (!list_empty(&bo_va->base.vm_status))
- list_splice_init(&bo_va->valids, &bo_va->invalids);
-- }
-+ spin_unlock(&vm->status_lock);
-
- list_for_each_entry(mapping, &bo_va->invalids, list) {
- r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
-- mapping, vram_base_offset, flags,
-- mem, last_update);
-+ mapping, flags, mem,
-+ &bo_va->last_pt_update);
- if (r)
- return r;
- }
-
-- if (vm->use_cpu_for_update) {
-- /* Flush HDP */
-- mb();
-- amdgpu_gart_flush_gpu_tlb(adev, 0);
-+ if (trace_amdgpu_vm_bo_mapping_enabled()) {
-+ list_for_each_entry(mapping, &bo_va->valids, list)
-+ trace_amdgpu_vm_bo_mapping(mapping);
-+
-+ list_for_each_entry(mapping, &bo_va->invalids, list)
-+ trace_amdgpu_vm_bo_mapping(mapping);
- }
-
- spin_lock(&vm->status_lock);
-+ list_splice_init(&bo_va->invalids, &bo_va->valids);
- list_del_init(&bo_va->base.vm_status);
-+ if (clear)
-+ list_add(&bo_va->base.vm_status, &vm->cleared);
- spin_unlock(&vm->status_lock);
-
-- list_splice_init(&bo_va->invalids, &bo_va->valids);
-- bo_va->cleared = clear;
--
-- if (trace_amdgpu_vm_bo_mapping_enabled()) {
-- list_for_each_entry(mapping, &bo_va->valids, list)
-- trace_amdgpu_vm_bo_mapping(mapping);
-+ if (vm->use_cpu_for_update) {
-+ /* Flush HDP */
-+ mb();
-+ amdgpu_gart_flush_gpu_tlb(adev, 0);
- }
-
- return 0;
-@@ -1940,7 +1955,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
- */
- static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
-- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
-+ struct reservation_object *resv = vm->root.bo->tbo.resv;
- struct dma_fence *excl, **shared;
- unsigned i, shared_count;
- int r;
-@@ -1998,7 +2013,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- if (vm->pte_support_ats)
- init_pte_value = AMDGPU_PTE_SYSTEM;
-
-- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
-+ r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
- mapping->start, mapping->last,
- init_pte_value, 0, &f);
- amdgpu_vm_free_mapping(adev, vm, mapping, f);
-@@ -2020,35 +2035,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- }
-
- /**
-- * amdgpu_vm_handle_moved - handle moved BOs in the PT
-+ * amdgpu_vm_clear_moved - clear moved BOs in the PT
- *
- * @adev: amdgpu_device pointer
- * @vm: requested vm
-- * @sync: sync object to add fences to
- *
-- * Make sure all BOs which are moved are updated in the PTs.
-+ * Make sure all moved BOs are cleared in the PT.
- * Returns 0 for success.
- *
-- * PTs have to be reserved!
-+ * PTs have to be reserved and mutex must be locked!
- */
--int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-- struct amdgpu_vm *vm)
-+int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-+ struct amdgpu_sync *sync)
- {
-- bool clear;
-+ struct amdgpu_bo_va *bo_va = NULL;
- int r = 0;
-
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
-- struct amdgpu_bo_va *bo_va;
--
- bo_va = list_first_entry(&vm->moved,
- struct amdgpu_bo_va, base.vm_status);
- spin_unlock(&vm->status_lock);
-
-- /* Per VM BOs never need to bo cleared in the page tables */
-- clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
--
-- r = amdgpu_vm_bo_update(adev, bo_va, clear);
-+ r = amdgpu_vm_bo_update(adev, bo_va, true);
- if (r)
- return r;
-
-@@ -2056,6 +2065,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- }
- spin_unlock(&vm->status_lock);
-
-+ if (bo_va)
-+ r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-+
- return r;
- }
-
-@@ -2097,39 +2109,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
- return bo_va;
- }
-
--
--/**
-- * amdgpu_vm_bo_insert_mapping - insert a new mapping
-- *
-- * @adev: amdgpu_device pointer
-- * @bo_va: bo_va to store the address
-- * @mapping: the mapping to insert
-- *
-- * Insert a new mapping into all structures.
-- */
--static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
-- struct amdgpu_bo_va *bo_va,
-- struct amdgpu_bo_va_mapping *mapping)
--{
-- struct amdgpu_vm *vm = bo_va->base.vm;
-- struct amdgpu_bo *bo = bo_va->base.bo;
--
-- mapping->bo_va = bo_va;
-- list_add(&mapping->list, &bo_va->invalids);
-- amdgpu_vm_it_insert(mapping, &vm->va);
--
-- if (mapping->flags & AMDGPU_PTE_PRT)
-- amdgpu_vm_prt_get(adev);
--
-- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-- spin_lock(&vm->status_lock);
-- if (list_empty(&bo_va->base.vm_status))
-- list_add(&bo_va->base.vm_status, &vm->moved);
-- spin_unlock(&vm->status_lock);
-- }
-- trace_amdgpu_vm_bo_map(bo_va, mapping);
--}
--
- /**
- * amdgpu_vm_bo_map - map bo inside a vm
- *
-@@ -2181,12 +2160,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
- if (!mapping)
- return -ENOMEM;
-
-+ INIT_LIST_HEAD(&mapping->list);
- mapping->start = saddr;
- mapping->last = eaddr;
- mapping->offset = offset;
- mapping->flags = flags;
-
-- amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
-+ list_add(&mapping->list, &bo_va->invalids);
-+ amdgpu_vm_it_insert(mapping, &vm->va);
-+
-+ if (flags & AMDGPU_PTE_PRT)
-+ amdgpu_vm_prt_get(adev);
-+ trace_amdgpu_vm_bo_map(bo_va, mapping);
-
- return 0;
- }
-@@ -2213,6 +2198,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
- {
- struct amdgpu_bo_va_mapping *mapping;
- struct amdgpu_bo *bo = bo_va->base.bo;
-+ struct amdgpu_vm *vm = bo_va->base.vm;
- uint64_t eaddr;
- int r;
-
-@@ -2246,7 +2232,12 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
- mapping->offset = offset;
- mapping->flags = flags;
-
-- amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
-+ list_add(&mapping->list, &bo_va->invalids);
-+ amdgpu_vm_it_insert(mapping, &vm->va);
-+
-+ if (flags & AMDGPU_PTE_PRT)
-+ amdgpu_vm_prt_get(adev);
-+ trace_amdgpu_vm_bo_map(bo_va, mapping);
-
- return 0;
- }
-@@ -2292,7 +2283,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
-
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
-- mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-
- if (valid)
-@@ -2378,7 +2368,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
- if (tmp->last > eaddr)
- tmp->last = eaddr;
-
-- tmp->bo_va = NULL;
- list_add(&tmp->list, &vm->freed);
- trace_amdgpu_vm_bo_unmap(NULL, tmp);
- }
-@@ -2405,19 +2394,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
- }
-
- /**
-- * amdgpu_vm_bo_lookup_mapping - find mapping by address
-- *
-- * @vm: the requested VM
-- *
-- * Find a mapping by it's address.
-- */
--struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
-- uint64_t addr)
--{
-- return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
--}
--
--/**
- * amdgpu_vm_bo_rmv - remove a bo to a specific vm
- *
- * @adev: amdgpu_device pointer
-@@ -2442,7 +2418,6 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
-- mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- list_add(&mapping->list, &vm->freed);
- }
-@@ -2467,36 +2442,15 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- * Mark @bo as invalid.
- */
- void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-- struct amdgpu_bo *bo, bool evicted)
-+ struct amdgpu_bo *bo)
- {
- struct amdgpu_vm_bo_base *bo_base;
-
- list_for_each_entry(bo_base, &bo->va, bo_list) {
-- struct amdgpu_vm *vm = bo_base->vm;
--
-- bo_base->moved = true;
-- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-- spin_lock(&bo_base->vm->status_lock);
-- if (bo->tbo.type == ttm_bo_type_kernel)
-- list_move(&bo_base->vm_status, &vm->evicted);
-- else
-- list_move_tail(&bo_base->vm_status,
-- &vm->evicted);
-- spin_unlock(&bo_base->vm->status_lock);
-- continue;
-- }
--
-- if (bo->tbo.type == ttm_bo_type_kernel) {
-- spin_lock(&bo_base->vm->status_lock);
-- if (list_empty(&bo_base->vm_status))
-- list_add(&bo_base->vm_status, &vm->relocated);
-- spin_unlock(&bo_base->vm->status_lock);
-- continue;
-- }
--
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
-- list_add(&bo_base->vm_status, &vm->moved);
-+ list_add(&bo_base->vm_status,
-+ &bo_base->vm->moved);
- spin_unlock(&bo_base->vm->status_lock);
- }
- }
-@@ -2577,14 +2531,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u64 flags;
- uint64_t init_pde_value = 0;
-
-- vm->va = RB_ROOT_CACHED;
-+ vm->va = RB_ROOT;
- vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
- vm->reserved_vmid[i] = NULL;
- spin_lock_init(&vm->status_lock);
-- INIT_LIST_HEAD(&vm->evicted);
-- INIT_LIST_HEAD(&vm->relocated);
- INIT_LIST_HEAD(&vm->moved);
-+ INIT_LIST_HEAD(&vm->cleared);
- INIT_LIST_HEAD(&vm->freed);
-
- /* create scheduler entity for page table updates */
-@@ -2615,7 +2568,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
- "CPU update of VM recommended only for large BAR system\n");
-- vm->last_update = NULL;
-+ vm->last_dir_update = NULL;
-
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
-@@ -2628,31 +2581,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- flags,
-- NULL, NULL, init_pde_value, &vm->root.base.bo);
-+ NULL, NULL, init_pde_value, &vm->root.bo);
- if (r)
- goto error_free_sched_entity;
-
-- vm->root.base.vm = vm;
-- list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
-- INIT_LIST_HEAD(&vm->root.base.vm_status);
-+ r = amdgpu_bo_reserve(vm->root.bo, false);
-+ if (r)
-+ goto error_free_root;
-+
-+ vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
-
- if (vm->use_cpu_for_update) {
-- r = amdgpu_bo_reserve(vm->root.base.bo, false);
-+ r = amdgpu_bo_kmap(vm->root.bo, NULL);
- if (r)
- goto error_free_root;
-+ }
-
-- r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
-- if (r)
-- goto error_free_root;
-- amdgpu_bo_unreserve(vm->root.base.bo);
-+ amdgpu_bo_unreserve(vm->root.bo);
-+
-+ vm->vm_context = vm_context;
-+ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
-+ mutex_lock(&adev->vm_manager.lock);
-+
-+ if (adev->vm_manager.n_compute_vms++ == 0) {
-+ /* First Compute VM: enable compute power profile */
-+ if (adev->pp_enabled)
-+ amdgpu_dpm_switch_power_profile(adev,
-+ AMD_PP_COMPUTE_PROFILE);
-+ else if (adev->pm.funcs->switch_power_profile)
-+ adev->pm.funcs->switch_power_profile(adev,
-+ AMD_PP_COMPUTE_PROFILE);
-+ }
-+ mutex_unlock(&adev->vm_manager.lock);
- }
-
- return 0;
-
- error_free_root:
-- amdgpu_bo_unref(&vm->root.base.bo->shadow);
-- amdgpu_bo_unref(&vm->root.base.bo);
-- vm->root.base.bo = NULL;
-+ amdgpu_bo_unref(&vm->root.bo->shadow);
-+ amdgpu_bo_unref(&vm->root.bo);
-+ vm->root.bo = NULL;
-
- error_free_sched_entity:
- amd_sched_entity_fini(&ring->sched, &vm->entity);
-@@ -2671,18 +2639,20 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
- {
- unsigned i;
-
-- if (level->base.bo) {
-- list_del(&level->base.bo_list);
-- list_del(&level->base.vm_status);
-- amdgpu_bo_unref(&level->base.bo->shadow);
-- amdgpu_bo_unref(&level->base.bo);
-+ if (level->bo) {
-+ amdgpu_bo_unref(&level->bo->shadow);
-+ amdgpu_bo_unref(&level->bo);
- }
-
- if (level->entries)
- for (i = 0; i <= level->last_entry_used; i++)
- amdgpu_vm_free_levels(&level->entries[i]);
-
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ drm_free_large(level->entries);
-+#else
- kvfree(level->entries);
-+#endif
- }
-
- /**
-@@ -2698,16 +2668,31 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
- struct amdgpu_bo_va_mapping *mapping, *tmp;
- bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
-- struct amdgpu_bo *root;
-- int i, r;
-+ int i;
-+
-+ if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
-+ mutex_lock(&adev->vm_manager.lock);
-+
-+ WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs");
-+
-+ if (--adev->vm_manager.n_compute_vms == 0) {
-+ /* Last Compute VM: enable graphics power profile */
-+ if (adev->pp_enabled)
-+ amdgpu_dpm_switch_power_profile(adev,
-+ AMD_PP_GFX_PROFILE);
-+ else if (adev->pm.funcs->switch_power_profile)
-+ adev->pm.funcs->switch_power_profile(adev,
-+ AMD_PP_GFX_PROFILE);
-+ }
-+ mutex_unlock(&adev->vm_manager.lock);
-+ }
-
- amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
-- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
-+ if (!RB_EMPTY_ROOT(&vm->va)) {
- dev_err(adev->dev, "still active bo inside vm\n");
- }
-- rbtree_postorder_for_each_entry_safe(mapping, tmp,
-- &vm->va.rb_root, rb) {
-+ rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
- kfree(mapping);
-@@ -2721,9 +2706,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- list_del(&mapping->list);
- amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
- }
--
-+
- amdgpu_vm_free_levels(&vm->root);
-- dma_fence_put(vm->last_update);
-+ dma_fence_put(vm->last_dir_update);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
- amdgpu_vm_free_reserved_vmid(adev, vm, i);
- }
-@@ -2755,8 +2740,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
- }
- }
-
-- adev->vm_manager.fence_context =
-- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
-+ adev->vm_manager.fence_context = kcl_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
-
-@@ -2781,6 +2765,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
- adev->vm_manager.vm_update_mode = 0;
- #endif
-
-+ adev->vm_manager.n_compute_vms = 0;
- }
-
- /**
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-index 28cf20b..415e659 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-@@ -153,6 +153,9 @@ struct amdgpu_vm {
- /* dedicated to vm */
- struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
-
-+ /* Whether this is a Compute or GFX Context */
-+ int vm_context;
-+
- /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
- bool use_cpu_for_update;
-
-@@ -220,6 +223,8 @@ struct amdgpu_vm_manager {
- * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
- */
- int vm_update_mode;
-+ /* Number of Compute VMs, used for detecting Compute activity */
-+ unsigned n_compute_vms;
- };
-
- void amdgpu_vm_manager_init(struct amdgpu_device *adev);
---
-2.7.4
-