Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch | 1270
1 file changed, 1270 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch new file mode 100644 index 00000000..7fc9f02c --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch @@ -0,0 +1,1270 @@ +From 8c8e41b130f06c684c2455d26ff4523264fdfdef Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Wed, 5 Oct 2016 16:25:45 -0400 +Subject: [PATCH 1586/4131] drm/amdgpu: Automatic power profile switching + +Switch between compute and graphic profiles automatically when KFD +compute work starts and stops. It uses the number of KFD VMs as a +criteria for the existence of KFD compute work. + +Change-Id: I11d34f45d901f4dd1e16e4a64c1ad1010088d9b8 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> + + Conflicts: + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 625 +++++++++++------------ + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 + + 3 files changed, 311 insertions(+), 321 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 3ec1ff1..155de54 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -1327,7 +1327,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ +- ret = amdgpu_vm_init(adev, &new_vm->base); ++ ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE); + if (ret != 0) { + pr_err("Failed init vm ret %d\n", ret); + /* Undo everything related to the new VM context */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index bdf2d6c..c300397 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -25,8 +25,14 @@ + * Alex Deucher + * Jerome Glisse + */ ++#if defined(BUILD_AS_DKMS) ++#include <kcl/kcl_fence_array.h> ++#else + #include <linux/dma-fence-array.h> ++#endif ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + #include <linux/interval_tree_generic.h> ++#endif + #include <drm/drmP.h> + #include <drm/amdgpu_drm.h> + #include "amdgpu.h" +@@ -140,7 +146,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, + struct list_head *validated, + struct amdgpu_bo_list_entry *entry) + { +- entry->robj = vm->root.base.bo; ++ entry->robj = vm->root.bo; + entry->priority = 0; + entry->tv.bo = &entry->robj->tbo; + entry->tv.shared = true; +@@ -149,6 +155,61 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, + } + + /** ++ * amdgpu_vm_validate_layer - validate a single page table level ++ * ++ * @parent: parent page table level ++ * @validate: callback to do the validation ++ * @param: parameter for the validation callback ++ * ++ * Validate the page table BOs on command submission if neccessary. 
++ */ ++static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, ++ int (*validate)(void *, struct amdgpu_bo *), ++ void *param, bool use_cpu_for_update, ++ struct ttm_bo_global *glob) ++{ ++ unsigned i; ++ int r; ++ ++ if (use_cpu_for_update) { ++ r = amdgpu_bo_kmap(parent->bo, NULL); ++ if (r) ++ return r; ++ } ++ ++ if (!parent->entries) ++ return 0; ++ ++ for (i = 0; i <= parent->last_entry_used; ++i) { ++ struct amdgpu_vm_pt *entry = &parent->entries[i]; ++ ++ if (!entry->bo) ++ continue; ++ ++ r = validate(param, entry->bo); ++ if (r) ++ return r; ++ ++ spin_lock(&glob->lru_lock); ++ ttm_bo_move_to_lru_tail(&entry->bo->tbo); ++ if (entry->bo->shadow) ++ ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); ++ spin_unlock(&glob->lru_lock); ++ ++ /* ++ * Recurse into the sub directory. This is harmless because we ++ * have only a maximum of 5 layers. ++ */ ++ r = amdgpu_vm_validate_level(entry, validate, param, ++ use_cpu_for_update, glob); ++ if (r) ++ return r; ++ } ++ ++ return r; ++} ++ ++/** + * amdgpu_vm_validate_pt_bos - validate the page table BOs + * + * @adev: amdgpu device pointer +@@ -162,47 +223,32 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) + { +- struct ttm_bo_global *glob = adev->mman.bdev.glob; +- int r; +- +- spin_lock(&vm->status_lock); +- while (!list_empty(&vm->evicted)) { +- struct amdgpu_vm_bo_base *bo_base; +- struct amdgpu_bo *bo; +- +- bo_base = list_first_entry(&vm->evicted, +- struct amdgpu_vm_bo_base, +- vm_status); +- spin_unlock(&vm->status_lock); ++ uint64_t num_evictions; + +- bo = bo_base->bo; +- BUG_ON(!bo); +- if (bo->parent) { +- r = validate(param, bo); +- if (r) +- return r; ++ /* We only need to validate the page tables ++ * if they aren't already valid. 
++ */ ++ num_evictions = atomic64_read(&adev->num_evictions); ++ if (num_evictions == vm->last_eviction_counter) ++ return 0; + +- spin_lock(&glob->lru_lock); +- ttm_bo_move_to_lru_tail(&bo->tbo); +- if (bo->shadow) +- ttm_bo_move_to_lru_tail(&bo->shadow->tbo); +- spin_unlock(&glob->lru_lock); +- } ++ return amdgpu_vm_validate_level(&vm->root, validate, param, ++ vm->use_cpu_for_update, ++ adev->mman.bdev.glob); ++} + +- if (bo->tbo.type == ttm_bo_type_kernel && +- vm->use_cpu_for_update) { +- r = amdgpu_bo_kmap(bo, NULL); +- if (r) +- return r; +- } ++/** ++ * amdgpu_vm_check - helper for amdgpu_vm_ready ++ */ ++static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo) ++{ ++ /* if anything is swapped out don't swap it in here, ++ just abort and wait for the next CS */ ++ if (!amdgpu_bo_gpu_accessible(bo)) ++ return -ERESTARTSYS; + +- spin_lock(&vm->status_lock); +- if (bo->tbo.type != ttm_bo_type_kernel) +- list_move(&bo_base->vm_status, &vm->moved); +- else +- list_move(&bo_base->vm_status, &vm->relocated); +- } +- spin_unlock(&vm->status_lock); ++ if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow)) ++ return -ERESTARTSYS; + + return 0; + } +@@ -210,19 +256,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, + /** + * amdgpu_vm_ready - check VM is ready for updates + * ++ * @adev: amdgpu device + * @vm: VM to check + * + * Check if all VM PDs/PTs are ready for updates + */ +-bool amdgpu_vm_ready(struct amdgpu_vm *vm) ++bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm) + { +- bool ready; +- +- spin_lock(&vm->status_lock); +- ready = list_empty(&vm->evicted); +- spin_unlock(&vm->status_lock); ++ if (amdgpu_vm_check(NULL, vm->root.bo)) ++ return false; + +- return ready; ++ return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL); + } + + /** +@@ -251,9 +295,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, + if (!parent->entries) { + unsigned num_entries = amdgpu_vm_num_entries(adev, level); + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) ++ parent->entries = drm_calloc_large(num_entries, ++ sizeof(struct amdgpu_vm_pt)); ++#else + parent->entries = kvmalloc_array(num_entries, + sizeof(struct amdgpu_vm_pt), + GFP_KERNEL | __GFP_ZERO); ++#endif + if (!parent->entries) + return -ENOMEM; + memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); +@@ -288,11 +337,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, + + /* walk over the address space and allocate the page tables */ + for (pt_idx = from; pt_idx <= to; ++pt_idx) { +- struct reservation_object *resv = vm->root.base.bo->tbo.resv; ++ struct reservation_object *resv = vm->root.bo->tbo.resv; + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + struct amdgpu_bo *pt; + +- if (!entry->base.bo) { ++ if (!entry->bo) { + r = amdgpu_bo_create(adev, + amdgpu_vm_bo_size(adev, level), + AMDGPU_GPU_PAGE_SIZE, true, +@@ -313,14 +362,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. 
+ */ +- pt->parent = amdgpu_bo_ref(parent->base.bo); +- +- entry->base.vm = vm; +- entry->base.bo = pt; +- list_add_tail(&entry->base.bo_list, &pt->va); +- spin_lock(&vm->status_lock); +- list_add(&entry->base.vm_status, &vm->relocated); +- spin_unlock(&vm->status_lock); ++ pt->parent = amdgpu_bo_ref(vm->root.bo); ++ ++ entry->bo = pt; + entry->addr = 0; + } + +@@ -987,7 +1031,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int r; + + amdgpu_sync_create(&sync); +- amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); ++ amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); + r = amdgpu_sync_wait(&sync, true); + amdgpu_sync_free(&sync); + +@@ -1006,17 +1050,18 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, + */ + static int amdgpu_vm_update_level(struct amdgpu_device *adev, + struct amdgpu_vm *vm, +- struct amdgpu_vm_pt *parent) ++ struct amdgpu_vm_pt *parent, ++ unsigned level) + { + struct amdgpu_bo *shadow; + struct amdgpu_ring *ring = NULL; + uint64_t pd_addr, shadow_addr = 0; ++ uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); + uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; + unsigned count = 0, pt_idx, ndw = 0; + struct amdgpu_job *job; + struct amdgpu_pte_update_params params; + struct dma_fence *fence = NULL; +- uint32_t incr; + + int r; + +@@ -1025,10 +1070,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + + memset(¶ms, 0, sizeof(params)); + params.adev = adev; +- shadow = parent->base.bo->shadow; ++ shadow = parent->bo->shadow; + + if (vm->use_cpu_for_update) { +- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); ++ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) + return r; +@@ -1044,7 +1089,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + +- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); ++ pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); +@@ -1064,17 +1109,12 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + + /* walk over the address space and update the directory */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { +- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; +- struct amdgpu_bo *bo = entry->base.bo; ++ struct amdgpu_bo *bo = parent->entries[pt_idx].bo; + uint64_t pde, pt; + + if (bo == NULL) + continue; + +- spin_lock(&vm->status_lock); +- list_del_init(&entry->base.vm_status); +- spin_unlock(&vm->status_lock); +- + pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_gart_get_vm_pde(adev, pt); + /* Don't update huge pages here */ +@@ -1085,7 +1125,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; + + pde = pd_addr + pt_idx * 8; +- incr = amdgpu_bo_size(bo); + if (((last_pde + 8 * count) != pde) || + ((last_pt + incr * count) != pt) || + (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { +@@ -1113,7 +1152,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + } + + if (count) { +- if (vm->root.base.bo->shadow) ++ if (vm->root.bo->shadow) + params.func(¶ms, last_shadow, last_pt, + count, incr, AMDGPU_PTE_VALID); + +@@ -1126,8 +1165,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + amdgpu_job_free(job); + } else { + amdgpu_ring_pad_ib(ring, params.ib); +- amdgpu_sync_resv(adev, &job->sync, 
+- parent->base.bo->tbo.resv, ++ amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, + AMDGPU_FENCE_OWNER_VM); + if (shadow) + amdgpu_sync_resv(adev, &job->sync, +@@ -1140,11 +1178,26 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + if (r) + goto error_free; + +- amdgpu_bo_fence(parent->base.bo, fence, true); +- dma_fence_put(vm->last_update); +- vm->last_update = fence; ++ amdgpu_bo_fence(parent->bo, fence, true); ++ dma_fence_put(vm->last_dir_update); ++ vm->last_dir_update = dma_fence_get(fence); ++ dma_fence_put(fence); + } + } ++ /* ++ * Recurse into the subdirectories. This recursion is harmless because ++ * we only have a maximum of 5 layers. ++ */ ++ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { ++ struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; ++ ++ if (!entry->bo) ++ continue; ++ ++ r = amdgpu_vm_update_level(adev, vm, entry, level + 1); ++ if (r) ++ return r; ++ } + + return 0; + +@@ -1160,8 +1213,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, + * + * Mark all PD level as invalid after an error. + */ +-static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, +- struct amdgpu_vm_pt *parent) ++static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) + { + unsigned pt_idx; + +@@ -1172,15 +1224,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + +- if (!entry->base.bo) ++ if (!entry->bo) + continue; + + entry->addr = ~0ULL; +- spin_lock(&vm->status_lock); +- if (list_empty(&entry->base.vm_status)) +- list_add(&entry->base.vm_status, &vm->relocated); +- spin_unlock(&vm->status_lock); +- amdgpu_vm_invalidate_level(vm, entry); ++ amdgpu_vm_invalidate_level(entry); + } + } + +@@ -1196,40 +1244,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm, + int amdgpu_vm_update_directories(struct amdgpu_device *adev, + struct amdgpu_vm *vm) + { +- int r = 0; +- +- spin_lock(&vm->status_lock); +- while (!list_empty(&vm->relocated)) { +- struct amdgpu_vm_bo_base *bo_base; +- struct amdgpu_bo *bo; +- +- bo_base = list_first_entry(&vm->relocated, +- struct amdgpu_vm_bo_base, +- vm_status); +- spin_unlock(&vm->status_lock); +- +- bo = bo_base->bo->parent; +- if (bo) { +- struct amdgpu_vm_bo_base *parent; +- struct amdgpu_vm_pt *pt; +- +- parent = list_first_entry(&bo->va, +- struct amdgpu_vm_bo_base, +- bo_list); +- pt = container_of(parent, struct amdgpu_vm_pt, base); ++ int r; + +- r = amdgpu_vm_update_level(adev, vm, pt); +- if (r) { +- amdgpu_vm_invalidate_level(vm, &vm->root); +- return r; +- } +- spin_lock(&vm->status_lock); +- } else { +- spin_lock(&vm->status_lock); +- list_del_init(&bo_base->vm_status); +- } +- } +- spin_unlock(&vm->status_lock); ++ r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); ++ if (r) ++ amdgpu_vm_invalidate_level(&vm->root); + + if (vm->use_cpu_for_update) { + /* Flush HDP */ +@@ -1260,7 +1279,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, + *entry = &p->vm->root; + while ((*entry)->entries) { + idx = addr >> (p->adev->vm_manager.block_size * level--); +- idx %= amdgpu_bo_size((*entry)->base.bo) / 8; ++ idx %= amdgpu_bo_size((*entry)->bo) / 8; + *parent = *entry; + *entry = &(*entry)->entries[idx]; + } +@@ -1296,7 +1315,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + p->src || + !(flags & AMDGPU_PTE_VALID)) { + +- dst = amdgpu_bo_gpu_offset(entry->base.bo); ++ dst 
= amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_gart_get_vm_pde(p->adev, dst); + flags = AMDGPU_PTE_VALID; + } else { +@@ -1322,18 +1341,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + tmp = p->pages_addr; + p->pages_addr = NULL; + +- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); ++ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + + p->pages_addr = tmp; + } else { +- if (parent->base.bo->shadow) { +- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); ++ if (parent->bo->shadow) { ++ pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } +- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); ++ pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } +@@ -1384,7 +1403,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, + if (entry->addr & AMDGPU_PDE_PTE) + continue; + +- pt = entry->base.bo; ++ pt = entry->bo; + if (use_cpu_update) { + pe_start = (unsigned long)amdgpu_bo_kptr(pt); + } else { +@@ -1420,6 +1439,8 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) + { ++ int r; ++ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page +@@ -1438,38 +1459,39 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. 
+ */ +- unsigned max_frag = params->adev->vm_manager.fragment_size; +- int r; ++ unsigned pages_per_frag = params->adev->vm_manager.fragment_size; ++ uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); ++ uint64_t frag_align = 1 << pages_per_frag; ++ ++ uint64_t frag_start = ALIGN(start, frag_align); ++ uint64_t frag_end = end & ~(frag_align - 1); + + /* system pages are non continuously */ +- if (params->src || !(flags & AMDGPU_PTE_VALID)) ++ if (params->src || !(flags & AMDGPU_PTE_VALID) || ++ (frag_start >= frag_end)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); + +- while (start != end) { +- uint64_t frag_flags, frag_end; +- unsigned frag; +- +- /* This intentionally wraps around if no bit is set */ +- frag = min((unsigned)ffs(start) - 1, +- (unsigned)fls64(end - start) - 1); +- if (frag >= max_frag) { +- frag_flags = AMDGPU_PTE_FRAG(max_frag); +- frag_end = end & ~((1ULL << max_frag) - 1); +- } else { +- frag_flags = AMDGPU_PTE_FRAG(frag); +- frag_end = start + (1 << frag); +- } +- +- r = amdgpu_vm_update_ptes(params, start, frag_end, dst, +- flags | frag_flags); ++ /* handle the 4K area at the beginning */ ++ if (start != frag_start) { ++ r = amdgpu_vm_update_ptes(params, start, frag_start, ++ dst, flags); + if (r) + return r; +- +- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; +- start = frag_end; ++ dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; + } + +- return 0; ++ /* handle the area in the middle */ ++ r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, ++ flags | frag_flags); ++ if (r) ++ return r; ++ ++ /* handle the 4K area at the end */ ++ if (frag_end != end) { ++ dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; ++ r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); ++ } ++ return r; + } + + /** +@@ -1477,6 +1499,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + * + * @adev: amdgpu_device pointer + * @exclusive: fence we need to sync to ++ * @src: address where to copy page table entries from + * @pages_addr: DMA addresses to use for mapping + * @vm: requested vm + * @start: start of mapped range +@@ -1490,6 +1513,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + */ + static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct dma_fence *exclusive, ++ uint64_t src, + dma_addr_t *pages_addr, + struct amdgpu_vm *vm, + uint64_t start, uint64_t last, +@@ -1507,6 +1531,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; ++ params.src = src; + + /* sync to everything on unmapping */ + if (!(flags & AMDGPU_PTE_VALID)) +@@ -1535,12 +1560,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + nptes = last - start + 1; + + /* +- * reserve space for two commands every (1 << BLOCK_SIZE) ++ * reserve space for one command every (1 << BLOCK_SIZE) + * entries or 2k dwords (whatever is smaller) +- * +- * The second command is for the shadow pagetables. + */ +- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2; ++ ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1; + + /* padding, etc. 
*/ + ndw = 64; +@@ -1548,9 +1571,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + /* one PDE write for each huge page */ + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; + +- if (pages_addr) { ++ if (src) { ++ /* only copy commands needed */ ++ ndw += ncmds * 7; ++ ++ params.func = amdgpu_vm_do_copy_ptes; ++ ++ } else if (pages_addr) { + /* copy commands needed */ +- ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; ++ ndw += ncmds * 7; + + /* and also PTEs */ + ndw += nptes * 2; +@@ -1559,11 +1588,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + + } else { + /* set page commands needed */ +- ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; ++ ndw += ncmds * 10; + +- /* extra commands for begin/end fragments */ +- ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw +- * adev->vm_manager.fragment_size; ++ /* two extra commands for begin/end of fragment */ ++ ndw += 2 * 10; + + params.func = amdgpu_vm_do_set_ptes; + } +@@ -1574,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + + params.ib = &job->ibs[0]; + +- if (pages_addr) { ++ if (!src && pages_addr) { + uint64_t *pte; + unsigned i; + +@@ -1595,12 +1623,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + if (r) + goto error_free; + +- r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, ++ r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv, + owner); + if (r) + goto error_free; + +- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); ++ r = reservation_object_reserve_shared(vm->root.bo->tbo.resv); + if (r) + goto error_free; + +@@ -1615,14 +1643,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + if (r) + goto error_free; + +- amdgpu_bo_fence(vm->root.base.bo, f, true); ++ amdgpu_bo_fence(vm->root.bo, f, true); + dma_fence_put(*fence); + *fence = f; + return 0; + + error_free: + amdgpu_job_free(job); +- amdgpu_vm_invalidate_level(vm, &vm->root); ++ amdgpu_vm_invalidate_level(&vm->root); + return r; + } + +@@ -1647,13 +1675,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + dma_addr_t *pages_addr, + struct amdgpu_vm *vm, + struct amdgpu_bo_va_mapping *mapping, +- uint64_t vram_base_offset, + uint64_t flags, + struct ttm_mem_reg *mem, + struct dma_fence **fence) + { + struct drm_mm_node *nodes = mem ? 
mem->mm_node : NULL; +- uint64_t pfn, start = mapping->start; ++ uint64_t pfn, src = 0, start = mapping->start; + int r; + + /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here +@@ -1704,12 +1731,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + max_entries = min(max_entries, 16ull * 1024ull); + break; + case AMDGPU_PL_DGMA: +- addr += vram_base_offset + ++ addr += adev->vm_manager.vram_base_offset + + adev->mman.bdev.man[mem->mem_type].gpu_offset - + adev->mman.bdev.man[TTM_PL_VRAM].gpu_offset; + break; + case TTM_PL_VRAM: +- addr += vram_base_offset; ++ addr += adev->vm_manager.vram_base_offset; + break; + default: + break; +@@ -1722,7 +1749,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + addr += pfn << PAGE_SHIFT; + + last = min((uint64_t)mapping->last, start + max_entries - 1); +- r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, ++ r = amdgpu_vm_bo_update_mapping(adev, exclusive, ++ src, pages_addr, vm, + start, last, flags, addr, + fence); + if (r) +@@ -1760,10 +1788,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + dma_addr_t *pages_addr = NULL; + struct ttm_mem_reg *mem; + struct drm_mm_node *nodes; +- struct dma_fence *exclusive, **last_update; ++ struct dma_fence *exclusive; + uint64_t flags; +- uint64_t vram_base_offset = adev->vm_manager.vram_base_offset; +- struct amdgpu_device *bo_adev; + int r; + + if (clear || !bo_va->base.bo) { +@@ -1785,54 +1811,43 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + exclusive = reservation_object_get_excl(bo->tbo.resv); + } + +- if (bo) { ++ if (bo) + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); +- bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); +- if (mem && mem->mem_type == TTM_PL_VRAM && +- adev != bo_adev) { +- flags |= AMDGPU_PTE_SYSTEM; +- vram_base_offset = bo_adev->mc.aper_base; +- } +- } else +- flags = 0x0; +- +- if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) +- last_update = &vm->last_update; + else +- last_update = &bo_va->last_pt_update; +- +- if (!clear && bo_va->base.moved) { +- bo_va->base.moved = false; +- list_splice_init(&bo_va->valids, &bo_va->invalids); ++ flags = 0x0; + +- } else if (bo_va->cleared != clear) { ++ spin_lock(&vm->status_lock); ++ if (!list_empty(&bo_va->base.vm_status)) + list_splice_init(&bo_va->valids, &bo_va->invalids); +- } ++ spin_unlock(&vm->status_lock); + + list_for_each_entry(mapping, &bo_va->invalids, list) { + r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, +- mapping, vram_base_offset, flags, +- mem, last_update); ++ mapping, flags, mem, ++ &bo_va->last_pt_update); + if (r) + return r; + } + +- if (vm->use_cpu_for_update) { +- /* Flush HDP */ +- mb(); +- amdgpu_gart_flush_gpu_tlb(adev, 0); ++ if (trace_amdgpu_vm_bo_mapping_enabled()) { ++ list_for_each_entry(mapping, &bo_va->valids, list) ++ trace_amdgpu_vm_bo_mapping(mapping); ++ ++ list_for_each_entry(mapping, &bo_va->invalids, list) ++ trace_amdgpu_vm_bo_mapping(mapping); + } + + spin_lock(&vm->status_lock); ++ list_splice_init(&bo_va->invalids, &bo_va->valids); + list_del_init(&bo_va->base.vm_status); ++ if (clear) ++ list_add(&bo_va->base.vm_status, &vm->cleared); + spin_unlock(&vm->status_lock); + +- list_splice_init(&bo_va->invalids, &bo_va->valids); +- bo_va->cleared = clear; +- +- if (trace_amdgpu_vm_bo_mapping_enabled()) { +- list_for_each_entry(mapping, &bo_va->valids, list) +- trace_amdgpu_vm_bo_mapping(mapping); ++ if (vm->use_cpu_for_update) { ++ /* Flush HDP */ ++ mb(); ++ 
amdgpu_gart_flush_gpu_tlb(adev, 0); + } + + return 0; +@@ -1940,7 +1955,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, + */ + static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + { +- struct reservation_object *resv = vm->root.base.bo->tbo.resv; ++ struct reservation_object *resv = vm->root.bo->tbo.resv; + struct dma_fence *excl, **shared; + unsigned i, shared_count; + int r; +@@ -1998,7 +2013,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, + if (vm->pte_support_ats) + init_pte_value = AMDGPU_PTE_SYSTEM; + +- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, ++ r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, + mapping->start, mapping->last, + init_pte_value, 0, &f); + amdgpu_vm_free_mapping(adev, vm, mapping, f); +@@ -2020,35 +2035,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, + } + + /** +- * amdgpu_vm_handle_moved - handle moved BOs in the PT ++ * amdgpu_vm_clear_moved - clear moved BOs in the PT + * + * @adev: amdgpu_device pointer + * @vm: requested vm +- * @sync: sync object to add fences to + * +- * Make sure all BOs which are moved are updated in the PTs. ++ * Make sure all moved BOs are cleared in the PT. + * Returns 0 for success. + * +- * PTs have to be reserved! ++ * PTs have to be reserved and mutex must be locked! + */ +-int amdgpu_vm_handle_moved(struct amdgpu_device *adev, +- struct amdgpu_vm *vm) ++int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, ++ struct amdgpu_sync *sync) + { +- bool clear; ++ struct amdgpu_bo_va *bo_va = NULL; + int r = 0; + + spin_lock(&vm->status_lock); + while (!list_empty(&vm->moved)) { +- struct amdgpu_bo_va *bo_va; +- + bo_va = list_first_entry(&vm->moved, + struct amdgpu_bo_va, base.vm_status); + spin_unlock(&vm->status_lock); + +- /* Per VM BOs never need to bo cleared in the page tables */ +- clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv; +- +- r = amdgpu_vm_bo_update(adev, bo_va, clear); ++ r = amdgpu_vm_bo_update(adev, bo_va, true); + if (r) + return r; + +@@ -2056,6 +2065,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, + } + spin_unlock(&vm->status_lock); + ++ if (bo_va) ++ r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update); ++ + return r; + } + +@@ -2097,39 +2109,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, + return bo_va; + } + +- +-/** +- * amdgpu_vm_bo_insert_mapping - insert a new mapping +- * +- * @adev: amdgpu_device pointer +- * @bo_va: bo_va to store the address +- * @mapping: the mapping to insert +- * +- * Insert a new mapping into all structures. 
+- */ +-static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, +- struct amdgpu_bo_va *bo_va, +- struct amdgpu_bo_va_mapping *mapping) +-{ +- struct amdgpu_vm *vm = bo_va->base.vm; +- struct amdgpu_bo *bo = bo_va->base.bo; +- +- mapping->bo_va = bo_va; +- list_add(&mapping->list, &bo_va->invalids); +- amdgpu_vm_it_insert(mapping, &vm->va); +- +- if (mapping->flags & AMDGPU_PTE_PRT) +- amdgpu_vm_prt_get(adev); +- +- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { +- spin_lock(&vm->status_lock); +- if (list_empty(&bo_va->base.vm_status)) +- list_add(&bo_va->base.vm_status, &vm->moved); +- spin_unlock(&vm->status_lock); +- } +- trace_amdgpu_vm_bo_map(bo_va, mapping); +-} +- + /** + * amdgpu_vm_bo_map - map bo inside a vm + * +@@ -2181,12 +2160,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, + if (!mapping) + return -ENOMEM; + ++ INIT_LIST_HEAD(&mapping->list); + mapping->start = saddr; + mapping->last = eaddr; + mapping->offset = offset; + mapping->flags = flags; + +- amdgpu_vm_bo_insert_map(adev, bo_va, mapping); ++ list_add(&mapping->list, &bo_va->invalids); ++ amdgpu_vm_it_insert(mapping, &vm->va); ++ ++ if (flags & AMDGPU_PTE_PRT) ++ amdgpu_vm_prt_get(adev); ++ trace_amdgpu_vm_bo_map(bo_va, mapping); + + return 0; + } +@@ -2213,6 +2198,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + { + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo = bo_va->base.bo; ++ struct amdgpu_vm *vm = bo_va->base.vm; + uint64_t eaddr; + int r; + +@@ -2246,7 +2232,12 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, + mapping->offset = offset; + mapping->flags = flags; + +- amdgpu_vm_bo_insert_map(adev, bo_va, mapping); ++ list_add(&mapping->list, &bo_va->invalids); ++ amdgpu_vm_it_insert(mapping, &vm->va); ++ ++ if (flags & AMDGPU_PTE_PRT) ++ amdgpu_vm_prt_get(adev); ++ trace_amdgpu_vm_bo_map(bo_va, mapping); + + return 0; + } +@@ -2292,7 +2283,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, + + list_del(&mapping->list); + amdgpu_vm_it_remove(mapping, &vm->va); +- mapping->bo_va = NULL; + trace_amdgpu_vm_bo_unmap(bo_va, mapping); + + if (valid) +@@ -2378,7 +2368,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + if (tmp->last > eaddr) + tmp->last = eaddr; + +- tmp->bo_va = NULL; + list_add(&tmp->list, &vm->freed); + trace_amdgpu_vm_bo_unmap(NULL, tmp); + } +@@ -2405,19 +2394,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, + } + + /** +- * amdgpu_vm_bo_lookup_mapping - find mapping by address +- * +- * @vm: the requested VM +- * +- * Find a mapping by it's address. +- */ +-struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, +- uint64_t addr) +-{ +- return amdgpu_vm_it_iter_first(&vm->va, addr, addr); +-} +- +-/** + * amdgpu_vm_bo_rmv - remove a bo to a specific vm + * + * @adev: amdgpu_device pointer +@@ -2442,7 +2418,6 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, + list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { + list_del(&mapping->list); + amdgpu_vm_it_remove(mapping, &vm->va); +- mapping->bo_va = NULL; + trace_amdgpu_vm_bo_unmap(bo_va, mapping); + list_add(&mapping->list, &vm->freed); + } +@@ -2467,36 +2442,15 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, + * Mark @bo as invalid. 
+ */ + void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, +- struct amdgpu_bo *bo, bool evicted) ++ struct amdgpu_bo *bo) + { + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &bo->va, bo_list) { +- struct amdgpu_vm *vm = bo_base->vm; +- +- bo_base->moved = true; +- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { +- spin_lock(&bo_base->vm->status_lock); +- if (bo->tbo.type == ttm_bo_type_kernel) +- list_move(&bo_base->vm_status, &vm->evicted); +- else +- list_move_tail(&bo_base->vm_status, +- &vm->evicted); +- spin_unlock(&bo_base->vm->status_lock); +- continue; +- } +- +- if (bo->tbo.type == ttm_bo_type_kernel) { +- spin_lock(&bo_base->vm->status_lock); +- if (list_empty(&bo_base->vm_status)) +- list_add(&bo_base->vm_status, &vm->relocated); +- spin_unlock(&bo_base->vm->status_lock); +- continue; +- } +- + spin_lock(&bo_base->vm->status_lock); + if (list_empty(&bo_base->vm_status)) +- list_add(&bo_base->vm_status, &vm->moved); ++ list_add(&bo_base->vm_status, ++ &bo_base->vm->moved); + spin_unlock(&bo_base->vm->status_lock); + } + } +@@ -2577,14 +2531,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + u64 flags; + uint64_t init_pde_value = 0; + +- vm->va = RB_ROOT_CACHED; ++ vm->va = RB_ROOT; + vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; + spin_lock_init(&vm->status_lock); +- INIT_LIST_HEAD(&vm->evicted); +- INIT_LIST_HEAD(&vm->relocated); + INIT_LIST_HEAD(&vm->moved); ++ INIT_LIST_HEAD(&vm->cleared); + INIT_LIST_HEAD(&vm->freed); + + /* create scheduler entity for page table updates */ +@@ -2615,7 +2568,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + vm->use_cpu_for_update ? 
"CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); +- vm->last_update = NULL; ++ vm->last_dir_update = NULL; + + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; +@@ -2628,31 +2581,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, + AMDGPU_GEM_DOMAIN_VRAM, + flags, +- NULL, NULL, init_pde_value, &vm->root.base.bo); ++ NULL, NULL, init_pde_value, &vm->root.bo); + if (r) + goto error_free_sched_entity; + +- vm->root.base.vm = vm; +- list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); +- INIT_LIST_HEAD(&vm->root.base.vm_status); ++ r = amdgpu_bo_reserve(vm->root.bo, false); ++ if (r) ++ goto error_free_root; ++ ++ vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + + if (vm->use_cpu_for_update) { +- r = amdgpu_bo_reserve(vm->root.base.bo, false); ++ r = amdgpu_bo_kmap(vm->root.bo, NULL); + if (r) + goto error_free_root; ++ } + +- r = amdgpu_bo_kmap(vm->root.base.bo, NULL); +- if (r) +- goto error_free_root; +- amdgpu_bo_unreserve(vm->root.base.bo); ++ amdgpu_bo_unreserve(vm->root.bo); ++ ++ vm->vm_context = vm_context; ++ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { ++ mutex_lock(&adev->vm_manager.lock); ++ ++ if (adev->vm_manager.n_compute_vms++ == 0) { ++ /* First Compute VM: enable compute power profile */ ++ if (adev->pp_enabled) ++ amdgpu_dpm_switch_power_profile(adev, ++ AMD_PP_COMPUTE_PROFILE); ++ else if (adev->pm.funcs->switch_power_profile) ++ adev->pm.funcs->switch_power_profile(adev, ++ AMD_PP_COMPUTE_PROFILE); ++ } ++ mutex_unlock(&adev->vm_manager.lock); + } + + return 0; + + error_free_root: +- amdgpu_bo_unref(&vm->root.base.bo->shadow); +- amdgpu_bo_unref(&vm->root.base.bo); +- vm->root.base.bo = NULL; ++ amdgpu_bo_unref(&vm->root.bo->shadow); ++ amdgpu_bo_unref(&vm->root.bo); ++ vm->root.bo = NULL; + + error_free_sched_entity: + amd_sched_entity_fini(&ring->sched, &vm->entity); +@@ -2671,18 +2639,20 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) + { + unsigned i; + +- if (level->base.bo) { +- list_del(&level->base.bo_list); +- list_del(&level->base.vm_status); +- amdgpu_bo_unref(&level->base.bo->shadow); +- amdgpu_bo_unref(&level->base.bo); ++ if (level->bo) { ++ amdgpu_bo_unref(&level->bo->shadow); ++ amdgpu_bo_unref(&level->bo); + } + + if (level->entries) + for (i = 0; i <= level->last_entry_used; i++) + amdgpu_vm_free_levels(&level->entries[i]); + ++#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0) ++ drm_free_large(level->entries); ++#else + kvfree(level->entries); ++#endif + } + + /** +@@ -2698,16 +2668,31 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + { + struct amdgpu_bo_va_mapping *mapping, *tmp; + bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; +- struct amdgpu_bo *root; +- int i, r; ++ int i; ++ ++ if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { ++ mutex_lock(&adev->vm_manager.lock); ++ ++ WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs"); ++ ++ if (--adev->vm_manager.n_compute_vms == 0) { ++ /* Last Compute VM: enable graphics power profile */ ++ if (adev->pp_enabled) ++ amdgpu_dpm_switch_power_profile(adev, ++ AMD_PP_GFX_PROFILE); ++ else if (adev->pm.funcs->switch_power_profile) ++ adev->pm.funcs->switch_power_profile(adev, ++ AMD_PP_GFX_PROFILE); ++ } ++ mutex_unlock(&adev->vm_manager.lock); ++ } + + 
amd_sched_entity_fini(vm->entity.sched, &vm->entity); + +- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { ++ if (!RB_EMPTY_ROOT(&vm->va)) { + dev_err(adev->dev, "still active bo inside vm\n"); + } +- rbtree_postorder_for_each_entry_safe(mapping, tmp, +- &vm->va.rb_root, rb) { ++ rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + list_del(&mapping->list); + amdgpu_vm_it_remove(mapping, &vm->va); + kfree(mapping); +@@ -2721,9 +2706,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) + list_del(&mapping->list); + amdgpu_vm_free_mapping(adev, vm, mapping, NULL); + } +- ++ + amdgpu_vm_free_levels(&vm->root); +- dma_fence_put(vm->last_update); ++ dma_fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); + } +@@ -2755,8 +2740,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) + } + } + +- adev->vm_manager.fence_context = +- dma_fence_context_alloc(AMDGPU_MAX_RINGS); ++ adev->vm_manager.fence_context = kcl_fence_context_alloc(AMDGPU_MAX_RINGS); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + adev->vm_manager.seqno[i] = 0; + +@@ -2781,6 +2765,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) + adev->vm_manager.vm_update_mode = 0; + #endif + ++ adev->vm_manager.n_compute_vms = 0; + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +index 28cf20b..415e659 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +@@ -153,6 +153,9 @@ struct amdgpu_vm { + /* dedicated to vm */ + struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; + ++ /* Whether this is a Compute or GFX Context */ ++ int vm_context; ++ + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ + bool use_cpu_for_update; + +@@ -220,6 +223,8 @@ struct amdgpu_vm_manager { + * BIT1[= 0] Compute updated by SDMA [= 1] by CPU + */ + int vm_update_mode; ++ /* Number of Compute VMs, used for detecting Compute activity */ ++ unsigned n_compute_vms; + }; + + void amdgpu_vm_manager_init(struct amdgpu_device *adev); +-- +2.7.4 + |
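
For readers skimming the patch, the core mechanism is small: amdgpu_vm_init() and amdgpu_vm_fini() keep a count of compute (KFD) VMs under adev->vm_manager.lock, switch the GPU to the compute power profile when the first compute VM is created, and switch back to the graphics profile when the last one is destroyed. The standalone C sketch below models only that counting pattern; it is not driver code, and names such as gpu_state and switch_power_profile() are illustrative stand-ins for adev->vm_manager.lock, vm_manager.n_compute_vms and amdgpu_dpm_switch_power_profile().

    /*
     * Standalone user-space sketch of the first/last compute-VM switching
     * pattern added by this patch. Compile with: cc sketch.c -lpthread
     */
    #include <pthread.h>
    #include <stdio.h>

    enum power_profile { PROFILE_GFX, PROFILE_COMPUTE };

    struct gpu_state {
            pthread_mutex_t lock;      /* stands in for adev->vm_manager.lock */
            unsigned n_compute_vms;    /* stands in for vm_manager.n_compute_vms */
            enum power_profile profile;
    };

    static void switch_power_profile(struct gpu_state *gpu, enum power_profile p)
    {
            /* the real driver calls amdgpu_dpm_switch_power_profile() here */
            gpu->profile = p;
            printf("profile -> %s\n", p == PROFILE_COMPUTE ? "compute" : "graphics");
    }

    /* Called when a KFD (compute) VM is created, cf. amdgpu_vm_init(). */
    static void compute_vm_created(struct gpu_state *gpu)
    {
            pthread_mutex_lock(&gpu->lock);
            if (gpu->n_compute_vms++ == 0)          /* first compute VM */
                    switch_power_profile(gpu, PROFILE_COMPUTE);
            pthread_mutex_unlock(&gpu->lock);
    }

    /* Called when a KFD (compute) VM is destroyed, cf. amdgpu_vm_fini(). */
    static void compute_vm_destroyed(struct gpu_state *gpu)
    {
            pthread_mutex_lock(&gpu->lock);
            if (--gpu->n_compute_vms == 0)          /* last compute VM gone */
                    switch_power_profile(gpu, PROFILE_GFX);
            pthread_mutex_unlock(&gpu->lock);
    }

    int main(void)
    {
            struct gpu_state gpu = {
                    .lock = PTHREAD_MUTEX_INITIALIZER,
                    .n_compute_vms = 0,
                    .profile = PROFILE_GFX,
            };

            compute_vm_created(&gpu);   /* 0 -> 1: switch to compute profile */
            compute_vm_created(&gpu);   /* 1 -> 2: no switch */
            compute_vm_destroyed(&gpu); /* 2 -> 1: no switch */
            compute_vm_destroyed(&gpu); /* 1 -> 0: switch back to graphics */
            return 0;
    }

The actual patch additionally chooses between amdgpu_dpm_switch_power_profile() (when adev->pp_enabled) and adev->pm.funcs->switch_power_profile(), and warns if the compute-VM count would go negative; those details are omitted from the sketch above.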