Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch | 1270
1 file changed, 0 insertions, 1270 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch
deleted file mode 100644
index 7fc9f02c..00000000
--- a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1586-drm-amdgpu-Automatic-power-profile-switching.patch
+++ /dev/null
@@ -1,1270 +0,0 @@
-From 8c8e41b130f06c684c2455d26ff4523264fdfdef Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Wed, 5 Oct 2016 16:25:45 -0400
-Subject: [PATCH 1586/4131] drm/amdgpu: Automatic power profile switching
-
-Switch between compute and graphic profiles automatically when KFD
-compute work starts and stops. It uses the number of KFD VMs as a
-criteria for the existence of KFD compute work.
-
-Change-Id: I11d34f45d901f4dd1e16e4a64c1ad1010088d9b8
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-
- Conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
---
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 625 +++++++++++------------
- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +
- 3 files changed, 311 insertions(+), 321 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 3ec1ff1..155de54 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -1327,7 +1327,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
- return -ENOMEM;
-
- /* Initialize the VM context, allocate the page directory and zero it */
-- ret = amdgpu_vm_init(adev, &new_vm->base);
-+ ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE);
- if (ret != 0) {
- pr_err("Failed init vm ret %d\n", ret);
- /* Undo everything related to the new VM context */
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-index bdf2d6c..c300397 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
-@@ -25,8 +25,14 @@
- * Alex Deucher
- * Jerome Glisse
- */
-+#if defined(BUILD_AS_DKMS)
-+#include <kcl/kcl_fence_array.h>
-+#else
- #include <linux/dma-fence-array.h>
-+#endif
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
- #include <linux/interval_tree_generic.h>
-+#endif
- #include <drm/drmP.h>
- #include <drm/amdgpu_drm.h>
- #include "amdgpu.h"
-@@ -140,7 +146,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- struct list_head *validated,
- struct amdgpu_bo_list_entry *entry)
- {
-- entry->robj = vm->root.base.bo;
-+ entry->robj = vm->root.bo;
- entry->priority = 0;
- entry->tv.bo = &entry->robj->tbo;
- entry->tv.shared = true;
-@@ -149,6 +155,61 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
- }
-
- /**
-+ * amdgpu_vm_validate_layer - validate a single page table level
-+ *
-+ * @parent: parent page table level
-+ * @validate: callback to do the validation
-+ * @param: parameter for the validation callback
-+ *
-+ * Validate the page table BOs on command submission if neccessary.
-+ */
-+static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-+ int (*validate)(void *, struct amdgpu_bo *),
-+ void *param, bool use_cpu_for_update,
-+ struct ttm_bo_global *glob)
-+{
-+ unsigned i;
-+ int r;
-+
-+ if (use_cpu_for_update) {
-+ r = amdgpu_bo_kmap(parent->bo, NULL);
-+ if (r)
-+ return r;
-+ }
-+
-+ if (!parent->entries)
-+ return 0;
-+
-+ for (i = 0; i <= parent->last_entry_used; ++i) {
-+ struct amdgpu_vm_pt *entry = &parent->entries[i];
-+
-+ if (!entry->bo)
-+ continue;
-+
-+ r = validate(param, entry->bo);
-+ if (r)
-+ return r;
-+
-+ spin_lock(&glob->lru_lock);
-+ ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-+ if (entry->bo->shadow)
-+ ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
-+ spin_unlock(&glob->lru_lock);
-+
-+ /*
-+ * Recurse into the sub directory. This is harmless because we
-+ * have only a maximum of 5 layers.
-+ */
-+ r = amdgpu_vm_validate_level(entry, validate, param,
-+ use_cpu_for_update, glob);
-+ if (r)
-+ return r;
-+ }
-+
-+ return r;
-+}
-+
-+/**
- * amdgpu_vm_validate_pt_bos - validate the page table BOs
- *
- * @adev: amdgpu device pointer
-@@ -162,47 +223,32 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int (*validate)(void *p, struct amdgpu_bo *bo),
- void *param)
- {
-- struct ttm_bo_global *glob = adev->mman.bdev.glob;
-- int r;
--
-- spin_lock(&vm->status_lock);
-- while (!list_empty(&vm->evicted)) {
-- struct amdgpu_vm_bo_base *bo_base;
-- struct amdgpu_bo *bo;
--
-- bo_base = list_first_entry(&vm->evicted,
-- struct amdgpu_vm_bo_base,
-- vm_status);
-- spin_unlock(&vm->status_lock);
-+ uint64_t num_evictions;
-
-- bo = bo_base->bo;
-- BUG_ON(!bo);
-- if (bo->parent) {
-- r = validate(param, bo);
-- if (r)
-- return r;
-+ /* We only need to validate the page tables
-+ * if they aren't already valid.
-+ */
-+ num_evictions = atomic64_read(&adev->num_evictions);
-+ if (num_evictions == vm->last_eviction_counter)
-+ return 0;
-
-- spin_lock(&glob->lru_lock);
-- ttm_bo_move_to_lru_tail(&bo->tbo);
-- if (bo->shadow)
-- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
-- spin_unlock(&glob->lru_lock);
-- }
-+ return amdgpu_vm_validate_level(&vm->root, validate, param,
-+ vm->use_cpu_for_update,
-+ adev->mman.bdev.glob);
-+}
-
-- if (bo->tbo.type == ttm_bo_type_kernel &&
-- vm->use_cpu_for_update) {
-- r = amdgpu_bo_kmap(bo, NULL);
-- if (r)
-- return r;
-- }
-+/**
-+ * amdgpu_vm_check - helper for amdgpu_vm_ready
-+ */
-+static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-+{
-+ /* if anything is swapped out don't swap it in here,
-+ just abort and wait for the next CS */
-+ if (!amdgpu_bo_gpu_accessible(bo))
-+ return -ERESTARTSYS;
-
-- spin_lock(&vm->status_lock);
-- if (bo->tbo.type != ttm_bo_type_kernel)
-- list_move(&bo_base->vm_status, &vm->moved);
-- else
-- list_move(&bo_base->vm_status, &vm->relocated);
-- }
-- spin_unlock(&vm->status_lock);
-+ if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-+ return -ERESTARTSYS;
-
- return 0;
- }
-@@ -210,19 +256,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- /**
- * amdgpu_vm_ready - check VM is ready for updates
- *
-+ * @adev: amdgpu device
- * @vm: VM to check
- *
- * Check if all VM PDs/PTs are ready for updates
- */
--bool amdgpu_vm_ready(struct amdgpu_vm *vm)
-+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
-- bool ready;
--
-- spin_lock(&vm->status_lock);
-- ready = list_empty(&vm->evicted);
-- spin_unlock(&vm->status_lock);
-+ if (amdgpu_vm_check(NULL, vm->root.bo))
-+ return false;
-
-- return ready;
-+ return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
- }
-
- /**
-@@ -251,9 +295,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
- if (!parent->entries) {
- unsigned num_entries = amdgpu_vm_num_entries(adev, level);
-
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ parent->entries = drm_calloc_large(num_entries,
-+ sizeof(struct amdgpu_vm_pt));
-+#else
- parent->entries = kvmalloc_array(num_entries,
- sizeof(struct amdgpu_vm_pt),
- GFP_KERNEL | __GFP_ZERO);
-+#endif
- if (!parent->entries)
- return -ENOMEM;
- memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
-@@ -288,11 +337,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
-
- /* walk over the address space and allocate the page tables */
- for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
-+ struct reservation_object *resv = vm->root.bo->tbo.resv;
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
- struct amdgpu_bo *pt;
-
-- if (!entry->base.bo) {
-+ if (!entry->bo) {
- r = amdgpu_bo_create(adev,
- amdgpu_vm_bo_size(adev, level),
- AMDGPU_GPU_PAGE_SIZE, true,
-@@ -313,14 +362,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
-- pt->parent = amdgpu_bo_ref(parent->base.bo);
--
-- entry->base.vm = vm;
-- entry->base.bo = pt;
-- list_add_tail(&entry->base.bo_list, &pt->va);
-- spin_lock(&vm->status_lock);
-- list_add(&entry->base.vm_status, &vm->relocated);
-- spin_unlock(&vm->status_lock);
-+ pt->parent = amdgpu_bo_ref(vm->root.bo);
-+
-+ entry->bo = pt;
- entry->addr = 0;
- }
-
-@@ -987,7 +1031,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int r;
-
- amdgpu_sync_create(&sync);
-- amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
-+ amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
- r = amdgpu_sync_wait(&sync, true);
- amdgpu_sync_free(&sync);
-
-@@ -1006,17 +1050,18 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- */
- static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
-- struct amdgpu_vm_pt *parent)
-+ struct amdgpu_vm_pt *parent,
-+ unsigned level)
- {
- struct amdgpu_bo *shadow;
- struct amdgpu_ring *ring = NULL;
- uint64_t pd_addr, shadow_addr = 0;
-+ uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
- uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
- unsigned count = 0, pt_idx, ndw = 0;
- struct amdgpu_job *job;
- struct amdgpu_pte_update_params params;
- struct dma_fence *fence = NULL;
-- uint32_t incr;
-
- int r;
-
-@@ -1025,10 +1070,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-
- memset(&params, 0, sizeof(params));
- params.adev = adev;
-- shadow = parent->base.bo->shadow;
-+ shadow = parent->bo->shadow;
-
- if (vm->use_cpu_for_update) {
-- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-+ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
- r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
- if (unlikely(r))
- return r;
-@@ -1044,7 +1089,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- /* assume the worst case */
- ndw += parent->last_entry_used * 6;
-
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
-
- if (shadow) {
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
-@@ -1064,17 +1109,12 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-
- /* walk over the address space and update the directory */
- for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-- struct amdgpu_bo *bo = entry->base.bo;
-+ struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
- uint64_t pde, pt;
-
- if (bo == NULL)
- continue;
-
-- spin_lock(&vm->status_lock);
-- list_del_init(&entry->base.vm_status);
-- spin_unlock(&vm->status_lock);
--
- pt = amdgpu_bo_gpu_offset(bo);
- pt = amdgpu_gart_get_vm_pde(adev, pt);
- /* Don't update huge pages here */
-@@ -1085,7 +1125,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
-
- pde = pd_addr + pt_idx * 8;
-- incr = amdgpu_bo_size(bo);
- if (((last_pde + 8 * count) != pde) ||
- ((last_pt + incr * count) != pt) ||
- (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
-@@ -1113,7 +1152,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- }
-
- if (count) {
-- if (vm->root.base.bo->shadow)
-+ if (vm->root.bo->shadow)
- params.func(&params, last_shadow, last_pt,
- count, incr, AMDGPU_PTE_VALID);
-
-@@ -1126,8 +1165,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- amdgpu_job_free(job);
- } else {
- amdgpu_ring_pad_ib(ring, params.ib);
-- amdgpu_sync_resv(adev, &job->sync,
-- parent->base.bo->tbo.resv,
-+ amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
- AMDGPU_FENCE_OWNER_VM);
- if (shadow)
- amdgpu_sync_resv(adev, &job->sync,
-@@ -1140,11 +1178,26 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- amdgpu_bo_fence(parent->base.bo, fence, true);
-- dma_fence_put(vm->last_update);
-- vm->last_update = fence;
-+ amdgpu_bo_fence(parent->bo, fence, true);
-+ dma_fence_put(vm->last_dir_update);
-+ vm->last_dir_update = dma_fence_get(fence);
-+ dma_fence_put(fence);
- }
- }
-+ /*
-+ * Recurse into the subdirectories. This recursion is harmless because
-+ * we only have a maximum of 5 layers.
-+ */
-+ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-+ struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-+
-+ if (!entry->bo)
-+ continue;
-+
-+ r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-+ if (r)
-+ return r;
-+ }
-
- return 0;
-
-@@ -1160,8 +1213,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
- *
- * Mark all PD level as invalid after an error.
- */
--static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
-- struct amdgpu_vm_pt *parent)
-+static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
- {
- unsigned pt_idx;
-
-@@ -1172,15 +1224,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
- for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-- if (!entry->base.bo)
-+ if (!entry->bo)
- continue;
-
- entry->addr = ~0ULL;
-- spin_lock(&vm->status_lock);
-- if (list_empty(&entry->base.vm_status))
-- list_add(&entry->base.vm_status, &vm->relocated);
-- spin_unlock(&vm->status_lock);
-- amdgpu_vm_invalidate_level(vm, entry);
-+ amdgpu_vm_invalidate_level(entry);
- }
- }
-
-@@ -1196,40 +1244,11 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
- int amdgpu_vm_update_directories(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
- {
-- int r = 0;
--
-- spin_lock(&vm->status_lock);
-- while (!list_empty(&vm->relocated)) {
-- struct amdgpu_vm_bo_base *bo_base;
-- struct amdgpu_bo *bo;
--
-- bo_base = list_first_entry(&vm->relocated,
-- struct amdgpu_vm_bo_base,
-- vm_status);
-- spin_unlock(&vm->status_lock);
--
-- bo = bo_base->bo->parent;
-- if (bo) {
-- struct amdgpu_vm_bo_base *parent;
-- struct amdgpu_vm_pt *pt;
--
-- parent = list_first_entry(&bo->va,
-- struct amdgpu_vm_bo_base,
-- bo_list);
-- pt = container_of(parent, struct amdgpu_vm_pt, base);
-+ int r;
-
-- r = amdgpu_vm_update_level(adev, vm, pt);
-- if (r) {
-- amdgpu_vm_invalidate_level(vm, &vm->root);
-- return r;
-- }
-- spin_lock(&vm->status_lock);
-- } else {
-- spin_lock(&vm->status_lock);
-- list_del_init(&bo_base->vm_status);
-- }
-- }
-- spin_unlock(&vm->status_lock);
-+ r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-+ if (r)
-+ amdgpu_vm_invalidate_level(&vm->root);
-
- if (vm->use_cpu_for_update) {
- /* Flush HDP */
-@@ -1260,7 +1279,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
- *entry = &p->vm->root;
- while ((*entry)->entries) {
- idx = addr >> (p->adev->vm_manager.block_size * level--);
-- idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
-+ idx %= amdgpu_bo_size((*entry)->bo) / 8;
- *parent = *entry;
- *entry = &(*entry)->entries[idx];
- }
-@@ -1296,7 +1315,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- p->src ||
- !(flags & AMDGPU_PTE_VALID)) {
-
-- dst = amdgpu_bo_gpu_offset(entry->base.bo);
-+ dst = amdgpu_bo_gpu_offset(entry->bo);
- dst = amdgpu_gart_get_vm_pde(p->adev, dst);
- flags = AMDGPU_PTE_VALID;
- } else {
-@@ -1322,18 +1341,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
- tmp = p->pages_addr;
- p->pages_addr = NULL;
-
-- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
-+ pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
-
- p->pages_addr = tmp;
- } else {
-- if (parent->base.bo->shadow) {
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
-+ if (parent->bo->shadow) {
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
- }
-- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-+ pd_addr = amdgpu_bo_gpu_offset(parent->bo);
- pde = pd_addr + (entry - parent->entries) * 8;
- amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
- }
-@@ -1384,7 +1403,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
- if (entry->addr & AMDGPU_PDE_PTE)
- continue;
-
-- pt = entry->base.bo;
-+ pt = entry->bo;
- if (use_cpu_update) {
- pe_start = (unsigned long)amdgpu_bo_kptr(pt);
- } else {
-@@ -1420,6 +1439,8 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
- {
-+ int r;
-+
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
-@@ -1438,38 +1459,39 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- */
-- unsigned max_frag = params->adev->vm_manager.fragment_size;
-- int r;
-+ unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-+ uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-+ uint64_t frag_align = 1 << pages_per_frag;
-+
-+ uint64_t frag_start = ALIGN(start, frag_align);
-+ uint64_t frag_end = end & ~(frag_align - 1);
-
- /* system pages are non continuously */
-- if (params->src || !(flags & AMDGPU_PTE_VALID))
-+ if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-+ (frag_start >= frag_end))
- return amdgpu_vm_update_ptes(params, start, end, dst, flags);
-
-- while (start != end) {
-- uint64_t frag_flags, frag_end;
-- unsigned frag;
--
-- /* This intentionally wraps around if no bit is set */
-- frag = min((unsigned)ffs(start) - 1,
-- (unsigned)fls64(end - start) - 1);
-- if (frag >= max_frag) {
-- frag_flags = AMDGPU_PTE_FRAG(max_frag);
-- frag_end = end & ~((1ULL << max_frag) - 1);
-- } else {
-- frag_flags = AMDGPU_PTE_FRAG(frag);
-- frag_end = start + (1 << frag);
-- }
--
-- r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
-- flags | frag_flags);
-+ /* handle the 4K area at the beginning */
-+ if (start != frag_start) {
-+ r = amdgpu_vm_update_ptes(params, start, frag_start,
-+ dst, flags);
- if (r)
- return r;
--
-- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
-- start = frag_end;
-+ dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
- }
-
-- return 0;
-+ /* handle the area in the middle */
-+ r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-+ flags | frag_flags);
-+ if (r)
-+ return r;
-+
-+ /* handle the 4K area at the end */
-+ if (frag_end != end) {
-+ dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-+ r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
-+ }
-+ return r;
- }
-
- /**
-@@ -1477,6 +1499,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- *
- * @adev: amdgpu_device pointer
- * @exclusive: fence we need to sync to
-+ * @src: address where to copy page table entries from
- * @pages_addr: DMA addresses to use for mapping
- * @vm: requested vm
- * @start: start of mapped range
-@@ -1490,6 +1513,7 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
- */
- static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct dma_fence *exclusive,
-+ uint64_t src,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
- uint64_t start, uint64_t last,
-@@ -1507,6 +1531,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.vm = vm;
-+ params.src = src;
-
- /* sync to everything on unmapping */
- if (!(flags & AMDGPU_PTE_VALID))
-@@ -1535,12 +1560,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- nptes = last - start + 1;
-
- /*
-- * reserve space for two commands every (1 << BLOCK_SIZE)
-+ * reserve space for one command every (1 << BLOCK_SIZE)
- * entries or 2k dwords (whatever is smaller)
-- *
-- * The second command is for the shadow pagetables.
- */
-- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
-+ ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
-
- /* padding, etc. */
- ndw = 64;
-@@ -1548,9 +1571,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- /* one PDE write for each huge page */
- ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
-
-- if (pages_addr) {
-+ if (src) {
-+ /* only copy commands needed */
-+ ndw += ncmds * 7;
-+
-+ params.func = amdgpu_vm_do_copy_ptes;
-+
-+ } else if (pages_addr) {
- /* copy commands needed */
-- ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
-+ ndw += ncmds * 7;
-
- /* and also PTEs */
- ndw += nptes * 2;
-@@ -1559,11 +1588,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-
- } else {
- /* set page commands needed */
-- ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
-+ ndw += ncmds * 10;
-
-- /* extra commands for begin/end fragments */
-- ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
-- * adev->vm_manager.fragment_size;
-+ /* two extra commands for begin/end of fragment */
-+ ndw += 2 * 10;
-
- params.func = amdgpu_vm_do_set_ptes;
- }
-@@ -1574,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-
- params.ib = &job->ibs[0];
-
-- if (pages_addr) {
-+ if (!src && pages_addr) {
- uint64_t *pte;
- unsigned i;
-
-@@ -1595,12 +1623,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
-+ r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
- owner);
- if (r)
- goto error_free;
-
-- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
-+ r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
- if (r)
- goto error_free;
-
-@@ -1615,14 +1643,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- if (r)
- goto error_free;
-
-- amdgpu_bo_fence(vm->root.base.bo, f, true);
-+ amdgpu_bo_fence(vm->root.bo, f, true);
- dma_fence_put(*fence);
- *fence = f;
- return 0;
-
- error_free:
- amdgpu_job_free(job);
-- amdgpu_vm_invalidate_level(vm, &vm->root);
-+ amdgpu_vm_invalidate_level(&vm->root);
- return r;
- }
-
-@@ -1647,13 +1675,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- dma_addr_t *pages_addr,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_va_mapping *mapping,
-- uint64_t vram_base_offset,
- uint64_t flags,
- struct ttm_mem_reg *mem,
- struct dma_fence **fence)
- {
- struct drm_mm_node *nodes = mem ? mem->mm_node : NULL;
-- uint64_t pfn, start = mapping->start;
-+ uint64_t pfn, src = 0, start = mapping->start;
- int r;
-
- /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
-@@ -1704,12 +1731,12 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- max_entries = min(max_entries, 16ull * 1024ull);
- break;
- case AMDGPU_PL_DGMA:
-- addr += vram_base_offset +
-+ addr += adev->vm_manager.vram_base_offset +
- adev->mman.bdev.man[mem->mem_type].gpu_offset -
- adev->mman.bdev.man[TTM_PL_VRAM].gpu_offset;
- break;
- case TTM_PL_VRAM:
-- addr += vram_base_offset;
-+ addr += adev->vm_manager.vram_base_offset;
- break;
- default:
- break;
-@@ -1722,7 +1749,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
- addr += pfn << PAGE_SHIFT;
-
- last = min((uint64_t)mapping->last, start + max_entries - 1);
-- r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
-+ r = amdgpu_vm_bo_update_mapping(adev, exclusive,
-+ src, pages_addr, vm,
- start, last, flags, addr,
- fence);
- if (r)
-@@ -1760,10 +1788,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- dma_addr_t *pages_addr = NULL;
- struct ttm_mem_reg *mem;
- struct drm_mm_node *nodes;
-- struct dma_fence *exclusive, **last_update;
-+ struct dma_fence *exclusive;
- uint64_t flags;
-- uint64_t vram_base_offset = adev->vm_manager.vram_base_offset;
-- struct amdgpu_device *bo_adev;
- int r;
-
- if (clear || !bo_va->base.bo) {
-@@ -1785,54 +1811,43 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
- exclusive = reservation_object_get_excl(bo->tbo.resv);
- }
-
-- if (bo) {
-+ if (bo)
- flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-- bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-- if (mem && mem->mem_type == TTM_PL_VRAM &&
-- adev != bo_adev) {
-- flags |= AMDGPU_PTE_SYSTEM;
-- vram_base_offset = bo_adev->mc.aper_base;
-- }
-- } else
-- flags = 0x0;
--
-- if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
-- last_update = &vm->last_update;
- else
-- last_update = &bo_va->last_pt_update;
--
-- if (!clear && bo_va->base.moved) {
-- bo_va->base.moved = false;
-- list_splice_init(&bo_va->valids, &bo_va->invalids);
-+ flags = 0x0;
-
-- } else if (bo_va->cleared != clear) {
-+ spin_lock(&vm->status_lock);
-+ if (!list_empty(&bo_va->base.vm_status))
- list_splice_init(&bo_va->valids, &bo_va->invalids);
-- }
-+ spin_unlock(&vm->status_lock);
-
- list_for_each_entry(mapping, &bo_va->invalids, list) {
- r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
-- mapping, vram_base_offset, flags,
-- mem, last_update);
-+ mapping, flags, mem,
-+ &bo_va->last_pt_update);
- if (r)
- return r;
- }
-
-- if (vm->use_cpu_for_update) {
-- /* Flush HDP */
-- mb();
-- amdgpu_gart_flush_gpu_tlb(adev, 0);
-+ if (trace_amdgpu_vm_bo_mapping_enabled()) {
-+ list_for_each_entry(mapping, &bo_va->valids, list)
-+ trace_amdgpu_vm_bo_mapping(mapping);
-+
-+ list_for_each_entry(mapping, &bo_va->invalids, list)
-+ trace_amdgpu_vm_bo_mapping(mapping);
- }
-
- spin_lock(&vm->status_lock);
-+ list_splice_init(&bo_va->invalids, &bo_va->valids);
- list_del_init(&bo_va->base.vm_status);
-+ if (clear)
-+ list_add(&bo_va->base.vm_status, &vm->cleared);
- spin_unlock(&vm->status_lock);
-
-- list_splice_init(&bo_va->invalids, &bo_va->valids);
-- bo_va->cleared = clear;
--
-- if (trace_amdgpu_vm_bo_mapping_enabled()) {
-- list_for_each_entry(mapping, &bo_va->valids, list)
-- trace_amdgpu_vm_bo_mapping(mapping);
-+ if (vm->use_cpu_for_update) {
-+ /* Flush HDP */
-+ mb();
-+ amdgpu_gart_flush_gpu_tlb(adev, 0);
- }
-
- return 0;
-@@ -1940,7 +1955,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
- */
- static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
-- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
-+ struct reservation_object *resv = vm->root.bo->tbo.resv;
- struct dma_fence *excl, **shared;
- unsigned i, shared_count;
- int r;
-@@ -1998,7 +2013,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- if (vm->pte_support_ats)
- init_pte_value = AMDGPU_PTE_SYSTEM;
-
-- r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
-+ r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
- mapping->start, mapping->last,
- init_pte_value, 0, &f);
- amdgpu_vm_free_mapping(adev, vm, mapping, f);
-@@ -2020,35 +2035,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
- }
-
- /**
-- * amdgpu_vm_handle_moved - handle moved BOs in the PT
-+ * amdgpu_vm_clear_moved - clear moved BOs in the PT
- *
- * @adev: amdgpu_device pointer
- * @vm: requested vm
-- * @sync: sync object to add fences to
- *
-- * Make sure all BOs which are moved are updated in the PTs.
-+ * Make sure all moved BOs are cleared in the PT.
- * Returns 0 for success.
- *
-- * PTs have to be reserved!
-+ * PTs have to be reserved and mutex must be locked!
- */
--int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-- struct amdgpu_vm *vm)
-+int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-+ struct amdgpu_sync *sync)
- {
-- bool clear;
-+ struct amdgpu_bo_va *bo_va = NULL;
- int r = 0;
-
- spin_lock(&vm->status_lock);
- while (!list_empty(&vm->moved)) {
-- struct amdgpu_bo_va *bo_va;
--
- bo_va = list_first_entry(&vm->moved,
- struct amdgpu_bo_va, base.vm_status);
- spin_unlock(&vm->status_lock);
-
-- /* Per VM BOs never need to bo cleared in the page tables */
-- clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
--
-- r = amdgpu_vm_bo_update(adev, bo_va, clear);
-+ r = amdgpu_vm_bo_update(adev, bo_va, true);
- if (r)
- return r;
-
-@@ -2056,6 +2065,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
- }
- spin_unlock(&vm->status_lock);
-
-+ if (bo_va)
-+ r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-+
- return r;
- }
-
-@@ -2097,39 +2109,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
- return bo_va;
- }
-
--
--/**
-- * amdgpu_vm_bo_insert_mapping - insert a new mapping
-- *
-- * @adev: amdgpu_device pointer
-- * @bo_va: bo_va to store the address
-- * @mapping: the mapping to insert
-- *
-- * Insert a new mapping into all structures.
-- */
--static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
-- struct amdgpu_bo_va *bo_va,
-- struct amdgpu_bo_va_mapping *mapping)
--{
-- struct amdgpu_vm *vm = bo_va->base.vm;
-- struct amdgpu_bo *bo = bo_va->base.bo;
--
-- mapping->bo_va = bo_va;
-- list_add(&mapping->list, &bo_va->invalids);
-- amdgpu_vm_it_insert(mapping, &vm->va);
--
-- if (mapping->flags & AMDGPU_PTE_PRT)
-- amdgpu_vm_prt_get(adev);
--
-- if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-- spin_lock(&vm->status_lock);
-- if (list_empty(&bo_va->base.vm_status))
-- list_add(&bo_va->base.vm_status, &vm->moved);
-- spin_unlock(&vm->status_lock);
-- }
-- trace_amdgpu_vm_bo_map(bo_va, mapping);
--}
--
- /**
- * amdgpu_vm_bo_map - map bo inside a vm
- *
-@@ -2181,12 +2160,18 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
- if (!mapping)
- return -ENOMEM;
-
-+ INIT_LIST_HEAD(&mapping->list);
- mapping->start = saddr;
- mapping->last = eaddr;
- mapping->offset = offset;
- mapping->flags = flags;
-
-- amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
-+ list_add(&mapping->list, &bo_va->invalids);
-+ amdgpu_vm_it_insert(mapping, &vm->va);
-+
-+ if (flags & AMDGPU_PTE_PRT)
-+ amdgpu_vm_prt_get(adev);
-+ trace_amdgpu_vm_bo_map(bo_va, mapping);
-
- return 0;
- }
-@@ -2213,6 +2198,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
- {
- struct amdgpu_bo_va_mapping *mapping;
- struct amdgpu_bo *bo = bo_va->base.bo;
-+ struct amdgpu_vm *vm = bo_va->base.vm;
- uint64_t eaddr;
- int r;
-
-@@ -2246,7 +2232,12 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
- mapping->offset = offset;
- mapping->flags = flags;
-
-- amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
-+ list_add(&mapping->list, &bo_va->invalids);
-+ amdgpu_vm_it_insert(mapping, &vm->va);
-+
-+ if (flags & AMDGPU_PTE_PRT)
-+ amdgpu_vm_prt_get(adev);
-+ trace_amdgpu_vm_bo_map(bo_va, mapping);
-
- return 0;
- }
-@@ -2292,7 +2283,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
-
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
-- mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-
- if (valid)
-@@ -2378,7 +2368,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
- if (tmp->last > eaddr)
- tmp->last = eaddr;
-
-- tmp->bo_va = NULL;
- list_add(&tmp->list, &vm->freed);
- trace_amdgpu_vm_bo_unmap(NULL, tmp);
- }
-@@ -2405,19 +2394,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
- }
-
- /**
-- * amdgpu_vm_bo_lookup_mapping - find mapping by address
-- *
-- * @vm: the requested VM
-- *
-- * Find a mapping by it's address.
-- */
--struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
-- uint64_t addr)
--{
-- return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
--}
--
--/**
- * amdgpu_vm_bo_rmv - remove a bo to a specific vm
- *
- * @adev: amdgpu_device pointer
-@@ -2442,7 +2418,6 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
-- mapping->bo_va = NULL;
- trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- list_add(&mapping->list, &vm->freed);
- }
-@@ -2467,36 +2442,15 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
- * Mark @bo as invalid.
- */
- void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-- struct amdgpu_bo *bo, bool evicted)
-+ struct amdgpu_bo *bo)
- {
- struct amdgpu_vm_bo_base *bo_base;
-
- list_for_each_entry(bo_base, &bo->va, bo_list) {
-- struct amdgpu_vm *vm = bo_base->vm;
--
-- bo_base->moved = true;
-- if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-- spin_lock(&bo_base->vm->status_lock);
-- if (bo->tbo.type == ttm_bo_type_kernel)
-- list_move(&bo_base->vm_status, &vm->evicted);
-- else
-- list_move_tail(&bo_base->vm_status,
-- &vm->evicted);
-- spin_unlock(&bo_base->vm->status_lock);
-- continue;
-- }
--
-- if (bo->tbo.type == ttm_bo_type_kernel) {
-- spin_lock(&bo_base->vm->status_lock);
-- if (list_empty(&bo_base->vm_status))
-- list_add(&bo_base->vm_status, &vm->relocated);
-- spin_unlock(&bo_base->vm->status_lock);
-- continue;
-- }
--
- spin_lock(&bo_base->vm->status_lock);
- if (list_empty(&bo_base->vm_status))
-- list_add(&bo_base->vm_status, &vm->moved);
-+ list_add(&bo_base->vm_status,
-+ &bo_base->vm->moved);
- spin_unlock(&bo_base->vm->status_lock);
- }
- }
-@@ -2577,14 +2531,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- u64 flags;
- uint64_t init_pde_value = 0;
-
-- vm->va = RB_ROOT_CACHED;
-+ vm->va = RB_ROOT;
- vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
- vm->reserved_vmid[i] = NULL;
- spin_lock_init(&vm->status_lock);
-- INIT_LIST_HEAD(&vm->evicted);
-- INIT_LIST_HEAD(&vm->relocated);
- INIT_LIST_HEAD(&vm->moved);
-+ INIT_LIST_HEAD(&vm->cleared);
- INIT_LIST_HEAD(&vm->freed);
-
- /* create scheduler entity for page table updates */
-@@ -2615,7 +2568,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
- "CPU update of VM recommended only for large BAR system\n");
-- vm->last_update = NULL;
-+ vm->last_dir_update = NULL;
-
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
-@@ -2628,31 +2581,46 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- flags,
-- NULL, NULL, init_pde_value, &vm->root.base.bo);
-+ NULL, NULL, init_pde_value, &vm->root.bo);
- if (r)
- goto error_free_sched_entity;
-
-- vm->root.base.vm = vm;
-- list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
-- INIT_LIST_HEAD(&vm->root.base.vm_status);
-+ r = amdgpu_bo_reserve(vm->root.bo, false);
-+ if (r)
-+ goto error_free_root;
-+
-+ vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
-
- if (vm->use_cpu_for_update) {
-- r = amdgpu_bo_reserve(vm->root.base.bo, false);
-+ r = amdgpu_bo_kmap(vm->root.bo, NULL);
- if (r)
- goto error_free_root;
-+ }
-
-- r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
-- if (r)
-- goto error_free_root;
-- amdgpu_bo_unreserve(vm->root.base.bo);
-+ amdgpu_bo_unreserve(vm->root.bo);
-+
-+ vm->vm_context = vm_context;
-+ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
-+ mutex_lock(&adev->vm_manager.lock);
-+
-+ if (adev->vm_manager.n_compute_vms++ == 0) {
-+ /* First Compute VM: enable compute power profile */
-+ if (adev->pp_enabled)
-+ amdgpu_dpm_switch_power_profile(adev,
-+ AMD_PP_COMPUTE_PROFILE);
-+ else if (adev->pm.funcs->switch_power_profile)
-+ adev->pm.funcs->switch_power_profile(adev,
-+ AMD_PP_COMPUTE_PROFILE);
-+ }
-+ mutex_unlock(&adev->vm_manager.lock);
- }
-
- return 0;
-
- error_free_root:
-- amdgpu_bo_unref(&vm->root.base.bo->shadow);
-- amdgpu_bo_unref(&vm->root.base.bo);
-- vm->root.base.bo = NULL;
-+ amdgpu_bo_unref(&vm->root.bo->shadow);
-+ amdgpu_bo_unref(&vm->root.bo);
-+ vm->root.bo = NULL;
-
- error_free_sched_entity:
- amd_sched_entity_fini(&ring->sched, &vm->entity);
-@@ -2671,18 +2639,20 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
- {
- unsigned i;
-
-- if (level->base.bo) {
-- list_del(&level->base.bo_list);
-- list_del(&level->base.vm_status);
-- amdgpu_bo_unref(&level->base.bo->shadow);
-- amdgpu_bo_unref(&level->base.bo);
-+ if (level->bo) {
-+ amdgpu_bo_unref(&level->bo->shadow);
-+ amdgpu_bo_unref(&level->bo);
- }
-
- if (level->entries)
- for (i = 0; i <= level->last_entry_used; i++)
- amdgpu_vm_free_levels(&level->entries[i]);
-
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
-+ drm_free_large(level->entries);
-+#else
- kvfree(level->entries);
-+#endif
- }
-
- /**
-@@ -2698,16 +2668,31 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- {
- struct amdgpu_bo_va_mapping *mapping, *tmp;
- bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
-- struct amdgpu_bo *root;
-- int i, r;
-+ int i;
-+
-+ if (vm->vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
-+ mutex_lock(&adev->vm_manager.lock);
-+
-+ WARN(adev->vm_manager.n_compute_vms == 0, "Unbalanced number of Compute VMs");
-+
-+ if (--adev->vm_manager.n_compute_vms == 0) {
-+ /* Last Compute VM: enable graphics power profile */
-+ if (adev->pp_enabled)
-+ amdgpu_dpm_switch_power_profile(adev,
-+ AMD_PP_GFX_PROFILE);
-+ else if (adev->pm.funcs->switch_power_profile)
-+ adev->pm.funcs->switch_power_profile(adev,
-+ AMD_PP_GFX_PROFILE);
-+ }
-+ mutex_unlock(&adev->vm_manager.lock);
-+ }
-
- amd_sched_entity_fini(vm->entity.sched, &vm->entity);
-
-- if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
-+ if (!RB_EMPTY_ROOT(&vm->va)) {
- dev_err(adev->dev, "still active bo inside vm\n");
- }
-- rbtree_postorder_for_each_entry_safe(mapping, tmp,
-- &vm->va.rb_root, rb) {
-+ rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
- list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
- kfree(mapping);
-@@ -2721,9 +2706,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
- list_del(&mapping->list);
- amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
- }
--
-+
- amdgpu_vm_free_levels(&vm->root);
-- dma_fence_put(vm->last_update);
-+ dma_fence_put(vm->last_dir_update);
- for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
- amdgpu_vm_free_reserved_vmid(adev, vm, i);
- }
-@@ -2755,8 +2740,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
- }
- }
-
-- adev->vm_manager.fence_context =
-- dma_fence_context_alloc(AMDGPU_MAX_RINGS);
-+ adev->vm_manager.fence_context = kcl_fence_context_alloc(AMDGPU_MAX_RINGS);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- adev->vm_manager.seqno[i] = 0;
-
-@@ -2781,6 +2765,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
- adev->vm_manager.vm_update_mode = 0;
- #endif
-
-+ adev->vm_manager.n_compute_vms = 0;
- }
-
- /**
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-index 28cf20b..415e659 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
-@@ -153,6 +153,9 @@ struct amdgpu_vm {
- /* dedicated to vm */
- struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS];
-
-+ /* Whether this is a Compute or GFX Context */
-+ int vm_context;
-+
- /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
- bool use_cpu_for_update;
-
-@@ -220,6 +223,8 @@ struct amdgpu_vm_manager {
- * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
- */
- int vm_update_mode;
-+ /* Number of Compute VMs, used for detecting Compute activity */
-+ unsigned n_compute_vms;
- };
-
- void amdgpu_vm_manager_init(struct amdgpu_device *adev);
---
-2.7.4
-
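
The mechanism the deleted patch implemented is easiest to see in isolation: amdgpu_vm_init() bumps a mutex-protected count of compute VMs and switches the GPU to the compute power profile on the first one; amdgpu_vm_fini() decrements it and restores the graphics profile on the last one. Below is a minimal userspace C sketch of that first/last counting pattern; the names (switch_power_profile, PROFILE_COMPUTE, PROFILE_GFX) are illustrative stand-ins for amdgpu_dpm_switch_power_profile() and the AMD_PP_* profiles, not driver API.

    #include <pthread.h>
    #include <stdio.h>

    enum power_profile { PROFILE_GFX, PROFILE_COMPUTE };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned n_compute_vms;  /* mirrors adev->vm_manager.n_compute_vms */

    /* Illustrative stand-in for amdgpu_dpm_switch_power_profile(). */
    static void switch_power_profile(enum power_profile p)
    {
        printf("switching to %s profile\n",
               p == PROFILE_COMPUTE ? "compute" : "graphics");
    }

    /* Called on compute VM creation, as in amdgpu_vm_init(). */
    static void compute_vm_created(void)
    {
        pthread_mutex_lock(&lock);
        if (n_compute_vms++ == 0)   /* first compute VM */
            switch_power_profile(PROFILE_COMPUTE);
        pthread_mutex_unlock(&lock);
    }

    /* Called on compute VM teardown, as in amdgpu_vm_fini(). */
    static void compute_vm_destroyed(void)
    {
        pthread_mutex_lock(&lock);
        if (--n_compute_vms == 0)   /* last compute VM */
            switch_power_profile(PROFILE_GFX);
        pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
        compute_vm_created();    /* switches to compute */
        compute_vm_created();    /* no switch, count is now 2 */
        compute_vm_destroyed();
        compute_vm_destroyed();  /* switches back to graphics */
        return 0;
    }

Counting VMs rather than in-flight jobs keeps the switch off the hot submission path, at the cost of treating an idle-but-open KFD process as "compute work".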
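The patch also reverts page-table validation to an eviction-counter shortcut: amdgpu_vm_validate_pt_bos() returns immediately while adev->num_evictions still matches the value the VM sampled, and only walks the page-table levels after something was actually evicted. A simplified sketch of that fast path, using C11 atomics in place of the kernel's atomic64_t (names are illustrative and the resampling point is simplified; the patch samples at VM init):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_ullong num_evictions;  /* stands in for adev->num_evictions */

    struct vm {
        unsigned long long last_eviction_counter;
    };

    /* Conceptually called whenever TTM evicts a buffer object. */
    static void on_eviction(void)
    {
        atomic_fetch_add(&num_evictions, 1);
    }

    /* Mirrors the early-out in the reverted amdgpu_vm_validate_pt_bos():
     * skip the recursive page-table walk while nothing has been evicted
     * since the VM last sampled the counter. */
    static bool validation_needed(struct vm *vm)
    {
        unsigned long long n = atomic_load(&num_evictions);

        if (n == vm->last_eviction_counter)
            return false;          /* page tables still valid */

        vm->last_eviction_counter = n;
        return true;               /* do the amdgpu_vm_validate_level() walk */
    }

    int main(void)
    {
        struct vm vm = { .last_eviction_counter = 0 };

        printf("%d\n", validation_needed(&vm)); /* 0: nothing evicted yet */
        on_eviction();
        printf("%d\n", validation_needed(&vm)); /* 1: counter moved */
        return 0;
    }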
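Finally, the reverted amdgpu_vm_frag_ptes() replaces the upstream greedy fragment loop with a three-way split: an unaligned head mapped as plain 4K pages, a fragment-aligned middle mapped with the AMDGPU_PTE_FRAG bits set, and an unaligned tail. The address arithmetic is worth seeing on its own; this sketch prints the split for a sample range, working in page numbers rather than byte addresses, with fragment_size as log2 of the fragment length in pages, as in the driver:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    /* Head/middle/tail split used by the reverted amdgpu_vm_frag_ptes(). */
    static void split_range(uint64_t start, uint64_t end, unsigned fragment_size)
    {
        uint64_t frag_align = 1ull << fragment_size;
        uint64_t frag_start = ALIGN(start, frag_align);
        uint64_t frag_end = end & ~(frag_align - 1);

        if (frag_start >= frag_end) {
            /* range too small for fragments: map it with plain 4K PTEs */
            printf("plain:  [%" PRIu64 ", %" PRIu64 ")\n", start, end);
            return;
        }
        if (start != frag_start)    /* unaligned head */
            printf("head:   [%" PRIu64 ", %" PRIu64 ")\n", start, frag_start);
        printf("middle: [%" PRIu64 ", %" PRIu64 ") with AMDGPU_PTE_FRAG(%u)\n",
               frag_start, frag_end, fragment_size);
        if (frag_end != end)        /* unaligned tail */
            printf("tail:   [%" PRIu64 ", %" PRIu64 ")\n", frag_end, end);
    }

    int main(void)
    {
        /* 16-page fragments: head [3,16), middle [16,32), tail [32,37) */
        split_range(3, 37, 4);
        return 0;
    }

Aligning the middle of the range this way lets the MC L1 TLB cover it with far fewer entries, which is why the comment in the patch advises userspace to align base address and allocation size to the fragment size.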