From b93a8bfd66d0254e2920f9d1648fa994b0d821a9 Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Tue, 11 Jul 2017 19:56:49 -0400
Subject: [PATCH 1737/4131] drm/amdgpu: Optimize page directory updates for KFD

Updating 4-level page tables is expensive when lots of virtual address
space is used and thousands of page table BOs are allocated. So avoid
doing it unnecessarily. Move page table allocation into add_bo_to_vm
and the page directory update into vm_validate_pt_pd_bos. The latter is
called by add_bo_to_vm and when restoring from evictions. This reduces
page directory update frequency from once per mapping to only the first
mapping per GPU. On restore from evictions, it reduces page directory
updates from once per BO mapping to once per GPU.

Also stop moving PT BOs in the LRU list. This was originally added when
we were still pinning memory, to remove PT BOs from the LRU list. Now
it's just a pointless move, which is very expensive when there are
thousands of BOs.

Change-Id: If23e2f30e665511510bad705346543de82cc9cfe
Signed-off-by: Felix Kuehling

Conflicts:
	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 231 ++++++++++++-----------
 1 file changed, 118 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 01414bc..505d006 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -335,68 +335,6 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
 	kfree(ef_list);
 }
 
-static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-			struct amdgpu_vm *avm, bool is_aql,
-			struct kfd_bo_va_list **p_bo_va_entry)
-{
-	int ret;
-	struct kfd_bo_va_list *bo_va_entry;
-	struct amdgpu_bo *bo = mem->bo;
-	uint64_t va = mem->va;
-	struct list_head *list_bo_va = &mem->bo_va_list;
-	unsigned long bo_size = bo->tbo.mem.size;
-
-	if (is_aql)
-		va += bo_size;
-
-	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-	if (!bo_va_entry)
-		return -ENOMEM;
-
-	if (!va) {
-		pr_err("Invalid VA when adding BO to VM\n");
-		return -EINVAL;
-	}
-
-	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, avm);
-
-	/* Add BO to VM internal data structures*/
-	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
-	if (bo_va_entry->bo_va == NULL) {
-		ret = -EINVAL;
-		pr_err("Failed to add BO object to VM. ret == %d\n",
-				ret);
-		goto err_vmadd;
-	}
-
-	bo_va_entry->va = va;
-	bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
-							 mem->mapping_flags);
-	bo_va_entry->kgd_dev = (void *)adev;
-	list_add(&bo_va_entry->bo_list, list_bo_va);
-
-	if (p_bo_va_entry)
-		*p_bo_va_entry = bo_va_entry;
-
-	return 0;
-
-err_vmadd:
-	kfree(bo_va_entry);
-	return ret;
-}
-
-static void remove_bo_from_vm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, unsigned long size)
-{
-	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
-			entry->va,
-			entry->va + size, entry);
-	amdgpu_vm_bo_rmv(adev, entry->bo_va);
-	list_del(&entry->bo_list);
-	kfree(entry);
-}
-
 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 				     bool wait)
 {
@@ -435,6 +373,12 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
 	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
 }
 
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Also updates page directory entries so we don't need to do this
+ * again later until the page directory is validated again (e.g. after
+ * an eviction or allocating new page tables).
+ */
 static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo *pd = vm->root.bo;
@@ -460,7 +404,116 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 
 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 
+	ret = amdgpu_vm_update_directories(adev, vm);
+	if (ret != 0)
+		return ret;
+
+	return 0;
+}
+
+/* add_bo_to_vm - Add a BO to a VM
+ *
+ * Everything that needs to be done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a. Validate new page tables and directories and update directories
+ */
+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+			struct amdgpu_vm *avm, bool is_aql,
+			struct kfd_bo_va_list **p_bo_va_entry)
+{
+	int ret;
+	struct kfd_bo_va_list *bo_va_entry;
+	struct amdkfd_vm *kvm = container_of(avm,
+					     struct amdkfd_vm, base);
+	struct amdgpu_bo *pd = avm->root.bo;
+	struct amdgpu_bo *bo = mem->bo;
+	uint64_t va = mem->va;
+	struct list_head *list_bo_va = &mem->bo_va_list;
+	unsigned long bo_size = bo->tbo.mem.size;
+
+	if (!va) {
+		pr_err("Invalid VA when adding BO to VM\n");
+		return -EINVAL;
+	}
+
+	if (is_aql)
+		va += bo_size;
+
+	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
+	if (!bo_va_entry)
+		return -ENOMEM;
+
+	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			va + bo_size, avm);
+
+	/* Add BO to VM internal data structures */
+	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
+	if (bo_va_entry->bo_va == NULL) {
+		ret = -EINVAL;
+		pr_err("Failed to add BO object to VM. ret == %d\n",
+				ret);
+		goto err_vmadd;
+	}
+
+	bo_va_entry->va = va;
+	bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
+							 mem->mapping_flags);
+	bo_va_entry->kgd_dev = (void *)adev;
+	list_add(&bo_va_entry->bo_list, list_bo_va);
+
+	if (p_bo_va_entry)
+		*p_bo_va_entry = bo_va_entry;
+
+	/* Allocate new page tables if needed and validate
+	 * them. Clearing new page tables and validating them need to
+	 * wait on move fences. We don't want that to trigger the
+	 * eviction fence, so remove it temporarily.
+	 */
+	amdgpu_amdkfd_remove_eviction_fence(pd,
+					kvm->process_info->eviction_fence,
+					NULL, NULL);
+
+	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
+	if (ret) {
+		pr_err("Failed to allocate pts, err=%d\n", ret);
+		goto err_alloc_pts;
+	}
+
+	ret = vm_validate_pt_pd_bos(avm);
+	if (ret != 0) {
+		pr_err("validate_pt_pd_bos() failed\n");
+		goto err_alloc_pts;
+	}
+
+	/* Add the eviction fence back */
+	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	return 0;
+
+err_alloc_pts:
+	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
+	list_del(&bo_va_entry->bo_list);
+err_vmadd:
+	kfree(bo_va_entry);
+	return ret;
+}
+
+static void remove_bo_from_vm(struct amdgpu_device *adev,
+		struct kfd_bo_va_list *entry, unsigned long size)
+{
+	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
+			entry->va,
+			entry->va + size, entry);
+	amdgpu_vm_bo_rmv(adev, entry->bo_va);
+	list_del(&entry->bo_list);
+	kfree(entry);
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
@@ -940,15 +993,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 	vm = bo_va->vm;
 	bo = bo_va->bo;
 
-	/* Update the page directory */
-	ret = amdgpu_vm_update_directories(adev, vm);
-	if (ret != 0) {
-		pr_err("amdgpu_vm_update_directories failed\n");
-		return ret;
-	}
-
-	amdgpu_sync_fence(adev, sync, vm->last_dir_update);
-
 	/* Update the page tables */
 	ret = amdgpu_vm_bo_update(adev, bo_va, false);
 	if (ret != 0) {
@@ -958,9 +1002,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 
 	amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
 
-	/* Remove PTs from LRU list (reservation removed PD only) */
-	amdgpu_vm_move_pt_bos_in_lru(adev, vm);
-
 	/* Sync objects can't handle multiple GPUs (contexts) updating
 	 * sync->last_vm_update. Fortunately we don't need it for
 	 * KFD's purposes, so we can just drop that fence.
@@ -978,52 +1019,16 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 			   bool no_update_pte)
 {
 	int ret;
-	struct amdgpu_bo *bo = entry->bo_va->bo;
-	struct amdkfd_vm *kvm = container_of(entry->bo_va->vm,
-					struct amdkfd_vm, base);
-	struct amdgpu_bo *pd = entry->bo_va->vm->root.bo;
 
-	/* Remove eviction fence from PD (and thereby from PTs too as they
-	 * share the resv. object. This is necessary because new PTs are
-	 * cleared and validate needs to wait on move fences. The eviction
-	 * fence shouldn't interfere in both these activities
-	 */
-	amdgpu_amdkfd_remove_eviction_fence(pd,
-					kvm->process_info->eviction_fence,
-					NULL, NULL);
-
-	ret = amdgpu_vm_alloc_pts(adev, entry->bo_va->vm, entry->va,
-			amdgpu_bo_size(bo));
-
-	if (ret) {
-		pr_err("Failed to allocate pts, err=%d\n", ret);
-		return ret;
-	}
-
-	/* Set virtual address for the allocation, allocate PTs,
-	 * if needed, and zero them.
-	 */
-	ret = amdgpu_vm_bo_map(adev, entry->bo_va,
-			entry->va, 0, amdgpu_bo_size(bo),
-			entry->pte_flags);
+	/* Set virtual address for the allocation */
+	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+			amdgpu_bo_size(entry->bo_va->bo), entry->pte_flags);
 	if (ret != 0) {
 		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
 				entry->va, ret);
 		return ret;
 	}
 
-	/* PT BOs may be created during amdgpu_vm_bo_map() call,
-	 * so we have to validate the newly created PT BOs.
-	 */
-	ret = vm_validate_pt_pd_bos(entry->bo_va->vm);
-	if (ret != 0) {
-		pr_err("validate_pt_pd_bos() failed\n");
-		return ret;
-	}
-
-	/* Add the eviction fence back */
-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-
 	if (no_update_pte)
 		return 0;
-- 
2.7.4
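
To make the effect of this reordering concrete, the standalone sketch
below models the amortization the commit message describes. It is an
illustration only, not driver code: toy_vm, validate_pt_pd() and
update_pte() are hypothetical stand-ins for the roles of amdgpu_vm,
vm_validate_pt_pd_bos() and update_gpuvm_pte() in the diff, and all
locking, fencing and error handling are omitted.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the per-GPU VM state the patch touches. */
struct toy_vm {
	bool pd_valid;		/* is the page directory up to date? */
	int  pd_updates;	/* counts expensive directory walks */
};

/* Models vm_validate_pt_pd_bos(): after this patch it is the one place
 * that updates the page directory, and it runs once per BO+GPU (first
 * mapping) and once per GPU VM on restore from eviction.
 */
static void validate_pt_pd(struct toy_vm *vm)
{
	if (!vm->pd_valid) {
		vm->pd_updates++;	/* models amdgpu_vm_update_directories() */
		vm->pd_valid = true;
	}
}

/* Models update_gpuvm_pte() after this patch: it touches only page
 * table entries and no longer walks the directory or moves PT BOs in
 * the LRU list.
 */
static void update_pte(struct toy_vm *vm)
{
	(void)vm;	/* PTE-only update; PD is already valid */
}

int main(void)
{
	struct toy_vm vm = { false, 0 };
	int i;

	validate_pt_pd(&vm);		/* first mapping: one PD update */
	for (i = 0; i < 1000; i++)
		update_pte(&vm);	/* 1000 more mappings: no PD walks */

	vm.pd_valid = false;		/* eviction invalidates the PD */
	validate_pt_pd(&vm);		/* restore: one PD update per GPU */

	printf("PD updates: %d\n", vm.pd_updates);	/* prints 2 */
	return 0;
}

Run as-is this prints "PD updates: 2": one directory walk for the first
mapping and one for the restore, whereas the pre-patch flow would have
walked the directory on every one of the 1000 mappings.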