Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch | 319
1 files changed, 319 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch
new file mode 100644
index 00000000..f3322277
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch
@@ -0,0 +1,319 @@
+From b93a8bfd66d0254e2920f9d1648fa994b0d821a9 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Tue, 11 Jul 2017 19:56:49 -0400
+Subject: [PATCH 1737/4131] drm/amdgpu: Optimize page directory updates for KFD
+
+Updating 4-level page tables is expensive when lots of virtual
+address space is used and thousands of page table BOs allocated. So
+avoid doing it unnecessarily. Move page table allocation into
+add_bo_to_vm and page directory update into vm_validate_pt_pd_bos.
+The latter is called by add_bo_to_vm and when restoring from
+evictions.
+
+This reduces page directory update frequency from once per mapping
+to only the first mapping per GPU. On restore from evictions, it
+reduces page directory updates from once per BO mapping to once
+per GPU.
+
+Also stop moving PT BOs in the LRU list. This was originally added
+when we were still pinning memory to remove PT BOs from the LRU list.
+Now it's just a pointless move, which is very expensive when there
+are thousands of BOs.
+
+Change-Id: If23e2f30e665511510bad705346543de82cc9cfe
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+
+ Conflicts:
+	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 231 ++++++++++++-----------
+ 1 file changed, 118 insertions(+), 113 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index 01414bc..505d006 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -335,68 +335,6 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
+ 	kfree(ef_list);
+ }
+ 
+-static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+-		struct amdgpu_vm *avm, bool is_aql,
+-		struct kfd_bo_va_list **p_bo_va_entry)
+-{
+-	int ret;
+-	struct kfd_bo_va_list *bo_va_entry;
+-	struct amdgpu_bo *bo = mem->bo;
+-	uint64_t va = mem->va;
+-	struct list_head *list_bo_va = &mem->bo_va_list;
+-	unsigned long bo_size = bo->tbo.mem.size;
+-
+-	if (is_aql)
+-		va += bo_size;
+-
+-	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
+-	if (!bo_va_entry)
+-		return -ENOMEM;
+-
+-	if (!va) {
+-		pr_err("Invalid VA when adding BO to VM\n");
+-		return -EINVAL;
+-	}
+-
+-	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+-			va + bo_size, avm);
+-
+-	/* Add BO to VM internal data structures*/
+-	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
+-	if (bo_va_entry->bo_va == NULL) {
+-		ret = -EINVAL;
+-		pr_err("Failed to add BO object to VM. ret == %d\n",
+-				ret);
+-		goto err_vmadd;
+-	}
+-
+-	bo_va_entry->va = va;
+-	bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
+-							 mem->mapping_flags);
+-	bo_va_entry->kgd_dev = (void *)adev;
+-	list_add(&bo_va_entry->bo_list, list_bo_va);
+-
+-	if (p_bo_va_entry)
+-		*p_bo_va_entry = bo_va_entry;
+-
+-	return 0;
+-
+-err_vmadd:
+-	kfree(bo_va_entry);
+-	return ret;
+-}
+-
+-static void remove_bo_from_vm(struct amdgpu_device *adev,
+-		struct kfd_bo_va_list *entry, unsigned long size)
+-{
+-	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
+-			entry->va,
+-			entry->va + size, entry);
+-	amdgpu_vm_bo_rmv(adev, entry->bo_va);
+-	list_del(&entry->bo_list);
+-	kfree(entry);
+-}
+-
+ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+ 				     bool wait)
+ {
+@@ -435,6 +373,12 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+ 	return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+ }
+ 
++/* vm_validate_pt_pd_bos - Validate page table and directory BOs
++ *
++ * Also updates page directory entries so we don't need to do this
++ * again later until the page directory is validated again (e.g. after
++ * an eviction or allocating new page tables).
++ */
+ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+ {
+ 	struct amdgpu_bo *pd = vm->root.bo;
+@@ -460,7 +404,116 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+ 
+ 	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+ 
++	ret = amdgpu_vm_update_directories(adev, vm);
++	if (ret != 0)
++		return ret;
++
++	return 0;
++}
++
++/* add_bo_to_vm - Add a BO to a VM
++ *
++ * Everything that needs to bo done only once when a BO is first added
++ * to a VM. It can later be mapped and unmapped many times without
++ * repeating these steps.
++ *
++ * 1. Allocate and initialize BO VA entry data structure
++ * 2. Add BO to the VM
++ * 3. Determine ASIC-specific PTE flags
++ * 4. Alloc page tables and directories if needed
++ * 4a. Validate new page tables and directories and update directories
++ */
++static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
++		struct amdgpu_vm *avm, bool is_aql,
++		struct kfd_bo_va_list **p_bo_va_entry)
++{
++	int ret;
++	struct kfd_bo_va_list *bo_va_entry;
++	struct amdkfd_vm *kvm = container_of(avm,
++					     struct amdkfd_vm, base);
++	struct amdgpu_bo *pd = avm->root.bo;
++	struct amdgpu_bo *bo = mem->bo;
++	uint64_t va = mem->va;
++	struct list_head *list_bo_va = &mem->bo_va_list;
++	unsigned long bo_size = bo->tbo.mem.size;
++
++	if (!va) {
++		pr_err("Invalid VA when adding BO to VM\n");
++		return -EINVAL;
++	}
++
++	if (is_aql)
++		va += bo_size;
++
++	bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
++	if (!bo_va_entry)
++		return -ENOMEM;
++
++	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
++			va + bo_size, avm);
++
++	/* Add BO to VM internal data structures*/
++	bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
++	if (bo_va_entry->bo_va == NULL) {
++		ret = -EINVAL;
++		pr_err("Failed to add BO object to VM. ret == %d\n",
++				ret);
++		goto err_vmadd;
++	}
++
++	bo_va_entry->va = va;
++	bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
++							 mem->mapping_flags);
++	bo_va_entry->kgd_dev = (void *)adev;
++	list_add(&bo_va_entry->bo_list, list_bo_va);
++
++	if (p_bo_va_entry)
++		*p_bo_va_entry = bo_va_entry;
++
++	/* Allocate new page tables if neeeded and validate
++	 * them. Clearing of new page tables and validate need to wait
++	 * on move fences. We don't want that to trigger the eviction
++	 * fence, so remove it temporarily.
++	 */
++	amdgpu_amdkfd_remove_eviction_fence(pd,
++					kvm->process_info->eviction_fence,
++					NULL, NULL);
++
++	ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
++	if (ret) {
++		pr_err("Failed to allocate pts, err=%d\n", ret);
++		goto err_alloc_pts;
++	}
++
++	ret = vm_validate_pt_pd_bos(avm);
++	if (ret != 0) {
++		pr_err("validate_pt_pd_bos() failed\n");
++		goto err_alloc_pts;
++	}
++
++	/* Add the eviction fence back */
++	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
++
+ 	return 0;
++
++err_alloc_pts:
++	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
++	amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
++	list_del(&bo_va_entry->bo_list);
++err_vmadd:
++	kfree(bo_va_entry);
++	return ret;
++}
++
++static void remove_bo_from_vm(struct amdgpu_device *adev,
++		struct kfd_bo_va_list *entry, unsigned long size)
++{
++	pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
++			entry->va,
++			entry->va + size, entry);
++	amdgpu_vm_bo_rmv(adev, entry->bo_va);
++	list_del(&entry->bo_list);
++	kfree(entry);
+ }
+ 
+ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+@@ -940,15 +993,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
+ 	vm = bo_va->vm;
+ 	bo = bo_va->bo;
+ 
+-	/* Update the page directory */
+-	ret = amdgpu_vm_update_directories(adev, vm);
+-	if (ret != 0) {
+-		pr_err("amdgpu_vm_update_directories failed\n");
+-		return ret;
+-	}
+-
+-	amdgpu_sync_fence(adev, sync, vm->last_dir_update);
+-
+ 	/* Update the page tables */
+ 	ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ 	if (ret != 0) {
+@@ -958,9 +1002,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
+ 
+ 	amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+ 
+-	/* Remove PTs from LRU list (reservation removed PD only) */
+-	amdgpu_vm_move_pt_bos_in_lru(adev, vm);
+-
+ 	/* Sync objects can't handle multiple GPUs (contexts) updating
+ 	 * sync->last_vm_update. Fortunately we don't need it for
+ 	 * KFD's purposes, so we can just drop that fence.
+@@ -978,52 +1019,16 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
+ 		bool no_update_pte)
+ {
+ 	int ret;
+-	struct amdgpu_bo *bo = entry->bo_va->bo;
+-	struct amdkfd_vm *kvm = container_of(entry->bo_va->vm,
+-					struct amdkfd_vm, base);
+-	struct amdgpu_bo *pd = entry->bo_va->vm->root.bo;
+ 
+-	/* Remove eviction fence from PD (and thereby from PTs too as they
+-	 * share the resv. object. This is necessary because new PTs are
+-	 * cleared and validate needs to wait on move fences. The eviction
+-	 * fence shouldn't interfere in both these activities
+-	 */
+-	amdgpu_amdkfd_remove_eviction_fence(pd,
+-				kvm->process_info->eviction_fence,
+-				NULL, NULL);
+-
+-	ret = amdgpu_vm_alloc_pts(adev, entry->bo_va->vm, entry->va,
+-			amdgpu_bo_size(bo));
+-
+-	if (ret) {
+-		pr_err("Failed to allocate pts, err=%d\n", ret);
+-		return ret;
+-	}
+-
+-	/* Set virtual address for the allocation, allocate PTs,
+-	 * if needed, and zero them.
+-	 */
+-	ret = amdgpu_vm_bo_map(adev, entry->bo_va,
+-			entry->va, 0, amdgpu_bo_size(bo),
+-			entry->pte_flags);
++	/* Set virtual address for the allocation */
++	ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
++			amdgpu_bo_size(entry->bo_va->bo), entry->pte_flags);
+ 	if (ret != 0) {
+ 		pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+ 				entry->va, ret);
+ 		return ret;
+ 	}
+ 
+-	/* PT BOs may be created during amdgpu_vm_bo_map() call,
+-	 * so we have to validate the newly created PT BOs.
+-	 */
+-	ret = vm_validate_pt_pd_bos(entry->bo_va->vm);
+-	if (ret != 0) {
+-		pr_err("validate_pt_pd_bos() failed\n");
+-		return ret;
+-	}
+-
+-	/* Add the eviction fence back */
+-	amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
+-
+ 	if (no_update_pte)
+ 		return 0;
+ 
+-- 
+2.7.4
+
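
The commit message above claims a drop in page-directory update frequency: from once per mapping to only the first mapping per GPU, and on restore from eviction from once per BO mapping to once per GPU. As a rough, self-contained illustration of that bookkeeping, here is a hypothetical user-space C model (not amdgpu code; every name and number below is invented for this sketch):

#include <stdio.h>

/* Model one buffer object that is mapped and unmapped `maps` times on
 * each of `gpus` GPUs. Only the counting is modeled, not the driver.
 */
static unsigned long pd_updates_old(unsigned long gpus, unsigned long maps)
{
	/* Old flow: update_gpuvm_pte() updated the page directory on
	 * every mapping operation.
	 */
	return gpus * maps;
}

static unsigned long pd_updates_new(unsigned long gpus)
{
	/* New flow: vm_validate_pt_pd_bos() updates the page directory
	 * only when add_bo_to_vm() first allocates page tables for the
	 * BO on a given GPU.
	 */
	return gpus;
}

int main(void)
{
	unsigned long gpus = 8, maps = 1000;

	printf("old flow: %lu page directory updates\n",
	       pd_updates_old(gpus, maps));
	printf("new flow: %lu page directory updates\n",
	       pd_updates_new(gpus));
	return 0;
}

With 8 GPUs and 1000 map/unmap cycles this model counts 8000 directory updates for the old flow versus 8 for the new one, which is the shape of the saving the patch describes.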