about summary refs log tree commit diff stats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch')
-rw-r--r-- meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch | 319
1 files changed, 0 insertions, 319 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch
deleted file mode 100644
index f3322277..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1737-drm-amdgpu-Optimize-page-directory-updates-for-KFD.patch
+++ /dev/null
@@ -1,319 +0,0 @@
-From b93a8bfd66d0254e2920f9d1648fa994b0d821a9 Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Tue, 11 Jul 2017 19:56:49 -0400
-Subject: [PATCH 1737/4131] drm/amdgpu: Optimize page directory updates for KFD
-
-Updating 4-level page tables is expensive when lots of virtual
-address space is used and thousands of page table BOs allocated. So
-avoid doing it unnecessarily. Move page table allocation into
-add_bo_to_vm and page directory update into vm_validate_pt_pd_bos.
-The latter is called by add_bo_to_vm and when restoring from
-evictions.
-
-This reduces page directory update frequency from once per mapping
-to only the first mapping per GPU. On restore from evictions, it
-reduces page directory updates from once per BO mapping to once
-per GPU.
-
-Also stop moving PT BOs in the LRU list. This was originally added
-when we were still pinning memory to remove PT BOs from the LRU list.
-Now it's just a pointless move, which is very expensive when there
-are thousands of BOs.
-
-Change-Id: If23e2f30e665511510bad705346543de82cc9cfe
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-
- Conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 231 ++++++++++++-----------
- 1 file changed, 118 insertions(+), 113 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 01414bc..505d006 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -335,68 +335,6 @@ static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
- kfree(ef_list);
- }
-
--static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-- struct amdgpu_vm *avm, bool is_aql,
-- struct kfd_bo_va_list **p_bo_va_entry)
--{
-- int ret;
-- struct kfd_bo_va_list *bo_va_entry;
-- struct amdgpu_bo *bo = mem->bo;
-- uint64_t va = mem->va;
-- struct list_head *list_bo_va = &mem->bo_va_list;
-- unsigned long bo_size = bo->tbo.mem.size;
--
-- if (is_aql)
-- va += bo_size;
--
-- bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-- if (!bo_va_entry)
-- return -ENOMEM;
--
-- if (!va) {
-- pr_err("Invalid VA when adding BO to VM\n");
-- return -EINVAL;
-- }
--
-- pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-- va + bo_size, avm);
--
-- /* Add BO to VM internal data structures*/
-- bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
-- if (bo_va_entry->bo_va == NULL) {
-- ret = -EINVAL;
-- pr_err("Failed to add BO object to VM. ret == %d\n",
-- ret);
-- goto err_vmadd;
-- }
--
-- bo_va_entry->va = va;
-- bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
-- mem->mapping_flags);
-- bo_va_entry->kgd_dev = (void *)adev;
-- list_add(&bo_va_entry->bo_list, list_bo_va);
--
-- if (p_bo_va_entry)
-- *p_bo_va_entry = bo_va_entry;
--
-- return 0;
--
--err_vmadd:
-- kfree(bo_va_entry);
-- return ret;
--}
--
--static void remove_bo_from_vm(struct amdgpu_device *adev,
-- struct kfd_bo_va_list *entry, unsigned long size)
--{
-- pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
-- entry->va,
-- entry->va + size, entry);
-- amdgpu_vm_bo_rmv(adev, entry->bo_va);
-- list_del(&entry->bo_list);
-- kfree(entry);
--}
--
- static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
- bool wait)
- {
-@@ -435,6 +373,12 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
- return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
- }
-
-+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
-+ *
-+ * Also updates page directory entries so we don't need to do this
-+ * again later until the page directory is validated again (e.g. after
-+ * an eviction or allocating new page tables).
-+ */
- static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
- {
- struct amdgpu_bo *pd = vm->root.bo;
-@@ -460,7 +404,116 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
-
- vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
-
-+ ret = amdgpu_vm_update_directories(adev, vm);
-+ if (ret != 0)
-+ return ret;
-+
-+ return 0;
-+}
-+
-+/* add_bo_to_vm - Add a BO to a VM
-+ *
-+ * Everything that needs to be done only once when a BO is first added
-+ * to a VM. It can later be mapped and unmapped many times without
-+ * repeating these steps.
-+ *
-+ * 1. Allocate and initialize BO VA entry data structure
-+ * 2. Add BO to the VM
-+ * 3. Determine ASIC-specific PTE flags
-+ * 4. Alloc page tables and directories if needed
-+ * 4a. Validate new page tables and directories and update directories
-+ */
-+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
-+ struct amdgpu_vm *avm, bool is_aql,
-+ struct kfd_bo_va_list **p_bo_va_entry)
-+{
-+ int ret;
-+ struct kfd_bo_va_list *bo_va_entry;
-+ struct amdkfd_vm *kvm = container_of(avm,
-+ struct amdkfd_vm, base);
-+ struct amdgpu_bo *pd = avm->root.bo;
-+ struct amdgpu_bo *bo = mem->bo;
-+ uint64_t va = mem->va;
-+ struct list_head *list_bo_va = &mem->bo_va_list;
-+ unsigned long bo_size = bo->tbo.mem.size;
-+
-+ if (!va) {
-+ pr_err("Invalid VA when adding BO to VM\n");
-+ return -EINVAL;
-+ }
-+
-+ if (is_aql)
-+ va += bo_size;
-+
-+ bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
-+ if (!bo_va_entry)
-+ return -ENOMEM;
-+
-+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-+ va + bo_size, avm);
-+
-+ /* Add BO to VM internal data structures*/
-+ bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo);
-+ if (bo_va_entry->bo_va == NULL) {
-+ ret = -EINVAL;
-+ pr_err("Failed to add BO object to VM. ret == %d\n",
-+ ret);
-+ goto err_vmadd;
-+ }
-+
-+ bo_va_entry->va = va;
-+ bo_va_entry->pte_flags = amdgpu_vm_get_pte_flags(adev,
-+ mem->mapping_flags);
-+ bo_va_entry->kgd_dev = (void *)adev;
-+ list_add(&bo_va_entry->bo_list, list_bo_va);
-+
-+ if (p_bo_va_entry)
-+ *p_bo_va_entry = bo_va_entry;
-+
-+ /* Allocate new page tables if needed and validate
-+ * them. Clearing of new page tables and validate need to wait
-+ * on move fences. We don't want that to trigger the eviction
-+ * fence, so remove it temporarily.
-+ */
-+ amdgpu_amdkfd_remove_eviction_fence(pd,
-+ kvm->process_info->eviction_fence,
-+ NULL, NULL);
-+
-+ ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo));
-+ if (ret) {
-+ pr_err("Failed to allocate pts, err=%d\n", ret);
-+ goto err_alloc_pts;
-+ }
-+
-+ ret = vm_validate_pt_pd_bos(avm);
-+ if (ret != 0) {
-+ pr_err("validate_pt_pd_bos() failed\n");
-+ goto err_alloc_pts;
-+ }
-+
-+ /* Add the eviction fence back */
-+ amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-+
- return 0;
-+
-+err_alloc_pts:
-+ amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
-+ amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
-+ list_del(&bo_va_entry->bo_list);
-+err_vmadd:
-+ kfree(bo_va_entry);
-+ return ret;
-+}
-+
-+static void remove_bo_from_vm(struct amdgpu_device *adev,
-+ struct kfd_bo_va_list *entry, unsigned long size)
-+{
-+ pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
-+ entry->va,
-+ entry->va + size, entry);
-+ amdgpu_vm_bo_rmv(adev, entry->bo_va);
-+ list_del(&entry->bo_list);
-+ kfree(entry);
- }
-
- static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-@@ -940,15 +993,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
- vm = bo_va->vm;
- bo = bo_va->bo;
-
-- /* Update the page directory */
-- ret = amdgpu_vm_update_directories(adev, vm);
-- if (ret != 0) {
-- pr_err("amdgpu_vm_update_directories failed\n");
-- return ret;
-- }
--
-- amdgpu_sync_fence(adev, sync, vm->last_dir_update);
--
- /* Update the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false);
- if (ret != 0) {
-@@ -958,9 +1002,6 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
-
- amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-
-- /* Remove PTs from LRU list (reservation removed PD only) */
-- amdgpu_vm_move_pt_bos_in_lru(adev, vm);
--
- /* Sync objects can't handle multiple GPUs (contexts) updating
- * sync->last_vm_update. Fortunately we don't need it for
- * KFD's purposes, so we can just drop that fence.
-@@ -978,52 +1019,16 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- bool no_update_pte)
- {
- int ret;
-- struct amdgpu_bo *bo = entry->bo_va->bo;
-- struct amdkfd_vm *kvm = container_of(entry->bo_va->vm,
-- struct amdkfd_vm, base);
-- struct amdgpu_bo *pd = entry->bo_va->vm->root.bo;
-
-- /* Remove eviction fence from PD (and thereby from PTs too as they
-- * share the resv. object. This is necessary because new PTs are
-- * cleared and validate needs to wait on move fences. The eviction
-- * fence shouldn't interfere in both these activities
-- */
-- amdgpu_amdkfd_remove_eviction_fence(pd,
-- kvm->process_info->eviction_fence,
-- NULL, NULL);
--
-- ret = amdgpu_vm_alloc_pts(adev, entry->bo_va->vm, entry->va,
-- amdgpu_bo_size(bo));
--
-- if (ret) {
-- pr_err("Failed to allocate pts, err=%d\n", ret);
-- return ret;
-- }
--
-- /* Set virtual address for the allocation, allocate PTs,
-- * if needed, and zero them.
-- */
-- ret = amdgpu_vm_bo_map(adev, entry->bo_va,
-- entry->va, 0, amdgpu_bo_size(bo),
-- entry->pte_flags);
-+ /* Set virtual address for the allocation */
-+ ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
-+ amdgpu_bo_size(entry->bo_va->bo), entry->pte_flags);
- if (ret != 0) {
- pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
- entry->va, ret);
- return ret;
- }
-
-- /* PT BOs may be created during amdgpu_vm_bo_map() call,
-- * so we have to validate the newly created PT BOs.
-- */
-- ret = vm_validate_pt_pd_bos(entry->bo_va->vm);
-- if (ret != 0) {
-- pr_err("validate_pt_pd_bos() failed\n");
-- return ret;
-- }
--
-- /* Add the eviction fence back */
-- amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true);
--
- if (no_update_pte)
- return 0;
-
---
-2.7.4
-