From 116d98ff97fd3aad490ea72c8f19c106a48ae1e0 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Wed, 29 Jun 2016 16:50:52 -0400 Subject: [PATCH 1515/4131] drm/amdgpu: Add kfd2kgd restore_process_bos API Restore all BOs and the resume the evicted queues Change-Id: Ieee6e0b4eeb327c7dd56304aca86c4008e044b70 Signed-off-by: Harish Kasiviswanathan --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 168 ++++++++++++++++++++++ drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 + 5 files changed, 176 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 83bea0e..a291577 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -138,7 +138,7 @@ void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev, int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); - +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *master_vm); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 22c4b7a..be8bbda 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -227,6 +227,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, .submit_ib = amdgpu_amdkfd_submit_ib, .get_tile_config = amdgpu_amdkfd_get_tile_config, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions() diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 7be32f3..2c34213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -208,6 +208,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, .submit_ib = amdgpu_amdkfd_submit_ib, .get_tile_config = amdgpu_amdkfd_get_tile_config, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions() diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bab085c..ad61cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1725,3 +1725,171 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm) return ret; } + +/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given + * KFD process identified by master_vm + * + * @master_vm: Master VM of the KFD process + * + * After memory eviction, restore thread calls this function. The function + * should be called when the Process is still valid. BO restore involves - + * + * 1. Release old eviction fence and create new one + * 2. Get PD BO list and PT BO list from all the VMs. + * 3. Merge PD BO list into KFD BO list. Reserve all BOs. + * 4. Split the list into PD BO list and KFD BO list + * 5. Validate of PD and PT BOs and add new fence to the BOs + * 6. Validate all KFD BOs and Map them and add new fence + * 7. Merge PD BO list into KFD BO list. Unreserve all BOs + * 8. Restore back KFD BO list by removing PD BO entries + */ + +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm) +{ + struct amdgpu_bo_list_entry *pd_bo_list, *last_pd_bo_entry, *entry; + struct amdkfd_vm *master_vm = (struct amdkfd_vm *)m_vm, *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *old_fence; + int ret = 0, i; + + if (WARN_ON(master_vm == NULL || master_vm->master != master_vm)) + return -EINVAL; + + INIT_LIST_HEAD(&ctx.list); + INIT_LIST_HEAD(&ctx.duplicates); + + pd_bo_list = kcalloc(master_vm->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (pd_bo_list == NULL) + return -ENOMEM; + + /* Release old eviction fence and create new one. Use context and mm + * from the old fence. + */ + old_fence = master_vm->eviction_fence; + master_vm->eviction_fence = + amdgpu_amdkfd_fence_create(old_fence->base.context, + old_fence->mm); + fence_put(&old_fence->base); + if (master_vm->eviction_fence == NULL) { + pr_err("Failed to create eviction fence\n"); + goto evict_fence_fail; + } + + /* Get PD BO list and PT BO list from all the VMs */ + amdgpu_vm_get_pd_bo(&master_vm->base, &ctx.list, + &pd_bo_list[0]); + amdgpu_vm_get_pt_bos(master_vm->adev, &master_vm->base, + &ctx.duplicates); + + i = 1; + list_for_each_entry(peer_vm, &master_vm->kfd_vm_list, kfd_vm_list) { + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i]); + amdgpu_vm_get_pt_bos(peer_vm->adev, &peer_vm->base, + &ctx.duplicates); + i++; + } + + /* Needed to splicing and cutting the lists */ + last_pd_bo_entry = list_last_entry(&ctx.list, + struct amdgpu_bo_list_entry, + tv.head); + + /* Reserve all BOs and page tables/directory. */ + mutex_lock(&master_vm->lock); + list_splice_init(&ctx.list, &master_vm->kfd_bo_list); + ret = ttm_eu_reserve_buffers(&ctx.ticket, &master_vm->kfd_bo_list, + false, &ctx.duplicates); + if (ret) { + pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); + goto ttm_reserve_fail; + } + + /* Restore kfd_bo_list. ctx.list contains only PDs */ + list_cut_position(&ctx.list, &master_vm->kfd_bo_list, + &last_pd_bo_entry->tv.head); + + /* Validate PDs and PTs */ + list_for_each_entry(entry, &ctx.list, tv.head) { + struct amdgpu_bo *bo = entry->robj; + + amdgpu_ttm_placement_from_domain(bo, bo->prefered_domains); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (ret) { + pr_debug("Memory eviction: Validate failed. Try again\n"); + goto validate_map_fail; + } + } + list_for_each_entry(entry, &ctx.duplicates, tv.head) { + struct amdgpu_bo *bo = entry->robj; + + amdgpu_ttm_placement_from_domain(bo, bo->prefered_domains); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (ret) { + pr_debug("Memory eviction: Validate failed. Try again\n"); + goto validate_map_fail; + } + } + + /* Wait for PT/PD validate to finish and attach eviction fence. + * PD/PT share the same reservation object + */ + list_for_each_entry(entry, &ctx.list, tv.head) { + struct amdgpu_bo *bo = entry->robj; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_bo_fence(bo, &master_vm->eviction_fence->base, true); + } + + + /* Validate BOs and map them to GPUVM (update VM page tables). */ + list_for_each_entry(mem, &master_vm->kfd_bo_list, + data2.bo_list_entry.tv.head) { + + struct amdgpu_bo *bo = mem->data2.bo; + uint32_t domain = mem->data2.domain; + struct kfd_bo_va_list *bo_va_entry; + + amdgpu_ttm_placement_from_domain(bo, domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (ret) { + pr_debug("Memory eviction: Validate failed. Try again\n"); + goto validate_map_fail; + } + list_for_each_entry(bo_va_entry, &mem->data2.bo_va_list, + bo_list) { + ret = map_bo_to_gpuvm((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo, bo_va_entry->bo_va, + domain); + if (ret) { + pr_debug("Memory eviction: Map failed. Try again\n"); + goto validate_map_fail; + } + } + } + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &master_vm->kfd_bo_list, + data2.bo_list_entry.tv.head) { + struct amdgpu_bo *bo = mem->data2.bo; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_bo_fence(bo, &master_vm->eviction_fence->base, true); + } +validate_map_fail: + /* Add PDs to kfd_bo_list for unreserve */ + list_splice_init(&ctx.list, &master_vm->kfd_bo_list); + ttm_eu_backoff_reservation(&ctx.ticket, &master_vm->kfd_bo_list); +ttm_reserve_fail: + /* Restore kfd_bo_list */ + list_cut_position(&ctx.list, &master_vm->kfd_bo_list, + &last_pd_bo_entry->tv.head); + mutex_unlock(&master_vm->lock); +evict_fence_fail: + kfree(pd_bo_list); + return ret; +} diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 4b52412..c38e707 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -220,6 +220,9 @@ struct tile_config { * @submit_ib: Submits an IB to the engine specified by inserting the IB to * the corresonded ring (ring type). * + * @restore_process_bos: Restore all BOs that belongs to the process identified + * by master_vm. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -361,6 +364,8 @@ struct kfd2kgd_calls { uint32_t *ib_cmd, uint32_t ib_len); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); + + int (*restore_process_bos)(void *master_vm); }; /** -- 2.7.4