From 4958a4ae62cc8e1a70ca2877b51cf7bff91d2669 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan
Date: Wed, 13 Jul 2016 13:24:37 -0400
Subject: [PATCH 1520/4131] drm/amdkfd: Sync with PD/PT fence after VM update

Sync only with the PD/PT update fences. It is not required to wait on BO
fences.

Change-Id: I24e8899e4b75ba6b73a114cdeda903c078aebc02
Signed-off-by: Harish Kasiviswanathan
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 55 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ad61cb6..2abce72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -154,6 +154,10 @@ static int try_pin_bo(struct amdgpu_bo *bo, uint32_t domain)
 		ret = amdgpu_bo_pin(bo, domain, NULL);
 		if (ret != 0)
 			goto error;
+		/* Since TTM pipelines evictions and moves, wait for bo
+		 * validate to complete.
+		 */
+		ttm_bo_wait(&bo->tbo, false, false);
 	} else {
 		/* amdgpu_bo_pin doesn't support userptr. Therefore we
 		 * can use the bo->pin_count for our version of
@@ -467,6 +471,7 @@ struct bo_vm_reservation_context {
 	struct amdgpu_bo_list_entry *vm_pd;
 	struct ww_acquire_ctx ticket;
 	struct list_head list, duplicates;
+	struct amdgpu_sync sync;
 	bool reserved;
 };
 
@@ -487,6 +492,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
 	ctx->reserved = false;
 	ctx->n_vms = 1;
+	amdgpu_sync_create(&ctx->sync);
 
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->duplicates);
@@ -548,6 +554,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->reserved = false;
 	ctx->n_vms = 0;
 	ctx->vm_pd = NULL;
+	amdgpu_sync_create(&ctx->sync);
 
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->duplicates);
@@ -608,22 +615,15 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
 				 bool wait)
 {
-	if (wait) {
-		struct ttm_validate_buffer *entry;
-		int ret;
+	if (wait)
+		amdgpu_sync_wait(&ctx->sync);
 
-		list_for_each_entry(entry, &ctx->list, head) {
-			ret = ttm_bo_wait(entry->bo, false, false);
-			if (ret != 0)
-				pr_err("amdkfd: Failed to wait for PT/PD update (err == %d)\n",
-						ret);
-		}
-	}
 	if (ctx->reserved)
 		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
 	if (ctx->vm_pd) {
 		kfree(ctx->vm_pd);
 	}
+	amdgpu_sync_free(&ctx->sync);
 	ctx->reserved = false;
 	ctx->vm_pd = NULL;
 }
@@ -710,7 +710,8 @@ static int update_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
-		struct amdgpu_bo_va *bo_va, uint32_t domain)
+		struct amdgpu_bo_va *bo_va, uint32_t domain,
+		struct amdgpu_sync *sync)
 {
 	struct amdgpu_vm *vm;
 	int ret;
@@ -742,6 +743,8 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
 		goto err_unpin_bo;
 	}
 
+	amdgpu_sync_fence(adev, sync, vm->page_directory_fence);
+
 	/*
 	 * The previously "released" BOs are really released and their VAs are
 	 * removed from PT. This function is called here because it requires
@@ -760,7 +763,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
 		goto err_unpin_bo;
 	}
 
-	ret = amdgpu_vm_clear_invalids(adev, vm, NULL);
+	amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+
+	ret = amdgpu_vm_clear_invalids(adev, vm, sync);
 	if (ret != 0) {
 		pr_err("amdkfd: Failed to radeon_vm_clear_invalids\n");
 		goto err_failed_to_vm_clear_invalids;
@@ -773,6 +778,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, struct amdgpu_bo *bo,
 
 err_failed_to_vm_clear_invalids:
 	amdgpu_vm_bo_update(adev, bo_va, NULL);
+	amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
 err_unpin_bo:
 	/* PTs are not needed to be unpinned*/
 	unpin_bo(bo);
@@ -1029,7 +1035,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		pr_debug("amdkfd: Trying to map VA 0x%llx to vm %p\n",
 				mem->data2.va, vm);
 
-		ret = map_bo_to_gpuvm(adev, bo, entry->bo_va, domain);
+		ret = map_bo_to_gpuvm(adev, bo, entry->bo_va, domain,
+				&ctx.sync);
 		if (ret != 0) {
 			pr_err("amdkfd: Failed to map radeon bo to gpuvm\n");
 			goto map_bo_to_gpuvm_failed;
@@ -1190,7 +1197,8 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 
 static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 				struct amdgpu_bo *bo,
-				struct amdgpu_bo_va *bo_va)
+				struct amdgpu_bo_va *bo_va,
+				struct amdgpu_sync *sync)
 {
 	struct amdgpu_vm *vm = bo_va->vm;
 
@@ -1203,8 +1211,9 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 
 	/* Update the page tables - Remove the mapping from bo_va */
 	amdgpu_vm_bo_update(adev, bo_va, NULL);
+	amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
 
-	amdgpu_vm_clear_invalids(adev, vm, NULL);
+	amdgpu_vm_clear_invalids(adev, vm, sync);
 
 	/* Unpin BO*/
 	unpin_bo(bo);
@@ -1281,7 +1290,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 				mem->data2.bo->tbo.mem.size);
 
 		ret = unmap_bo_from_gpuvm(adev, mem->data2.bo,
-				entry->bo_va);
+				entry->bo_va, &ctx.sync);
 		if (ret == 0) {
 			entry->is_mapped = false;
 		} else {
@@ -1604,7 +1613,8 @@ int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm)
 			goto fail;
 		}
 
-		r = unmap_bo_from_gpuvm(adev, mem->data2.bo, entry->bo_va);
+		r = unmap_bo_from_gpuvm(adev, mem->data2.bo,
+				entry->bo_va, &ctx.sync);
 		if (r != 0) {
 			pr_err("failed unmap va 0x%llx\n",
 					mem->data2.va);
@@ -1691,7 +1701,8 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
 			continue;
 		}
 
-		r = map_bo_to_gpuvm(adev, mem->data2.bo, entry->bo_va, domain);
+		r = map_bo_to_gpuvm(adev, mem->data2.bo, entry->bo_va, domain,
+				&ctx.sync);
 		if (unlikely(r != 0)) {
 			pr_err("Failed to map BO to gpuvm\n");
 			entry->is_mapped = false;
@@ -1812,6 +1823,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 	list_cut_position(&ctx.list, &master_vm->kfd_bo_list,
 			&last_pd_bo_entry->tv.head);
 
+	amdgpu_sync_create(&ctx.sync);
+
 	/* Validate PDs and PTs */
 	list_for_each_entry(entry, &ctx.list, tv.head) {
 		struct amdgpu_bo *bo = entry->robj;
@@ -1864,7 +1877,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 			ret = map_bo_to_gpuvm((struct amdgpu_device *)
 					bo_va_entry->kgd_dev,
 					bo, bo_va_entry->bo_va,
-					domain);
+					domain,
+					&ctx.sync);
 			if (ret) {
 				pr_debug("Memory eviction: Map failed. Try again\n");
 				goto validate_map_fail;
@@ -1872,6 +1886,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 		}
 	}
 
+	amdgpu_sync_wait(&ctx.sync);
+
 	/* Wait for validate to finish and attach new eviction fence */
 	list_for_each_entry(mem, &master_vm->kfd_bo_list,
 			data2.bo_list_entry.tv.head) {
@@ -1884,6 +1900,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 	/* Add PDs to kfd_bo_list for unreserve */
 	list_splice_init(&ctx.list, &master_vm->kfd_bo_list);
 	ttm_eu_backoff_reservation(&ctx.ticket, &master_vm->kfd_bo_list);
+	amdgpu_sync_free(&ctx.sync);
 ttm_reserve_fail:
 	/* Restore kfd_bo_list */
 	list_cut_position(&ctx.list, &master_vm->kfd_bo_list,
-- 
2.7.4
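
Note on the pattern (illustration only, not part of the applied patch): each
VM update above publishes its completion fence into the reservation
context's amdgpu_sync object (amdgpu_sync_fence() with
vm->page_directory_fence and bo_va->last_pt_update), and
unreserve_bo_and_vms() then issues a single amdgpu_sync_wait() instead of
calling ttm_bo_wait() on every BO in the list. The user-space C sketch below
models that create -> fence -> wait -> free lifecycle; toy_sync, toy_fence
and pt_update_worker are hypothetical stand-ins for struct amdgpu_sync,
struct fence and an asynchronous page-table update, not the kernel API.

/*
 * Minimal model of the collect-then-wait fence pattern.
 * Build with: cc -pthread toy_sync.c
 */
#include <pthread.h>
#include <stdio.h>

struct toy_fence {                      /* one async VM update's completion */
        pthread_mutex_t lock;
        pthread_cond_t cond;
        int signaled;
};

struct toy_sync {                       /* collects fences to wait on later */
        struct toy_fence *fences[16];
        int count;
};

static void toy_fence_init(struct toy_fence *f)
{
        pthread_mutex_init(&f->lock, NULL);
        pthread_cond_init(&f->cond, NULL);
        f->signaled = 0;
}

static void toy_sync_create(struct toy_sync *s) { s->count = 0; }
static void toy_sync_free(struct toy_sync *s) { s->count = 0; }

/* Like amdgpu_sync_fence(): remember the fence instead of waiting now. */
static void toy_sync_fence(struct toy_sync *s, struct toy_fence *f)
{
        if (f && s->count < 16)
                s->fences[s->count++] = f;
}

/* Like amdgpu_sync_wait(): one wait covers every collected update. */
static void toy_sync_wait(struct toy_sync *s)
{
        int i;

        for (i = 0; i < s->count; i++) {
                struct toy_fence *f = s->fences[i];

                pthread_mutex_lock(&f->lock);
                while (!f->signaled)
                        pthread_cond_wait(&f->cond, &f->lock);
                pthread_mutex_unlock(&f->lock);
        }
}

/* Stands in for a PD/PT update completing asynchronously. */
static void *pt_update_worker(void *arg)
{
        struct toy_fence *f = arg;

        pthread_mutex_lock(&f->lock);
        f->signaled = 1;                /* work done: signal the fence */
        pthread_cond_signal(&f->cond);
        pthread_mutex_unlock(&f->lock);
        return NULL;
}

int main(void)
{
        struct toy_sync sync;
        struct toy_fence pd_fence, pt_fence;
        pthread_t t1, t2;

        toy_sync_create(&sync);                 /* reserve_bo_and_vm() */
        toy_fence_init(&pd_fence);
        toy_fence_init(&pt_fence);

        /* map_bo_to_gpuvm(): updates run async, fences are collected. */
        pthread_create(&t1, NULL, pt_update_worker, &pd_fence);
        pthread_create(&t2, NULL, pt_update_worker, &pt_fence);
        toy_sync_fence(&sync, &pd_fence);       /* vm->page_directory_fence */
        toy_sync_fence(&sync, &pt_fence);       /* bo_va->last_pt_update */

        toy_sync_wait(&sync);                   /* unreserve_bo_and_vms(.., true) */
        toy_sync_free(&sync);

        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        printf("all PD/PT updates complete\n");
        return 0;
}

Waiting only on the collected PD/PT fences matches the commit message's
intent: a BO's reservation object can carry fences for unrelated work, so
the removed per-BO ttm_bo_wait() loop could block on more than the page
table updates the mapping actually depends on.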