From 2eb776d90ca915cfb121a2b684f9e7347a57b4e4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 23 Feb 2018 19:51:28 -0500 Subject: [PATCH 3709/4131] drm/amdgpu: Move KFD-specific fields into struct amdgpu_vm Remove struct amdkfd_vm and move the fields into struct amdgpu_vm. This will allow turning a VM created by a DRM render node into a KFD VM. Change-Id: I34112b358e29cdebc8c6af6ce1ffb62d3f22c884 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 258 ++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 - 4 files changed, 92 insertions(+), 171 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 312515c..0b343eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -167,8 +167,7 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT && - !bo->tbo.sg) { + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } @@ -357,15 +356,21 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); } -static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm) +static u64 get_vm_pd_gpu_offset(struct amdgpu_vm *vm, bool reserve) { struct amdgpu_device *adev = amdgpu_ttm_adev(vm->root.base.bo->tbo.bdev); u64 offset; uint64_t flags = AMDGPU_PTE_VALID; + if (reserve) + amdgpu_bo_reserve(vm->root.base.bo, false); + offset = amdgpu_bo_gpu_offset(vm->root.base.bo); + if (reserve) + amdgpu_bo_unreserve(vm->root.base.bo); + /* On some ASICs the FB doesn't start at 0. Adjust FB offset * to an actual MC address. */ @@ -404,7 +409,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - vm->pd_phys_addr = get_vm_pd_gpu_offset(vm); + vm->pd_phys_addr = get_vm_pd_gpu_offset(vm, false); if (vm->use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); @@ -417,6 +422,23 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return 0; } +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. 
+	 */
+	if (sync->last_vm_update) {
+		dma_fence_put(sync->last_vm_update);
+		sync->last_vm_update = NULL;
+	}
+
+	return ret;
+}
+
 static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo *pd = vm->root.base.bo;
@@ -427,7 +449,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	if (ret)
 		return ret;
 
-	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
+	return sync_vm_fence(adev, sync, vm->last_update);
 }
 
 /* add_bo_to_vm - Add a BO to a VM
@@ -833,7 +855,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
 	/* Add the eviction fence back */
 	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
 
-	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	sync_vm_fence(adev, sync, bo_va->last_pt_update);
 
 	return 0;
 }
@@ -858,7 +880,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 		return ret;
 	}
 
-	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
+	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -936,7 +958,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
 			    vm_list_node) {
 		struct amdgpu_bo *pd = peer_vm->root.base.bo;
 
-		ret = amdgpu_sync_resv(NULL,
+		ret = amdgpu_sync_resv(amdgpu_ttm_adev(pd->tbo.bdev),
 				       sync, pd->tbo.resv,
 				       AMDGPU_FENCE_OWNER_UNDEFINED, false);
 		if (ret)
@@ -962,16 +984,32 @@ static int process_update_pds(struct amdkfd_process_info *process_info,
 	return 0;
 }
 
-static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
-		       struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+					  void **process_info,
+					  struct dma_fence **ef)
 {
-	struct amdkfd_process_info *info = NULL;
 	int ret;
+	struct amdgpu_vm *new_vm;
+	struct amdkfd_process_info *info;
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+	if (!new_vm)
+		return -ENOMEM;
+
+	/* Initialize the VM context, allocate the page directory and zero it */
+	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+	if (ret) {
+		pr_err("Failed init vm ret %d\n", ret);
+		goto vm_init_fail;
+	}
 
 	if (!*process_info) {
 		info = kzalloc(sizeof(*info), GFP_KERNEL);
-		if (!info)
-			return -ENOMEM;
+		if (!info) {
+			ret = -ENOMEM;
+			goto alloc_process_info_fail;
+		}
 
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
@@ -984,11 +1022,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 						   current->mm);
 		if (!info->eviction_fence) {
 			pr_err("Failed to create eviction fence\n");
-			ret = -ENOMEM;
 			goto create_evict_fence_fail;
 		}
 
-		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+		info->pid = get_task_pid(current->group_leader,
+					 PIDTYPE_PID);
 		atomic_set(&info->evicted_bos, 0);
 		INIT_DELAYED_WORK(&info->work,
 				  amdgpu_amdkfd_restore_userptr_worker);
@@ -997,131 +1035,56 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
 
-	vm->process_info = *process_info;
+	new_vm->process_info = *process_info;
+	new_vm->pd_phys_addr = get_vm_pd_gpu_offset(new_vm, true);
 
-	/* Validate page directory and attach eviction fence */
-	ret = amdgpu_bo_reserve(vm->root.base.bo, true);
-	if (ret)
-		goto reserve_pd_fail;
-	ret = vm_validate_pt_pd_bos(vm);
-	if (ret) {
-		pr_err("validate_pt_pd_bos() failed\n");
-		goto validate_pd_fail;
-	}
-	amdgpu_bo_fence(vm->root.base.bo,
-			&vm->process_info->eviction_fence->base, true);
-	amdgpu_bo_unreserve(vm->root.base.bo);
+	mutex_lock(&new_vm->process_info->lock);
+	list_add_tail(&new_vm->vm_list_node,
+		      &(new_vm->process_info->vm_list_head));
+	new_vm->process_info->n_vms++;
+	mutex_unlock(&new_vm->process_info->lock);
 
-	/* Update process info */
-	mutex_lock(&vm->process_info->lock);
-	list_add_tail(&vm->vm_list_node,
-		      &(vm->process_info->vm_list_head));
-	vm->process_info->n_vms++;
-	mutex_unlock(&vm->process_info->lock);
+	*vm = (void *) new_vm;
 
-	return 0;
+	pr_debug("Created process vm %p\n", *vm);
 
-validate_pd_fail:
-	amdgpu_bo_unreserve(vm->root.base.bo);
-reserve_pd_fail:
-	vm->process_info = NULL;
-	if (info) {
-		/* Two fence references: one in info and one in *ef */
-		dma_fence_put(&info->eviction_fence->base);
-		dma_fence_put(*ef);
-		*ef = NULL;
-		*process_info = NULL;
-create_evict_fence_fail:
-		kfree(info);
-	}
 	return ret;
-}
-
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-					  void **process_info,
-					  struct dma_fence **ef)
-{
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *new_vm;
-	int ret;
-
-	new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
-	if (!new_vm)
-		return -ENOMEM;
-
-	/* Initialize AMDGPU part of the VM */
-	ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
-	if (ret) {
-		pr_err("Failed init vm ret %d\n", ret);
-		goto amdgpu_vm_init_fail;
-	}
-
-	/* Initialize KFD part of the VM and process info */
-	ret = init_kfd_vm(new_vm, process_info, ef);
-	if (ret)
-		goto init_kfd_vm_fail;
-
-	*vm = (void *) new_vm;
-
-	return 0;
-
-init_kfd_vm_fail:
+create_evict_fence_fail:
+	kfree(info);
+alloc_process_info_fail:
 	amdgpu_vm_fini(adev, new_vm);
-amdgpu_vm_init_fail:
+vm_init_fail:
 	kfree(new_vm);
 	return ret;
-}
 
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp,
-					   void **vm, void **process_info,
-					   struct dma_fence **ef)
-{
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct drm_file *drm_priv = filp->private_data;
-	struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
-	struct amdgpu_vm *avm = &drv_priv->vm;
-	int ret;
-
-	/* Convert VM into a compute VM */
-	ret = amdgpu_vm_make_compute(adev, avm);
-	if (ret)
-		return ret;
-
-	/* Initialize KFD part of the VM and process info */
-	ret = init_kfd_vm(avm, process_info, ef);
-	if (ret)
-		return ret;
-
-	*vm = (void *)avm;
-
-	return 0;
 }
 
-void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
-				    struct amdgpu_vm *vm)
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 {
-	struct amdkfd_process_info *process_info = vm->process_info;
-	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	struct amdgpu_bo *pd;
+	struct amdkfd_process_info *process_info;
 
-	if (vm->vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
+	if (WARN_ON(!kgd || !vm))
 		return;
 
+	pr_debug("Destroying process vm %p\n", vm);
 	/* Release eviction fence from PD */
+	pd = avm->root.base.bo;
 	amdgpu_bo_reserve(pd, false);
 	amdgpu_bo_fence(pd, NULL, false);
 	amdgpu_bo_unreserve(pd);
 
-	if (!process_info)
-		return;
+	process_info = avm->process_info;
 
-	/* Update process info */
 	mutex_lock(&process_info->lock);
 	process_info->n_vms--;
-	list_del(&vm->vm_list_node);
+	list_del(&avm->vm_list_node);
 	mutex_unlock(&process_info->lock);
 
-	/* Release per-process resources when last compute VM is destroyed */
+	/* Release per-process resources */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
 		WARN_ON(!list_empty(&process_info->userptr_valid_list));
@@ -1132,17 +1095,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 		put_pid(process_info->pid);
 		kfree(process_info);
 	}
-}
-
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
-{
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
-
-	if (WARN_ON(!kgd || !vm))
-		return;
-
-	pr_debug("Destroying process vm %p\n", vm);
 
 	/* Release the VM context */
 	amdgpu_vm_fini(adev, avm);
@@ -1165,7 +1117,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
 	uint64_t user_addr = 0;
 	struct sg_table *sg = NULL;
-	enum ttm_bo_type bo_type = ttm_bo_type_device;
 	struct amdgpu_bo *bo;
 	int byte_align;
 	u32 domain, alloc_domain;
@@ -1193,15 +1144,13 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 			return -EINVAL;
 		user_addr = *offset;
 	} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
-		domain = AMDGPU_GEM_DOMAIN_GTT;
-		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
 		if (size > UINT_MAX)
 			return -EINVAL;
 		sg = create_doorbell_sg(*offset, size);
 		if (!sg)
 			return -ENOMEM;
-		bo_type = ttm_bo_type_sg;
 	} else {
 		return -EINVAL;
 	}
@@ -1242,13 +1191,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	if (!sg) {
-		ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
-							     alloc_domain);
-		if (ret) {
-			pr_debug("Insufficient system memory\n");
-			goto err_reserve_limit;
-		}
+	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+	if (ret) {
+		pr_debug("Insufficient system memory\n");
+		goto err_reserve_limit;
 	}
 
 	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1257,22 +1203,13 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	/* Allocate buffer object. Userptr objects need to start out
 	 * in the CPU domain, get moved to GTT when pinned.
 	 */
-#if 0
-	ret = amdgpu_bo_create(adev, size, byte_align, alloc_domain,
-			       alloc_flags, bo_type, NULL, &bo);
-#else
-	ret = amdgpu_bo_create(adev, size, byte_align, false , alloc_domain,
-			       alloc_flags, sg , NULL, &bo);
-#endif
+	ret = amdgpu_bo_create(adev, size, byte_align, false,
+			       alloc_domain, alloc_flags, sg, NULL, &bo);
 	if (ret) {
 		pr_debug("Failed to create BO on domain %s. ret %d\n",
 				domain_string(alloc_domain), ret);
 		goto err_bo_create;
 	}
-	if (bo_type == ttm_bo_type_sg) {
-		bo->tbo.sg = sg;
-		bo->tbo.ttm->sg = sg;
-	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
 	if (user_addr)
@@ -1302,8 +1239,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 allocate_init_user_pages_failed:
 	amdgpu_bo_unref(&bo);
 err_bo_create:
-	if (!sg)
-		unreserve_system_mem_limit(adev, size, alloc_domain);
+	unreserve_system_mem_limit(adev, size, alloc_domain);
 err_reserve_limit:
 	kfree(*mem);
 err:
@@ -1352,7 +1288,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 		pr_debug("%s: Freeing user_pages array\n", __func__);
 		if (mem->user_pages[0])
 			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages, 0);
+				      mem->bo->tbo.ttm->num_pages, 0);
 		kvfree(mem->user_pages);
 	}
@@ -1607,14 +1543,11 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
 {
 	struct amdgpu_sync sync;
 	int ret;
-	struct amdgpu_device *adev;
-
-	adev = get_amdgpu_device(kgd);
 
 	amdgpu_sync_create(&sync);
 
 	mutex_lock(&mem->lock);
-	amdgpu_sync_clone(adev , &mem->sync, &sync);
+	amdgpu_sync_clone(&mem->sync, &sync);
 	mutex_unlock(&mem->lock);
 
 	ret = amdgpu_sync_wait(&sync, intr);
@@ -1758,8 +1691,7 @@ static int get_sg_table(struct amdgpu_device *adev,
 		goto out;
 
 	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) {
-		bus_addr = amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start
-			+ adev->gmc.aper_base + offset;
+		bus_addr = bo->tbo.offset + adev->gmc.aper_base + offset;
 
 		for_each_sg(sg->sgl, s, sg->orig_nents, i) {
 			uint64_t chunk_size, length;
@@ -1890,22 +1822,13 @@ int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_dev *kgd, void *vm,
 			struct dma_buf **dmabuf)
 {
 	struct amdgpu_device *adev = NULL;
-	struct amdgpu_bo *bo = NULL;
-	struct drm_gem_object *gobj = NULL;
 
 	if (!dmabuf || !kgd || !vm || !mem)
 		return -EINVAL;
 
 	adev = get_amdgpu_device(kgd);
-	bo = mem->bo;
-
-	gobj = amdgpu_gem_prime_foreign_bo(adev, bo);
-	if (gobj == NULL) {
-		pr_err("Export BO failed. Unable to find/create GEM object\n");
-		return -EINVAL;
-	}
-
-	*dmabuf = amdgpu_gem_prime_export(adev->ddev, gobj, 0);
+	*dmabuf = amdgpu_gem_prime_export(adev->ddev, &mem->bo->gem_base, 0);
 
 	return 0;
 }
@@ -2006,8 +1929,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 				return -ENOMEM;
 			}
 		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages,
-				      bo->tbo.ttm->num_pages, 0);
+			release_pages(mem->user_pages, bo->tbo.ttm->num_pages, 0);
 		}
 
 		/* Get updated user pages */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e3e5646..3168565 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -185,7 +185,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 		if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
 			continue;
 
-		r = kcl_reservation_object_wait_timeout_rcu(bo->tbo.resv,
+		r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 77f15fe..f3b4241 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2506,7 +2506,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		/* First Compute VM: enable compute power profile */
 		if (adev->powerplay.pp_funcs->switch_power_profile)
 			amdgpu_dpm_switch_power_profile(adev,
-					AMD_PP_COMPUTE_PROFILE);
+					PP_SMC_POWER_PROFILE_COMPUTE, true);
 	}
 	mutex_unlock(&id_mgr->lock);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 3e28398..b603f40 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4573,7 +4573,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
-	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
 
 	return 0;
 }
-- 
2.7.4
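
Editor's note (not part of the patch): with init_kfd_vm() folded into amdgpu_amdkfd_gpuvm_create_process_vm(), the exported interface is a create/destroy pair in which all GPUs of a process share one amdkfd_process_info and one eviction fence, while each GPU gets its own amdgpu_vm whose KFD state now lives directly in struct amdgpu_vm. The sketch below illustrates the intended calling sequence for two devices; it is illustrative only. example_create_vms() and the two kgd_dev handles are hypothetical, and error handling is trimmed to the essentials.

/* Hypothetical caller, for illustration only */
static int example_create_vms(struct kgd_dev *kgd0, struct kgd_dev *kgd1)
{
	void *vm0 = NULL, *vm1 = NULL;
	void *process_info = NULL;	/* allocated by the first create call */
	struct dma_fence *ef = NULL;	/* eviction-fence reference for the caller */
	int ret;

	/* First call sees *process_info == NULL and allocates it */
	ret = amdgpu_amdkfd_gpuvm_create_process_vm(kgd0, &vm0,
						    &process_info, &ef);
	if (ret)
		return ret;

	/* Second GPU reuses process_info; *ef is only set on the first call */
	ret = amdgpu_amdkfd_gpuvm_create_process_vm(kgd1, &vm1,
						    &process_info, &ef);
	if (ret)
		goto out;

	/* ... allocate and map memory against vm0 and vm1 ... */

	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd1, vm1);
out:
	/* The last destroy tears down process_info and the eviction fence */
	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd0, vm0);
	dma_fence_put(ef);	/* drop the reference taken via *ef */
	return ret;
}

The point the example exercises is the split of ownership this patch establishes: process_info and the eviction fence are per process, while the page-directory offset (get_vm_pd_gpu_offset() with reserve=true) and the vm_list_node linkage are per VM.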