Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1640-drm-amdgpu-New-HSA-MMU-notifiers-to-work-under-memor.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1640-drm-amdgpu-New-HSA-MMU-notifiers-to-work-under-memor.patch | 1434 |
1 files changed, 0 insertions, 1434 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1640-drm-amdgpu-New-HSA-MMU-notifiers-to-work-under-memor.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1640-drm-amdgpu-New-HSA-MMU-notifiers-to-work-under-memor.patch deleted file mode 100644 index faa756f6..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1640-drm-amdgpu-New-HSA-MMU-notifiers-to-work-under-memor.patch +++ /dev/null @@ -1,1434 +0,0 @@ -From 8683beb2f68e2116a28f5a922ec9d7eb81c35faa Mon Sep 17 00:00:00 2001 -From: Felix Kuehling <Felix.Kuehling@amd.com> -Date: Tue, 21 Mar 2017 11:55:47 -0400 -Subject: [PATCH 1640/4131] drm/amdgpu: New HSA MMU notifiers to work under - memory pressure - -MMU notifiers can be invoked in reclaim-fs context under memory -pressure. Any locks held in this case are subject to limitations to -prevent potential deadlocks: memory allocations performed while -holding those locks in any context must not cause memory reclaims. - -Particularly reservation objects are problematic in this respect, -because they are locked (reserved) in too many places, potentially -even other drivers, to enforce such limitations. - -This commit rewrites the HSA MMU notifier to avoid locking reservation -objects. To allow this, the MMU notifier cannot take the current -mapping state of a BO into account. MMU notifiers on HSA userptr -BOs cause evictions on all GPUs. The only locks taken by the new -MMU notifiers are the rmn->lock and the KFD DQM lock. - -Restore is still done by a delayed worker, but handles multiple BOs -in a single per-process worker, rather than per-BO workers. - -Change-Id: I011fd11c95747caa117a592845ba0a85dc6e77be -Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 170 ----- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 27 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 843 +++++++++++++---------- - drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 7 +- - 4 files changed, 480 insertions(+), 567 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -index 3a1776b..f27d67bc 100755 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -@@ -202,176 +202,6 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) - return r; - } - --/* Cancel any scheduled restore work or wait for it to finish. Must be -- * called with the mem->lock held. First drop the mm reference. If the -- * worker has already started, it will detect that mm was dropped and -- * cancel itself. -- * -- * If the worker has already started, it needs to take the -- * mem->lock. To prevent deadlocks, we need to briefly drop the lock -- * while waiting. During that time someone else may schedule another -- * restore. So repeat the process if necessary. -- * -- * mmput needs to be called without holding the lock to prevent -- * circular lock dependencies. -- */ --static void cancel_restore_locked(struct kgd_mem *mem) --{ -- struct mm_struct *mm; -- -- while (mem->mm) { -- /* update_user_pages needs to drop the lock -- * briefly. 
Therefore holding the lock is no guarantee -- * that no restore is in progress -- */ -- if (mem->busy) { -- mutex_unlock(&mem->lock); -- schedule_timeout_uninterruptible(1); -- mutex_lock(&mem->lock); -- continue; -- } -- -- mm = mem->mm; -- mem->mm = NULL; -- -- mutex_unlock(&mem->lock); -- -- mmput(mm); -- cancel_delayed_work_sync(&mem->work); -- -- mutex_lock(&mem->lock); -- } --} -- --void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem) --{ -- mutex_lock(&mem->lock); -- cancel_restore_locked(mem); -- mutex_unlock(&mem->lock); --} -- --int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem, -- struct mm_struct *mm) --{ -- int r; -- -- if (!adev->kfd) -- return -ENODEV; -- -- mutex_lock(&mem->lock); -- -- if (++mem->evicted > 1) { -- /* Memory was already evicted. It may have been -- * scheduled for restoration, but that restoration -- * hasn't happened yet. When the worker starts it will -- * know and abort. -- */ -- mutex_unlock(&mem->lock); -- return 0; -- } -- -- r = amdgpu_amdkfd_gpuvm_evict_mem(mem, mm); -- -- if (r != 0) -- /* First eviction failed, setting count back to 0 will -- * make the corresponding restore fail gracefully */ -- mem->evicted = 0; -- else -- /* First eviction counts as 2. Eviction counter == 1 -- * means that restoration is scheduled. */ -- mem->evicted = 2; -- -- mutex_unlock(&mem->lock); -- -- return r; --} -- --static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work) --{ -- struct delayed_work *dwork = to_delayed_work(work); -- struct kgd_mem *mem = container_of(dwork, struct kgd_mem, work); -- struct amdgpu_device *adev; -- struct mm_struct *mm; -- int ret = 0; -- -- mutex_lock(&mem->lock); -- -- adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); -- mm = mem->mm; -- -- /* Check if restore was canceled */ -- if (!mm) { -- mutex_unlock(&mem->lock); -- return; -- } -- -- /* Only restore if no other eviction happened since restore -- * was scheduled. -- */ -- if (mem->evicted == 1) { -- ret = amdgpu_amdkfd_gpuvm_restore_mem(mem, mm); -- if (ret != -EBUSY && ret != -EDEADLK) -- mem->evicted = 0; -- } -- -- /* If restore failed due to the VM being updated concurrently, -- * reschedule restore again in a jiffie -- */ -- if (ret == -EDEADLK && mem->evicted == 1) { -- pr_err("Rescheduling restore\n"); -- mm = NULL; -- schedule_delayed_work(&mem->work, 1); -- } else { -- BUG_ON(mem->mm != mm); -- mem->mm = NULL; -- } -- -- mutex_unlock(&mem->lock); -- -- if (mm) -- mmput(mm); --} -- --int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, -- struct kgd_mem *mem, -- struct mm_struct *mm, -- unsigned long delay) --{ -- int r = 0; -- -- if (!adev->kfd) -- return -ENODEV; -- -- mutex_lock(&mem->lock); -- -- if (mem->evicted <= 1) { -- /* Buffer is not evicted (== 0) or its restoration is -- * already scheduled (== 1) */ -- pr_err("Unbalanced restore of evicted buffer %p\n", mem); -- mutex_unlock(&mem->lock); -- return -EFAULT; -- } else if (--mem->evicted > 1) { -- mutex_unlock(&mem->lock); -- return 0; -- } -- -- /* mem->evicted is 1 after decrementing. If a restoration was -- * already scheduled, just let it do its job. Otherwise -- * schedule another one. 
-- */ -- if (!mem->mm) { -- mem->mm = mm; -- atomic_inc(&mm->mm_users); -- INIT_DELAYED_WORK(&mem->work, -- amdgdu_amdkfd_restore_mem_worker); -- schedule_delayed_work(&mem->work, delay); -- } -- -- mutex_unlock(&mem->lock); -- -- return r; --} -- - int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len) -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -index a6a538ac..9e89aee 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -@@ -57,17 +57,18 @@ struct kgd_mem { - unsigned int mapped_to_gpu_memory; - void *kptr; - uint64_t va; -- unsigned int evicted; /* eviction counter */ -- struct delayed_work work; /* for restore evicted mem */ -- struct mm_struct *mm; /* for restore */ - - uint32_t mapping_flags; - -+ atomic_t invalid; -+ struct amdkfd_process_info *process_info; -+ struct page **user_pages; -+ -+ - /* flags bitfield */ - bool coherent : 1; - bool no_substitute : 1; - bool aql_queue : 1; -- bool busy : 1; - }; - - -@@ -89,6 +90,9 @@ struct amdkfd_process_info { - struct list_head vm_list_head; - /* List head for all KFD BOs that belong to a KFD process. */ - struct list_head kfd_bo_list; -+ /* List of userptr BOs that are valid or invalid */ -+ struct list_head userptr_valid_list; -+ struct list_head userptr_inval_list; - /* Lock to protect kfd_bo_list */ - struct mutex lock; - -@@ -96,6 +100,11 @@ struct amdkfd_process_info { - unsigned int n_vms; - /* Eviction Fence */ - struct amdgpu_amdkfd_fence *eviction_fence; -+ -+ /* MMU-notifier related fields */ -+ atomic_t evicted_bos; -+ struct delayed_work work; -+ struct pid *pid; - }; - - /* struct amdkfd_vm - -@@ -130,13 +139,9 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); - void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); - void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); - --int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem, -- struct mm_struct *mm); --int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, -- struct kgd_mem *mem, -- struct mm_struct *mm, -- unsigned long delay); --void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem); -+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); -+int amdgpu_amdkfd_schedule_restore_userptr(struct kgd_mem *mem, -+ unsigned long delay); - int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len); -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -index 29ca428..443348e 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -@@ -74,6 +74,8 @@ static const char * const domain_bit_to_string[] = { - - #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] - -+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); -+ - - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) - { -@@ -389,44 +391,28 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, - static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, - bool wait) - { -- int ret = 0; -+ int ret; - -- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { -- amdgpu_ttm_placement_from_domain(bo, domain); -+ if 
(WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), -+ "Called with userptr BO")) -+ return -EINVAL; - -- ret = ttm_bo_validate(&bo->tbo, &bo->placement, -- false, false); -+ amdgpu_ttm_placement_from_domain(bo, domain); -+ -+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); -+ if (ret) -+ goto validate_fail; -+ if (wait) { -+ struct amdgpu_amdkfd_fence **ef_list; -+ unsigned int ef_count; -+ -+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, -+ &ef_count); - if (ret) - goto validate_fail; -- if (wait) { -- struct amdgpu_amdkfd_fence **ef_list; -- unsigned int ef_count; -- -- ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, -- &ef_list, -- &ef_count); -- if (ret) -- goto validate_fail; -- -- ttm_bo_wait(&bo->tbo, false, false); -- amdgpu_amdkfd_add_eviction_fence(bo, ef_list, -- ef_count); -- } -- } else { -- amdgpu_ttm_placement_from_domain(bo, domain); -- ret = ttm_bo_validate(&bo->tbo, &bo->placement, -- true, false); -- if (ret) { -- /* Don't leak the pages. If validate failed, -- * the pages aren't bound, and won't be -- * released by unbind later on -- */ -- release_pages(bo->tbo.ttm->pages, -- bo->tbo.ttm->num_pages, 0); -- goto validate_fail; -- } -- if (wait) -- ttm_bo_wait(&bo->tbo, false, false); -+ -+ ttm_bo_wait(&bo->tbo, false, false); -+ amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); - } - - validate_fail: -@@ -440,19 +426,6 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) - return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); - } - --static int amdgpu_amdkfd_bo_invalidate(struct amdgpu_bo *bo) --{ -- int ret = 0; -- -- if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { -- amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); -- ret = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); -- if (ret != 0) -- pr_err("Failed to invalidate userptr BO\n"); -- } -- return ret; --} -- - static int validate_pt_pd_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm) - { - struct amdgpu_bo *pd = vm->root.bo; -@@ -479,7 +452,8 @@ static int validate_pt_pd_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm) - } - - static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, -- struct amdkfd_process_info *process_info) -+ struct amdkfd_process_info *process_info, -+ bool userptr) - { - struct ttm_validate_buffer *entry = &mem->validate_list; - struct amdgpu_bo *bo = mem->bo; -@@ -488,10 +462,97 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, - entry->shared = true; - entry->bo = &bo->tbo; - mutex_lock(&process_info->lock); -- list_add_tail(&entry->head, &process_info->kfd_bo_list); -+ if (userptr) -+ list_add_tail(&entry->head, &process_info->userptr_valid_list); -+ else -+ list_add_tail(&entry->head, &process_info->kfd_bo_list); - mutex_unlock(&process_info->lock); - } - -+/* Initializes user pages. It registers the MMU notifier and validates -+ * the userptr BO in the GTT domain. -+ * -+ * The BO must already be on the userptr_valid_list. Otherwise an -+ * eviction and restore may happen that leaves the new BO unmapped -+ * with the user mode queues running. -+ * -+ * Takes the process_info->lock to protect against concurrent restore -+ * workers. -+ * -+ * Returns 0 for success, negative errno for errors. 
-+ */ -+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, -+ uint64_t user_addr) -+{ -+ struct amdkfd_process_info *process_info = mem->process_info; -+ struct amdgpu_bo *bo = mem->bo; -+ int ret = 0; -+ -+ mutex_lock(&process_info->lock); -+ -+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); -+ if (ret) { -+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret); -+ goto out; -+ } -+ -+ ret = amdgpu_mn_register(bo, user_addr); -+ if (ret) { -+ pr_err("%s: Failed to register MMU notifier: %d\n", -+ __func__, ret); -+ goto out; -+ } -+ -+ /* If no restore worker is running concurrently, user_pages -+ * should not be allocated -+ */ -+ WARN(mem->user_pages, "Leaking user_pages array"); -+ -+ mem->user_pages = drm_calloc_large(bo->tbo.ttm->num_pages, -+ sizeof(struct page *)); -+ if (!mem->user_pages) { -+ pr_err("%s: Failed to allocate pages array\n", __func__); -+ ret = -ENOMEM; -+ goto unregister_out; -+ } -+ -+ down_read(&mm->mmap_sem); -+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); -+ up_read(&mm->mmap_sem); -+ if (ret) { -+ pr_err("%s: Failed to get user pages\n", __func__); -+ goto free_out; -+ } -+ -+ memcpy(bo->tbo.ttm->pages, mem->user_pages, -+ sizeof(struct page *) * bo->tbo.ttm->num_pages); -+ -+ ret = amdgpu_bo_reserve(bo, true); -+ if (ret) { -+ pr_err("%s: Failed to reserve BO\n", __func__); -+ goto release_out; -+ } -+ amdgpu_ttm_placement_from_domain(bo, mem->domain); -+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, -+ true, false); -+ if (ret) -+ pr_err("%s: failed to validate BO\n", __func__); -+ amdgpu_bo_unreserve(bo); -+ -+release_out: -+ if (ret) -+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages, 0); -+free_out: -+ drm_free_large(mem->user_pages); -+ mem->user_pages = NULL; -+unregister_out: -+ if (ret) -+ amdgpu_mn_unregister(bo); -+out: -+ mutex_unlock(&process_info->lock); -+ return ret; -+} -+ - static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, - uint64_t size, void *vm, struct kgd_mem **mem, - uint64_t *offset, void **kptr, -@@ -578,21 +639,6 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, - if (userptr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; - -- if (userptr) { -- ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); -- if (ret) { -- pr_err("Failed to set userptr. 
ret %d\n", ret); -- goto allocate_mem_set_userptr_failed; -- } -- -- ret = amdgpu_mn_register(bo, user_addr); -- if (ret) { -- pr_err("Failed to register MMU notifier %d\n", -- ret); -- goto allocate_mem_set_userptr_failed; -- } -- } -- - if (kptr) { - ret = amdgpu_bo_reserve(bo, true); - if (ret) { -@@ -621,7 +667,18 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, - (*mem)->va = va; - (*mem)->domain = domain; - (*mem)->mapped_to_gpu_memory = 0; -- add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); -+ (*mem)->process_info = kfd_vm->process_info; -+ add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, userptr); -+ -+ if (userptr) { -+ ret = init_user_pages(*mem, current->mm, user_addr); -+ if (ret) { -+ mutex_lock(&kfd_vm->process_info->lock); -+ list_del(&(*mem)->validate_list.head); -+ mutex_unlock(&kfd_vm->process_info->lock); -+ goto allocate_init_user_pages_failed; -+ } -+ } - - if (offset) - *offset = amdgpu_bo_mmap_offset(bo); -@@ -633,9 +690,8 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va, - allocate_mem_pin_bo_failed: - amdgpu_bo_unreserve(bo); - allocate_mem_reserve_bo_failed: -- if (userptr) -- amdgpu_mn_unregister(bo); --allocate_mem_set_userptr_failed: -+ -+allocate_init_user_pages_failed: - amdgpu_bo_unref(&bo); - err_bo_create: - kfree(*mem); -@@ -807,90 +863,6 @@ static void unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, - ctx->vm_pd = NULL; - } - --/* Must be called with mem->lock held and a BO/VM reservation -- * context. Temporarily drops the lock and reservation for updating -- * user pointers, to avoid circular lock dependencies between MM locks -- * and buffer reservations. If user pages are invalidated while the -- * lock and reservation are dropped, try again. */ --static int update_user_pages(struct kgd_mem *mem, struct mm_struct *mm, -- struct bo_vm_reservation_context *ctx) --{ -- struct amdgpu_bo *bo; -- unsigned tries = 10; -- int ret; -- -- bo = mem->bo; -- if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) -- return 0; -- -- if (bo->tbo.ttm->state != tt_bound) { -- struct page **pages; -- int invalidated; -- -- /* get user pages without locking the BO to avoid -- * circular lock dependency with MMU notifier. Retry -- * until we have the current version. */ -- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); -- ctx->reserved = false; -- pages = drm_calloc_large(bo->tbo.ttm->num_pages, -- sizeof(struct page *)); -- if (!pages) -- return -ENOMEM; -- -- mem->busy = true; -- mutex_unlock(&mem->lock); -- -- while (true) { -- down_read(&mm->mmap_sem); -- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, pages); -- up_read(&mm->mmap_sem); -- -- mutex_lock(&mem->lock); -- mem->busy = false; -- if (ret != 0) -- return ret; -- -- BUG_ON(bo != mem->bo); -- -- ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, -- false, &ctx->duplicates); -- if (unlikely(ret != 0)) { -- release_pages(pages, bo->tbo.ttm->num_pages, 0); -- drm_free_large(pages); -- return ret; -- } -- ctx->reserved = true; -- if (!amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, -- &invalidated) || -- bo->tbo.ttm->state == tt_bound || -- --tries == 0) -- break; -- -- release_pages(pages, bo->tbo.ttm->num_pages, 0); -- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); -- ctx->reserved = false; -- mem->busy = true; -- mutex_unlock(&mem->lock); -- } -- -- /* If someone else already bound it, release our pages -- * array, otherwise copy it into the ttm BO. 
*/ -- if (bo->tbo.ttm->state == tt_bound || tries == 0) -- release_pages(pages, bo->tbo.ttm->num_pages, 0); -- else -- memcpy(bo->tbo.ttm->pages, pages, -- sizeof(struct page *) * bo->tbo.ttm->num_pages); -- drm_free_large(pages); -- } -- -- if (tries == 0) { -- pr_err("Gave up trying to update user pages\n"); -- return -EDEADLK; -- } -- -- return 0; --} -- - static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, - struct amdgpu_sync *sync) -@@ -976,7 +948,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, - } - - static int map_bo_to_gpuvm(struct amdgpu_device *adev, -- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) -+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, -+ bool no_update_pte) - { - int ret; - struct amdgpu_bo *bo = entry->bo_va->bo; -@@ -1025,6 +998,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, - /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); - -+ if (no_update_pte) -+ return 0; -+ - ret = update_gpuvm_pte(adev, entry, sync); - if (ret != 0) { - pr_err("update_gpuvm_pte() failed\n"); -@@ -1169,8 +1145,23 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - /* lock is not needed after this, since mem is unused and will - * be freed anyway */ - -+ /* No more MMU notifiers */ - amdgpu_mn_unregister(mem->bo); -- amdgpu_amdkfd_cancel_restore_mem(mem); -+ -+ /* Make sure restore workers don't access the BO any more */ -+ bo_list_entry = &mem->validate_list; -+ mutex_lock(&process_info->lock); -+ list_del(&bo_list_entry->head); -+ mutex_unlock(&process_info->lock); -+ -+ /* Free user pages if necessary */ -+ if (mem->user_pages) { -+ pr_debug("%s: Freeing user_pages array\n", __func__); -+ if (mem->user_pages[0]) -+ release_pages(mem->user_pages, -+ mem->bo->tbo.ttm->num_pages, 0); -+ drm_free_large(mem->user_pages); -+ } - - ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx); - if (unlikely(ret != 0)) -@@ -1203,11 +1194,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - } - - /* Free the BO*/ -- bo_list_entry = &mem->validate_list; -- mutex_lock(&process_info->lock); -- list_del(&bo_list_entry->head); -- mutex_unlock(&process_info->lock); -- - amdgpu_bo_unref(&mem->bo); - kfree(mem); - -@@ -1226,14 +1212,28 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kfd_bo_va_list *bo_va_entry = NULL; - struct kfd_bo_va_list *bo_va_entry_aql = NULL; - struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; -- int num_to_quiesce = 0; - unsigned long bo_size; -+ bool is_invalid_userptr; - - BUG_ON(kgd == NULL); - BUG_ON(mem == NULL); - - adev = get_amdgpu_device(kgd); - -+ /* Make sure restore is not running concurrently. Since we -+ * don't map invalid userptr BOs, we rely on the next restore -+ * worker to do the mapping -+ */ -+ mutex_lock(&mem->process_info->lock); -+ -+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be -+ * sure that the MMU notifier is no longer running -+ * concurrently and the queues are actually stopped -+ */ -+ down_read(¤t->mm->mmap_sem); -+ is_invalid_userptr = atomic_read(&mem->invalid); -+ up_read(¤t->mm->mmap_sem); -+ - mutex_lock(&mem->lock); - - bo = mem->bo; -@@ -1252,6 +1252,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - if (unlikely(ret != 0)) - goto bo_reserve_failed; - -+ /* Userptr can be marked as "not invalid", but not actually be -+ * validated yet (still in the system domain). 
In that case -+ * the queues are still stopped and we can leave mapping for -+ * the next restore worker -+ */ -+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) -+ is_invalid_userptr = true; -+ - if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { - ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, - &bo_va_entry); -@@ -1265,13 +1273,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - } - } - -- if (mem->mapped_to_gpu_memory == 0 && !mem->evicted) { -- ret = update_user_pages(mem, current->mm, &ctx); -- if (ret != 0) { -- pr_err("update_user_pages failed\n"); -- goto update_user_pages_failed; -- } -- -+ if (mem->mapped_to_gpu_memory == 0 && -+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. -@@ -1285,22 +1288,12 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->vm == vm && !entry->is_mapped) { -- if (mem->evicted) { -- /* If the BO is evicted, just mark the -- * mapping as mapped and the GPU's queues -- * will be stopped later. -- */ -- entry->is_mapped = true; -- mem->mapped_to_gpu_memory++; -- num_to_quiesce++; -- continue; -- } -- - pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", - entry->va, entry->va + bo_size, - entry); - -- ret = map_bo_to_gpuvm(adev, entry, &ctx.sync); -+ ret = map_bo_to_gpuvm(adev, entry, &ctx.sync, -+ is_invalid_userptr); - if (ret != 0) { - pr_err("Failed to map radeon bo to gpuvm\n"); - goto map_bo_to_gpuvm_failed; -@@ -1318,24 +1311,11 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - true); - unreserve_bo_and_vms(&ctx, true); - -- while (num_to_quiesce--) { -- /* Now stop the GPU's queues while bo and VMs are unreserved. -- * quiesce_mm() is reference counted, and that is why we can -- * call it multiple times. 
-- */ -- ret = kgd2kfd->quiesce_mm(adev->kfd, current->mm); -- if (ret != 0) { -- pr_err("quiesce_mm() failed\n"); -- reserve_bo_and_vm(mem, vm, &ctx); -- goto map_bo_to_gpuvm_failed; -- } -- } -- -+ mutex_unlock(&mem->process_info->lock); - mutex_unlock(&mem->lock); - return ret; - - map_bo_to_gpuvm_failed: --update_user_pages_failed: - if (bo_va_entry_aql) - remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); - add_bo_to_vm_failed_aql: -@@ -1344,6 +1324,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - add_bo_to_vm_failed: - unreserve_bo_and_vms(&ctx, false); - bo_reserve_failed: -+ mutex_unlock(&mem->process_info->lock); - mutex_unlock(&mem->lock); - return ret; - } -@@ -1407,6 +1388,8 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - mutex_init(&info->lock); - INIT_LIST_HEAD(&info->vm_list_head); - INIT_LIST_HEAD(&info->kfd_bo_list); -+ INIT_LIST_HEAD(&info->userptr_valid_list); -+ INIT_LIST_HEAD(&info->userptr_inval_list); - - info->eviction_fence = - amdgpu_amdkfd_fence_create(fence_context_alloc(1), -@@ -1416,6 +1399,12 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - goto create_evict_fence_fail; - } - -+ info->pid = get_task_pid(current->group_leader, -+ PIDTYPE_PID); -+ atomic_set(&info->evicted_bos, 0); -+ INIT_DELAYED_WORK(&info->work, -+ amdgpu_amdkfd_restore_userptr_worker); -+ - *process_info = info; - } - -@@ -1468,9 +1457,15 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) - list_del(&kfd_vm->vm_list_node); - mutex_unlock(&process_info->lock); - -- /* Release eviction fence */ -+ /* Release per-process resources */ - if (!process_info->n_vms) { -+ WARN_ON(!list_empty(&process_info->kfd_bo_list)); -+ WARN_ON(!list_empty(&process_info->userptr_valid_list)); -+ WARN_ON(!list_empty(&process_info->userptr_inval_list)); -+ - fence_put(&process_info->eviction_fence->base); -+ cancel_delayed_work_sync(&process_info->work); -+ put_pid(process_info->pid); - kfree(process_info); - } - -@@ -1521,7 +1516,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - int ret = 0; - struct bo_vm_reservation_context ctx; - struct amdkfd_process_info *process_info; -- int num_to_resume = 0; - unsigned long bo_size; - - BUG_ON(kgd == NULL); -@@ -1561,17 +1555,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - - list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->vm == vm && entry->is_mapped) { -- if (mem->evicted) { -- /* If the BO is evicted, just mark the -- * mapping as unmapped and the GPU's queues -- * will be resumed later. -- */ -- entry->is_mapped = false; -- mem->mapped_to_gpu_memory--; -- num_to_resume++; -- continue; -- } -- - pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", - entry->va, - entry->va + bo_size, -@@ -1593,14 +1576,13 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - } - - /* If BO is unmapped from all VMs, unfence it. It can be evicted if -- * required. User pages of userptr BOs can be released. -+ * required. 
- */ -- if (mem->mapped_to_gpu_memory == 0) { -+ if (mem->mapped_to_gpu_memory == 0 && -+ !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence, - NULL, NULL); -- amdgpu_amdkfd_bo_invalidate(mem->bo); -- } - - if (mapped_before == mem->mapped_to_gpu_memory) { - pr_debug("BO VA 0x%llx size 0x%lx is not mapped to vm %p\n", -@@ -1610,21 +1592,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - - unreserve_out: - unreserve_bo_and_vms(&ctx, false); -- -- while (current->mm && num_to_resume--) { -- /* Now resume GPU's queues while bo and VMs are -- * unreserved. This function runs in a work queue -- * during process termination. Only resume queues if -- * we're running in process context. resume_mm() is -- * reference counted, and that is why we can call it -- * multiple times. -- */ -- ret = kgd2kfd->resume_mm(adev->kfd, current->mm); -- if (ret != 0) { -- pr_err("resume_mm() failed.\n"); -- break; -- } -- } - out: - mutex_unlock(&mem->lock); - return ret; -@@ -1875,7 +1842,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, - (*mem)->domain = (bo->prefered_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; - (*mem)->mapped_to_gpu_memory = 0; -- add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); -+ (*mem)->process_info = kfd_vm->process_info; -+ add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info, false); - - return 0; - } -@@ -1938,208 +1906,321 @@ static int validate_pd_pt_bos(struct amdkfd_process_info *process_info) - return 0; - } - --/* Runs out of process context. mem->lock must be held. */ --int amdgpu_amdkfd_gpuvm_evict_mem(struct kgd_mem *mem, struct mm_struct *mm) -+/* Evict a userptr BO by stopping the queues if necessary -+ * -+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it -+ * cannot do any memory allocations, and cannot take any locks that -+ * are held elsewhere while allocating memory. Therefore this is as -+ * simple as possible, using atomic counters. -+ * -+ * It doesn't do anything to the BO itself. The real work happens in -+ * restore, where we get updated page addresses. This function only -+ * ensures that GPU access to the BO is stopped. -+ */ -+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, -+ struct mm_struct *mm) - { -- struct kfd_bo_va_list *entry; -- unsigned int n_evicted = 0, n_unmapped = 0; -+ struct amdkfd_process_info *process_info = mem->process_info; -+ int invalid, evicted_bos; - int r = 0; -- struct bo_vm_reservation_context ctx; - -- pr_debug("Evicting buffer %p\n", mem); -- -- if (mem->mapped_to_gpu_memory == 0) -- return 0; -+ invalid = atomic_inc_return(&mem->invalid); -+ evicted_bos = atomic_inc_return(&process_info->evicted_bos); -+ if (evicted_bos == 1) { -+ /* First eviction, stop the queues */ -+ r = kgd2kfd->quiesce_mm(NULL, mm); -+ if (r != 0) -+ pr_err("Failed to quiesce KFD\n"); -+ } - -- /* Remove all GPU mappings of the buffer, but don't change any -- * of the is_mapped flags so we can restore it later. The -- * queues of the affected GPUs are quiesced first. Count the -- * number of evicted mappings so we can roll back if something -- * goes wrong. */ -+ return r; -+} - -- list_for_each_entry(entry, &mem->bo_va_list, bo_list) { -- struct amdgpu_device *adev; -+/* Update invalid userptr BOs -+ * -+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to -+ * userptr_inval_list and updates user pages for all BOs that have -+ * been invalidated since their last update. 
-+ */ -+static int update_invalid_user_pages(struct amdkfd_process_info *process_info, -+ struct mm_struct *mm) -+{ -+ struct kgd_mem *mem, *tmp_mem; -+ struct amdgpu_bo *bo; -+ int invalid, ret = 0; - -- if (!entry->is_mapped) -- continue; -+ /* Move all invalidated BOs to the userptr_inval_list and -+ * release their user pages by migration to the CPU domain -+ */ -+ list_for_each_entry_safe(mem, tmp_mem, -+ &process_info->userptr_valid_list, -+ validate_list.head) { -+ if (!atomic_read(&mem->invalid)) -+ continue; /* BO is still valid */ - -- adev = (struct amdgpu_device *)entry->kgd_dev; -+ bo = mem->bo; - -- r = kgd2kfd->quiesce_mm(adev->kfd, mm); -- if (r != 0) { -- pr_err("Failed to quiesce KFD\n"); -- goto fail; -+ if (amdgpu_bo_reserve(bo, true)) -+ return -EAGAIN; -+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); -+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); -+ amdgpu_bo_unreserve(bo); -+ if (ret) { -+ pr_err("%s: Failed to invalidate userptr BO\n", -+ __func__); -+ return -EAGAIN; - } - -- n_evicted++; -+ list_move_tail(&mem->validate_list.head, -+ &process_info->userptr_inval_list); - } - -- r = reserve_bo_and_cond_vms(mem, NULL, VA_MAPPED, &ctx); -- if (unlikely(r != 0)) -- goto fail; -- -- list_for_each_entry(entry, &mem->bo_va_list, bo_list) { -- struct amdgpu_device *adev; -+ if (list_empty(&process_info->userptr_inval_list)) -+ return 0; /* All evicted userptr BOs were freed */ - -- if (!entry->is_mapped) -+ /* Go through userptr_inval_list and update any invalid user_pages */ -+ down_read(&mm->mmap_sem); -+ list_for_each_entry(mem, &process_info->userptr_inval_list, -+ validate_list.head) { -+ invalid = atomic_read(&mem->invalid); -+ if (!invalid) -+ /* BO hasn't been invalidated since the last -+ * revalidation attempt. Keep its BO list. -+ */ - continue; - -- adev = (struct amdgpu_device *)entry->kgd_dev; -+ bo = mem->bo; -+ -+ if (!mem->user_pages) { -+ mem->user_pages = -+ drm_calloc_large(bo->tbo.ttm->num_pages, -+ sizeof(struct page *)); -+ if (!mem->user_pages) { -+ ret = -ENOMEM; -+ pr_err("%s: Failed to allocate pages array\n", -+ __func__); -+ goto unlock_mmap_out; -+ } -+ } else if (mem->user_pages[0]) { -+ release_pages(mem->user_pages, -+ bo->tbo.ttm->num_pages, 0); -+ } - -- r = unmap_bo_from_gpuvm(adev, entry, &ctx.sync); -- if (r != 0) { -- pr_err("Failed unmap VA 0x%llx\n", -- mem->va); -- unreserve_bo_and_vms(&ctx, true); -- goto fail; -+ /* Get updated user pages */ -+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, -+ mem->user_pages); -+ if (ret) { -+ mem->user_pages[0] = NULL; -+ pr_err("%s: Failed to get user pages\n", __func__); -+ goto unlock_mmap_out; - } - -- n_unmapped++; -+ /* Mark the BO as valid unless it was invalidated -+ * again concurrently -+ */ -+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) { -+ ret = -EAGAIN; -+ goto unlock_mmap_out; -+ } - } -+unlock_mmap_out: -+ up_read(&mm->mmap_sem); -+ return ret; -+} - -- amdgpu_amdkfd_bo_invalidate(mem->bo); -- -- unreserve_bo_and_vms(&ctx, true); -- -- return 0; -+/* Validate invalid userptr BOs -+ * -+ * Validates BOs on the userptr_inval_list, and moves them back to the -+ * userptr_valid_list. Also updates GPUVM page tables with new page -+ * addresses and waits for the page table updates to complete. 
-+ */ -+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) -+{ -+ struct amdgpu_bo_list_entry *pd_bo_list_entries; -+ struct list_head resv_list, duplicates; -+ struct ww_acquire_ctx ticket; -+ struct amdgpu_sync sync; - --fail: -- /* To avoid hangs and keep state consistent, roll back partial -- * eviction by restoring queues and marking mappings as -- * unmapped. Access to now unmapped buffers will fault. */ -- list_for_each_entry(entry, &mem->bo_va_list, bo_list) { -- struct amdgpu_device *adev; -+ struct amdkfd_vm *peer_vm; -+ struct kgd_mem *mem, *tmp_mem; -+ struct amdgpu_bo *bo; -+ int i, ret; - -- if (n_evicted == 0) -- break; -- if (!entry->is_mapped) -- continue; -+ pd_bo_list_entries = kcalloc(process_info->n_vms, -+ sizeof(struct amdgpu_bo_list_entry), -+ GFP_KERNEL); -+ if (!pd_bo_list_entries) { -+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__); -+ return -ENOMEM; -+ } - -- if (n_unmapped) { -- entry->is_mapped = false; -- n_unmapped--; -- } -+ INIT_LIST_HEAD(&resv_list); -+ INIT_LIST_HEAD(&duplicates); - -- adev = (struct amdgpu_device *)entry->kgd_dev; -- if (kgd2kfd->resume_mm(adev->kfd, mm)) -- pr_err("Failed to resume KFD\n"); -- n_evicted--; -+ /* Get all the page directory BOs that need to be reserved */ -+ i = 0; -+ list_for_each_entry(peer_vm, &process_info->vm_list_head, -+ vm_list_node) -+ amdgpu_vm_get_pd_bo(&peer_vm->base, &resv_list, -+ &pd_bo_list_entries[i++]); -+ /* Add the userptr_inval_list entries to resv_list */ -+ list_for_each_entry(mem, &process_info->userptr_inval_list, -+ validate_list.head) { -+ list_add_tail(&mem->resv_list.head, &resv_list); -+ mem->resv_list.bo = mem->validate_list.bo; -+ mem->resv_list.shared = mem->validate_list.shared; - } - -- return r; --} -+ /* Reserve all BOs and page tables for validation */ -+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); -+ WARN(!list_empty(&duplicates), "Duplicates should be empty"); -+ if (ret) -+ goto out; - --/* Runs out of process context. mem->lock must be held. */ --int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm) --{ -- struct bo_vm_reservation_context ctx; -- struct kfd_bo_va_list *entry; -- uint32_t domain; -- int r, ret = 0; -- bool have_pages = false; -+ amdgpu_sync_create(&sync); - -- pr_debug("Restoring buffer %p\n", mem); -+ ret = validate_pd_pt_bos(process_info); -+ if (ret) -+ goto unreserve_out; - -- if (mem->mapped_to_gpu_memory == 0) -- return 0; -+ /* Validate BOs and update GPUVM page tables */ -+ list_for_each_entry_safe(mem, tmp_mem, -+ &process_info->userptr_inval_list, -+ validate_list.head) { -+ struct kfd_bo_va_list *bo_va_entry; - -- domain = mem->domain; -+ bo = mem->bo; - -- ret = reserve_bo_and_cond_vms(mem, NULL, VA_MAPPED, &ctx); -- if (likely(ret == 0)) { -- ret = update_user_pages(mem, mm, &ctx); -- have_pages = !ret; -- if (!have_pages) { -- unreserve_bo_and_vms(&ctx, false); -- if (ret == -ESRCH) -- /* process terminating, fail quiet and fast */ -- return ret; -- else if (ret == -EDEADLK) -- /* Someone else is still updating the -- * VM, let's try again later -- */ -- return ret; -- pr_err("get_user_pages failed. Probably userptr is freed. 
%d\n", -- ret); -+ /* Copy pages array and validate the BO */ -+ memcpy(bo->tbo.ttm->pages, mem->user_pages, -+ sizeof(struct page *) * bo->tbo.ttm->num_pages); -+ amdgpu_ttm_placement_from_domain(bo, mem->domain); -+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, -+ false, false); -+ if (ret) { -+ pr_err("%s: failed to validate BO\n", __func__); -+ goto unreserve_out; - } -- /* update_user_pages drops the lock briefly. Check if -- * someone else evicted or restored the buffer in the -- * mean time. Return -EBUSY to let the caller know. -+ -+ /* Validate succeeded, now the BO owns the pages, free -+ * our copy of the pointer array. Put this BO back on -+ * the userptr_valid_list. If we need to revalidate -+ * it, we need to start from scratch. - */ -- if (mem->evicted != 1) { -- if (have_pages) -- unreserve_bo_and_vms(&ctx, false); -- return -EBUSY; -- } -- } -+ drm_free_large(mem->user_pages); -+ mem->user_pages = NULL; -+ list_move_tail(&mem->validate_list.head, -+ &process_info->userptr_valid_list); - -- if (have_pages) { -- r = amdgpu_amdkfd_bo_validate(mem->bo, domain, true); -- if (unlikely(r != 0)) { -- pr_err("Failed to validate BO %p\n", mem); -- have_pages = false; -- unreserve_bo_and_vms(&ctx, false); -+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { -+ if (!bo_va_entry->is_mapped) -+ continue; -+ -+ ret = update_gpuvm_pte((struct amdgpu_device *) -+ bo_va_entry->kgd_dev, -+ bo_va_entry, &sync); -+ if (ret) { -+ pr_err("%s: update PTE failed\n", __func__); -+ /* make sure this gets validated again */ -+ atomic_inc(&mem->invalid); -+ goto unreserve_out; -+ } - } - } -+unreserve_out: -+ ttm_eu_backoff_reservation(&ticket, &resv_list); -+ amdgpu_sync_wait(&sync); -+ amdgpu_sync_free(&sync); -+out: -+ kfree(pd_bo_list_entries); - -- /* Try to restore all mappings. Mappings that fail to restore -- * will be marked as unmapped. If we failed to get the user -- * pages, all mappings will be marked as unmapped. */ -- list_for_each_entry(entry, &mem->bo_va_list, bo_list) { -- struct amdgpu_device *adev; -- -- if (!entry->is_mapped) -- continue; -- -- adev = (struct amdgpu_device *)entry->kgd_dev; -+ return ret; -+} - -- if (unlikely(!have_pages)) { -- entry->map_fail = true; -- continue; -- } -+/* Worker callback to restore evicted userptr BOs -+ * -+ * Tries to update and validate all userptr BOs. If successful and no -+ * concurrent evictions happened, the queues are restarted. Otherwise, -+ * reschedule for another attempt later. 
-+ */ -+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct amdkfd_process_info *process_info = -+ container_of(dwork, struct amdkfd_process_info, work); -+ struct task_struct *usertask; -+ struct mm_struct *mm; -+ int evicted_bos; -+ -+ evicted_bos = atomic_read(&process_info->evicted_bos); -+ if (!evicted_bos) -+ return; - -- r = map_bo_to_gpuvm(adev, entry, &ctx.sync); -- if (unlikely(r != 0)) { -- pr_err("Failed to map BO to gpuvm\n"); -- entry->map_fail = true; -- if (ret == 0) -- ret = r; -- } -+ /* Reference task and mm in case of concurrent process termination */ -+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID); -+ if (!usertask) -+ return; -+ mm = get_task_mm(usertask); -+ if (!mm) { -+ put_task_struct(usertask); -+ return; - } - -- if (have_pages) -- unreserve_bo_and_vms(&ctx, true); -+ mutex_lock(&process_info->lock); - -- /* Resume queues after unreserving the BOs and most -- * importantly, waiting for the BO fences to guarantee that -- * the page table updates have completed. -+ if (update_invalid_user_pages(process_info, mm)) -+ goto unlock_out; -+ /* userptr_inval_list can be empty if all evicted userptr BOs -+ * have been freed. In that case there is nothing to validate -+ * and we can just restart the queues. - */ -- list_for_each_entry(entry, &mem->bo_va_list, bo_list) { -- struct amdgpu_device *adev; -- -- if (!entry->is_mapped) -- continue; -+ if (!list_empty(&process_info->userptr_inval_list)) { -+ if (atomic_read(&process_info->evicted_bos) != evicted_bos) -+ goto unlock_out; /* Concurrent eviction, try again */ - -- /* Mapping failed. To be in a consistent state, mark the -- * buffer as unmapped, but state of the buffer will be -- * not evicted. A vm fault will generated if user space tries -- * to access this buffer. -+ if (validate_invalid_user_pages(process_info)) -+ goto unlock_out; -+ } -+ /* Final check for concurrent evicton and atomic update. If -+ * another eviction happens after successful update, it will -+ * be a first eviction that calls quiesce_mm. The eviction -+ * reference counting inside KFD will handle this case. -+ */ -+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != -+ evicted_bos) -+ goto unlock_out; -+ evicted_bos = 0; -+ if (kgd2kfd->resume_mm(NULL, mm)) { -+ pr_err("%s: Failed to resume KFD\n", __func__); -+ /* No recovery from this failure. Probably the CP is -+ * hanging. No point trying again. - */ -- if (entry->map_fail) { -- entry->is_mapped = false; -- mem->mapped_to_gpu_memory--; -- } -- adev = (struct amdgpu_device *)entry->kgd_dev; -- -- r = kgd2kfd->resume_mm(adev->kfd, mm); -- if (r != 0) { -- pr_err("Failed to resume KFD\n"); -- if (ret == 0) -- ret = r; -- } - } -+unlock_out: -+ mutex_unlock(&process_info->lock); -+ mmput(mm); -+ put_task_struct(usertask); - -- return ret; -+ /* If validation failed, reschedule another attempt */ -+ if (evicted_bos) -+ schedule_delayed_work(&process_info->work, 1); -+} -+ -+/* Schedule delayed restoring of userptr BOs -+ * -+ * This runs in an MMU notifier. See limitations above. The scheduled -+ * worker is free of those limitations. Delaying the restore allows -+ * multiple MMU notifiers to happen in rapid succession, for example -+ * when fork COWs many BOs at once. 
-+ */ -+int amdgpu_amdkfd_schedule_restore_userptr(struct kgd_mem *mem, -+ unsigned long delay) -+{ -+ struct amdkfd_process_info *process_info = mem->process_info; -+ -+ schedule_delayed_work(&process_info->work, delay); -+ -+ return 0; - } - - /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -index 7076d08..9d78a4f 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -@@ -88,8 +88,6 @@ static void amdgpu_mn_destroy(struct work_struct *work) - list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { - bo->mn = NULL; - list_del_init(&bo->mn_list); -- if (rmn->type == AMDGPU_MN_TYPE_HSA) -- amdgpu_amdkfd_cancel_restore_mem(bo->kfd_bo); - } - kfree(node); - } -@@ -230,7 +228,7 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) -- amdgpu_amdkfd_evict_mem(amdgpu_ttm_adev(bo->tbo.bdev), mem, mm); -+ amdgpu_amdkfd_evict_userptr(mem, mm); - } - } - -@@ -277,8 +275,7 @@ static void amdgpu_mn_invalidate_range_end_hsa(struct mmu_notifier *mn, - - if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) -- amdgpu_amdkfd_schedule_restore_mem(amdgpu_ttm_adev(bo->tbo.bdev), -- mem, mm, 1); -+ amdgpu_amdkfd_schedule_restore_userptr(mem, 1); - } - } - --- -2.7.4 - |