From b1a00535b81c79595ef183036978c585518d59ab Mon Sep 17 00:00:00 2001
From: Felix Kuehling
Date: Tue, 24 Jan 2017 20:41:45 -0500
Subject: [PATCH 1580/4131] drm/amdgpu: Fix handling of userptrs around
 process termination

Delayed workers can update userptr BOs during or shortly after process
termination. That means the task_struct and mm_struct may already be
invalid when amdgpu_ttm_tt_get_user_pages is called.

Instead of a task_struct pointer, store a struct pid reference and use
it to get a safe task reference (or NULL if the process has already
terminated). Return -ESRCH to indicate to the caller when process
termination is detected.

Increment the reference count of the mm_struct while the restore
delayed work is queued, so that the mm_struct stays valid until the
worker has finished.

Change-Id: I7c4c7745bc9da281f30ad02355f5c70de0a52823
Signed-off-by: Felix Kuehling

Conflicts:
	drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c       | 87 ++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          | 45 ++++++------
 5 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 07150a6..05a627a 100755
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -176,6 +176,43 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
         return r;
 }
 
+/* Cancel any scheduled restore work or wait for it to finish. Must be
+ * called with mem->lock held. First drop the mm reference; if the
+ * worker has already started, it will detect that mm was dropped and
+ * cancel itself.
+ *
+ * Because a running worker also needs to take mem->lock, briefly drop
+ * the lock while waiting to prevent deadlocks. During that time
+ * someone else may schedule another restore, so repeat the process if
+ * necessary.
+ *
+ * mmput needs to be called without holding the lock to prevent
+ * circular lock dependencies.
+ */
+static void cancel_restore_locked(struct kgd_mem *mem)
+{
+        struct mm_struct *mm;
+
+        while (mem->mm) {
+                mm = mem->mm;
+                mem->mm = NULL;
+
+                mutex_unlock(&mem->lock);
+
+                mmput(mm);
+                cancel_delayed_work_sync(&mem->work);
+
+                mutex_lock(&mem->lock);
+        }
+}
+
+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem)
+{
+        mutex_lock(&mem->lock);
+        cancel_restore_locked(mem);
+        mutex_unlock(&mem->lock);
+}
+
 int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem,
                             struct mm_struct *mm)
 {
@@ -186,11 +223,12 @@ int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem,
 
         mutex_lock(&mem->lock);
 
-        if (mem->evicted == 1 && delayed_work_pending(&mem->work))
-                /* Cancelling a scheduled restoration */
-                cancel_delayed_work(&mem->work);
-
         if (++mem->evicted > 1) {
+                /* Memory was already evicted. It may have been
+                 * scheduled for restoration, but that restoration
+                 * hasn't happened yet. When the worker starts it will
+                 * know and abort.
+                 */
                 mutex_unlock(&mem->lock);
                 return 0;
         }
@@ -223,14 +261,25 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
         adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
         mm = mem->mm;
 
-        /* Restoration may have been canceled by another eviction or
-         * could already be done by a restore scheduled earlier */
+        /* Check if restore was canceled */
+        if (!mm) {
+                mutex_unlock(&mem->lock);
+                return;
+        }
+
+        /* Only restore if no other eviction happened since restore
+         * was scheduled.
+         */
         if (mem->evicted == 1) {
                 amdgpu_amdkfd_gpuvm_restore_mem(mem, mm);
                 mem->evicted = 0;
         }
 
+        mem->mm = NULL;
+
         mutex_unlock(&mem->lock);
+
+        mmput(mm);
 }
 
 int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
@@ -256,27 +305,23 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
                 return 0;
         }
 
-        /* mem->evicted is 1 after decrememting. Schedule
-         * restoration. */
-        if (delayed_work_pending(&mem->work))
-                cancel_delayed_work(&mem->work);
-        mem->mm = mm;
-        INIT_DELAYED_WORK(&mem->work,
-                          amdgdu_amdkfd_restore_mem_worker);
-        schedule_delayed_work(&mem->work, delay);
+        /* mem->evicted is 1 after decrementing. If a restoration was
+         * already scheduled, just let it do its job. Otherwise
+         * schedule another one.
+         */
+        if (!mem->mm) {
+                mem->mm = mm;
+                atomic_inc(&mm->mm_users);
+                INIT_DELAYED_WORK(&mem->work,
+                                  amdgdu_amdkfd_restore_mem_worker);
+                schedule_delayed_work(&mem->work, delay);
+        }
 
         mutex_unlock(&mem->lock);
 
         return r;
 }
 
-void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev,
-                                      struct kgd_mem *mem)
-{
-        if (delayed_work_pending(&mem->work))
-                cancel_delayed_work_sync(&mem->work);
-}
-
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
                             uint32_t vmid, uint64_t gpu_addr,
                             uint32_t *ib_cmd, uint32_t ib_len)
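The queue/cancel lifetime scheme in the hunks above can be summarized
outside the driver as follows. This is a minimal illustrative sketch
under kernel conventions, not driver code: restore_ctx and the function
names are hypothetical, and mmget() stands in for the patch's
open-coded atomic_inc(&mm->mm_users). The invariant is that ctx->mm is
non-NULL exactly while a reference-holding work item is queued, so that
exactly one party, worker or canceller, performs the mmput().

/* Hypothetical sketch of the mm-pinning scheme used by the patch. */
struct restore_ctx {
        struct mutex lock;
        struct mm_struct *mm;           /* non-NULL iff work is queued */
        struct delayed_work work;
};

static void restore_worker(struct work_struct *work)
{
        struct restore_ctx *ctx = container_of(to_delayed_work(work),
                                               struct restore_ctx, work);
        struct mm_struct *mm;

        mutex_lock(&ctx->lock);
        mm = ctx->mm;
        if (!mm) {              /* canceled; reference already dropped */
                mutex_unlock(&ctx->lock);
                return;
        }
        /* ... do the restore; mm is pinned by the queue-time reference ... */
        ctx->mm = NULL;
        mutex_unlock(&ctx->lock);
        mmput(mm);              /* drop the reference taken at queue time */
}

static void schedule_restore(struct restore_ctx *ctx, struct mm_struct *mm,
                             unsigned long delay)
{
        mutex_lock(&ctx->lock);
        if (!ctx->mm) {         /* not queued yet; otherwise let it run */
                ctx->mm = mm;
                mmget(mm);      /* pin mm until the worker is done with it */
                INIT_DELAYED_WORK(&ctx->work, restore_worker);
                schedule_delayed_work(&ctx->work, delay);
        }
        mutex_unlock(&ctx->lock);
}

static void cancel_restore(struct restore_ctx *ctx)
{
        struct mm_struct *mm;

        mutex_lock(&ctx->lock);
        while (ctx->mm) {
                mm = ctx->mm;
                ctx->mm = NULL; /* a worker that runs now will bail out */
                /* Drop the lock: the worker takes it too, and mmput()
                 * must not be called under it.
                 */
                mutex_unlock(&ctx->lock);
                mmput(mm);
                cancel_delayed_work_sync(&ctx->work);
                /* Re-check: a new restore may have been scheduled
                 * while the lock was dropped.
                 */
                mutex_lock(&ctx->lock);
        }
        mutex_unlock(&ctx->lock);
}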
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a9f877a..9ac3b6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -128,8 +128,7 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
                                        struct kgd_mem *mem,
                                        struct mm_struct *mm,
                                        unsigned long delay);
-void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev,
-                                      struct kgd_mem *mem);
+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
                             uint32_t vmid, uint64_t gpu_addr,
                             uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 034bf91..370daae 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1111,8 +1111,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
          * be freed anyway */
         amdgpu_mn_unregister(mem->bo);
 
-        if (mem->work.work.func)
-                cancel_delayed_work_sync(&mem->work);
+        amdgpu_amdkfd_cancel_restore_mem(mem);
 
         ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx);
         if (unlikely(ret != 0))
@@ -1923,6 +1922,9 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
         have_pages = !ret;
         if (!have_pages) {
                 unreserve_bo_and_vms(&ctx, false);
+                if (ret == -ESRCH)
+                        /* process terminating, fail quietly and fast */
+                        return ret;
                 pr_err("get_user_pages failed. Probably userptr is freed. %d\n",
                        ret);
         }
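The -ESRCH contract introduced in the hunk above can be consumed by
callers along these lines; a hypothetical sketch, with update_userptr()
standing in for any caller of amdgpu_ttm_tt_get_user_pages():

/* Hypothetical caller: a dying process is an expected condition, so
 * propagate -ESRCH without the usual error-log noise.
 */
static int update_userptr(struct ttm_tt *ttm, struct page **pages)
{
        int ret = amdgpu_ttm_tt_get_user_pages(ttm, pages);

        if (ret == -ESRCH)
                return ret;     /* process terminating: quiet and fast */
        if (ret)
                pr_err("get_user_pages failed: %d\n", ret);
        return ret;
}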
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 40e431a..990e6fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -89,8 +89,7 @@ static void amdgpu_mn_destroy(struct work_struct *work)
                         bo->mn = NULL;
                         list_del_init(&bo->mn_list);
                         if (rmn->type == AMDGPU_MN_TYPE_HSA)
-                                amdgpu_amdkfd_cancel_restore_mem(
-                                        adev, bo->kfd_bo);
+                                amdgpu_amdkfd_cancel_restore_mem(bo->kfd_bo);
                 }
                 kfree(node);
         }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b028f23..48e74d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -697,7 +697,8 @@ struct amdgpu_ttm_tt {
         struct amdgpu_device    *adev;
         u64                     offset;
         uint64_t                userptr;
-        struct task_struct      *usertask;
+        struct mm_struct        *usermm;
+        struct pid              *userpid;
         uint32_t                userflags;
         spinlock_t              guptasklock;
         struct list_head        guptasks;
@@ -709,30 +710,34 @@ struct amdgpu_ttm_tt {
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
         struct amdgpu_ttm_tt *gtt = (void *)ttm;
-        struct mm_struct *mm = gtt->usertask->mm;
         unsigned int flags = 0;
         unsigned pinned = 0;
+        struct task_struct *usertask;
         int r;
 
-        if (!mm) /* Happens during process shutdown */
-                return -ESRCH;
-
         if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
                 flags |= FOLL_WRITE;
 
-        down_read(&mm->mmap_sem);
-
         if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
                 /* check that we only use anonymous memory
                    to prevent problems with writeback */
                 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
                 struct vm_area_struct *vma;
 
-                vma = find_vma(mm, gtt->userptr);
-                if (!vma || vma->vm_file || vma->vm_end < end) {
-                        up_read(&mm->mmap_sem);
+                vma = find_vma(gtt->usermm, gtt->userptr);
+                if (!vma || vma->vm_file || vma->vm_end < end)
                         return -EPERM;
-                }
         }
+
+        if (!gtt->userpid)
+                return -EINVAL;
+        usertask = get_pid_task(gtt->userpid, PIDTYPE_PID);
+        if (!usertask)
+                return -ESRCH;
+        if (usertask->mm != gtt->usermm) {
+                /* Happens during process shutdown */
+                put_task_struct(usertask);
+                return -ESRCH;
+        }
 
         do {
@@ -758,13 +763,13 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
                 pinned += r;
 
         } while (pinned < ttm->num_pages);
-
-        up_read(&mm->mmap_sem);
+
+        put_task_struct(usertask);
 
         return 0;
 
 release_pages:
         release_pages(pages, pinned, 0);
-        up_read(&mm->mmap_sem);
+        put_task_struct(usertask);
         return r;
 }
@@ -1016,6 +1021,9 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
 static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
 {
         struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+        if (gtt->userpid)
+                put_pid(gtt->userpid);
 
         ttm_dma_tt_fini(&gtt->ttm);
         kfree(gtt);
@@ -1118,6 +1126,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
         gtt->userptr = addr;
         gtt->usertask = current->group_leader;
+        gtt->usermm = current->mm;
+        gtt->userpid = get_task_pid(current->group_leader, PIDTYPE_PID);
         gtt->userflags = flags;
         spin_lock_init(&gtt->guptasklock);
         INIT_LIST_HEAD(&gtt->guptasks);
@@ -1133,11 +1143,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 
         if (gtt == NULL)
                 return NULL;
-
-        if (gtt->usertask == NULL)
-                return NULL;
-
-        return gtt->usertask->mm;
+
+        return gtt->usermm;
 }
 
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
-- 
2.7.4
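The amdgpu_ttm.c side of the patch reduces to a reusable pattern: hold
a counted struct pid, which remains valid after the task is gone,
instead of a raw task_struct pointer, and re-validate the task and its
mm before each use. A minimal sketch with hypothetical names, where
only the pid/task helpers are real kernel API:

/* Hypothetical helpers illustrating the struct pid ownership pattern. */
struct userptr_owner {
        struct mm_struct *mm;   /* mm recorded at registration time */
        struct pid *pid;        /* counted reference, outlives the task */
};

static void userptr_owner_init(struct userptr_owner *owner)
{
        owner->mm = current->mm;
        owner->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
}

/* Returns 0 if the owning process is still alive and its mm is the one
 * we recorded; -ESRCH once the process has exited or is exiting.
 */
static int userptr_owner_check(struct userptr_owner *owner)
{
        struct task_struct *task;
        int ret = 0;

        task = get_pid_task(owner->pid, PIDTYPE_PID);
        if (!task)
                return -ESRCH;  /* task_struct is already gone */
        if (task->mm != owner->mm)
                ret = -ESRCH;   /* exiting: mm already detached */
        put_task_struct(task);
        return ret;
}

static void userptr_owner_fini(struct userptr_owner *owner)
{
        put_pid(owner->pid);
}

get_pid_task() returns NULL once the process has been reaped, and a
task that is exiting has already dropped its mm, so both cases collapse
naturally onto the -ESRCH return that the patch propagates to callers.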