diff options
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch')
-rw-r--r-- | meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch | 328 |
1 files changed, 0 insertions, 328 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch deleted file mode 100644 index 7c54cc0f..00000000 --- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch +++ /dev/null @@ -1,328 +0,0 @@ -From b1a00535b81c79595ef183036978c585518d59ab Mon Sep 17 00:00:00 2001 -From: Felix Kuehling <Felix.Kuehling@amd.com> -Date: Tue, 24 Jan 2017 20:41:45 -0500 -Subject: [PATCH 1580/4131] drm/amdgpu: Fix handling of userptrs around process - termination - -Delayed workers can update userptr BOs during or shortly after -process termination. That means task_structs and mm_struct may -be invalid when amdgpu_ttm_tt_get_user_pages is called. - -Instead of a task_struct pointer, store a struct pid reference and -use it to get a safe task reference (or NULL if the process has -already terminated). Return -ESRCH to indicate to the caller when -process termination is detected. - -Increment the reference counter of the mm_struct while the restore -delayed work is queued to ensure the mm_struct reference is valid -until the worker is finished. - -Change-Id: I7c4c7745bc9da281f30ad02355f5c70de0a52823 -Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> - - Conflicts: - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ---- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 87 ++++++++++++++++++------ - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 3 +- - drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 ++++++------ - 5 files changed, 98 insertions(+), 46 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -index 07150a6..05a627a 100755 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c -@@ -176,6 +176,43 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev) - return r; - } - -+/* Cancel any scheduled restore work or wait for it to finish. Must be -+ * called with the mem->lock held. First drop the mm reference. If the -+ * worker has already started, it will detect that mm was dropped and -+ * cancel itself. -+ * -+ * If the worker has already started, it needs to take the -+ * mem->lock. To prevent deadlocks, we need to briefly drop the lock -+ * while waiting. During that time someone else may schedule another -+ * restore. So repeat the process if necessary. -+ * -+ * mmput needs to be called without holding the lock to prevent -+ * circular lock dependencies. -+ */ -+static void cancel_restore_locked(struct kgd_mem *mem) -+{ -+ struct mm_struct *mm; -+ -+ while (mem->mm) { -+ mm = mem->mm; -+ mem->mm = NULL; -+ -+ mutex_unlock(&mem->lock); -+ -+ mmput(mm); -+ cancel_delayed_work_sync(&mem->work); -+ -+ mutex_lock(&mem->lock); -+ } -+} -+ -+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem) -+{ -+ mutex_lock(&mem->lock); -+ cancel_restore_locked(mem); -+ mutex_unlock(&mem->lock); -+} -+ - int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem, - struct mm_struct *mm) - { -@@ -186,11 +223,12 @@ int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem, - - mutex_lock(&mem->lock); - -- if (mem->evicted == 1 && delayed_work_pending(&mem->work)) -- /* Cancelling a scheduled restoration */ -- cancel_delayed_work(&mem->work); -- - if (++mem->evicted > 1) { -+ /* Memory was already evicted. It may have been -+ * scheduled for restoration, but that restoration -+ * hasn't happened yet. When the worker starts it will -+ * know and abort. -+ */ - mutex_unlock(&mem->lock); - return 0; - } -@@ -223,14 +261,25 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work) - adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - mm = mem->mm; - -- /* Restoration may have been canceled by another eviction or -- * could already be done by a restore scheduled earlier */ -+ /* Check if restore was canceled */ -+ if (!mm) { -+ mutex_unlock(&mem->lock); -+ return; -+ } -+ -+ /* Only restore if no other eviction happened since restore -+ * was scheduled. -+ */ - if (mem->evicted == 1) { - amdgpu_amdkfd_gpuvm_restore_mem(mem, mm); - mem->evicted = 0; - } - -+ mem->mm = NULL; -+ - mutex_unlock(&mem->lock); -+ -+ mmput(mm); - } - - int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, -@@ -256,27 +305,23 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, - return 0; - } - -- /* mem->evicted is 1 after decrememting. Schedule -- * restoration. */ -- if (delayed_work_pending(&mem->work)) -- cancel_delayed_work(&mem->work); -- mem->mm = mm; -- INIT_DELAYED_WORK(&mem->work, -- amdgdu_amdkfd_restore_mem_worker); -- schedule_delayed_work(&mem->work, delay); -+ /* mem->evicted is 1 after decrementing. If a restoration was -+ * already scheduled, just let it do its job. Otherwise -+ * schedule another one. -+ */ -+ if (!mem->mm) { -+ mem->mm = mm; -+ atomic_inc(&mm->mm_users); -+ INIT_DELAYED_WORK(&mem->work, -+ amdgdu_amdkfd_restore_mem_worker); -+ schedule_delayed_work(&mem->work, delay); -+ } - - mutex_unlock(&mem->lock); - - return r; - } - --void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev, -- struct kgd_mem *mem) --{ -- if (delayed_work_pending(&mem->work)) -- cancel_delayed_work_sync(&mem->work); --} -- - int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len) -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -index a9f877a..9ac3b6b 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h -@@ -128,8 +128,7 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev, - struct kgd_mem *mem, - struct mm_struct *mm, - unsigned long delay); --void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev, -- struct kgd_mem *mem); -+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem); - int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len); -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -index 034bf91..370daae 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c -@@ -1111,8 +1111,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - * be freed anyway */ - - amdgpu_mn_unregister(mem->bo); -- if (mem->work.work.func) -- cancel_delayed_work_sync(&mem->work); -+ amdgpu_amdkfd_cancel_restore_mem(mem); - - ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx); - if (unlikely(ret != 0)) -@@ -1923,6 +1922,9 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm) - have_pages = !ret; - if (!have_pages) { - unreserve_bo_and_vms(&ctx, false); -+ if (ret == -ESRCH) -+ /* process terminating, fail quiet and fast */ -+ return ret; - pr_err("get_user_pages failed. Probably userptr is freed. %d\n", - ret); - } -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -index 40e431a..990e6fe 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c -@@ -89,8 +89,7 @@ static void amdgpu_mn_destroy(struct work_struct *work) - bo->mn = NULL; - list_del_init(&bo->mn_list); - if (rmn->type == AMDGPU_MN_TYPE_HSA) -- amdgpu_amdkfd_cancel_restore_mem( -- adev, bo->kfd_bo); -+ amdgpu_amdkfd_cancel_restore_mem(bo->kfd_bo); - } - kfree(node); - } -diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c -index b028f23..48e74d3 100644 ---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c -+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c -@@ -697,7 +697,8 @@ struct amdgpu_ttm_tt { - struct amdgpu_device *adev; - u64 offset; - uint64_t userptr; -- struct task_struct *usertask; -+ struct mm_struct *usermm; -+ struct pid *userpid; - uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; -@@ -709,30 +710,34 @@ struct amdgpu_ttm_tt { - int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) - { - struct amdgpu_ttm_tt *gtt = (void *)ttm; -- struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; -+ struct task_struct *usertask; - int r; - -- if (!mm) /* Happens during process shutdown */ -- return -ESRCH; -- - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; - -- down_read(&mm->mmap_sem); -- - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* check that we only use anonymous memory - to prevent problems with writeback */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; - -- vma = find_vma(mm, gtt->userptr); -- if (!vma || vma->vm_file || vma->vm_end < end) { -- up_read(&mm->mmap_sem); -+ vma = find_vma(gtt->usermm, gtt->userptr); -+ if (!vma || vma->vm_file || vma->vm_end < end) - return -EPERM; -- } -+ } -+ -+ if (!gtt->userpid) -+ return -EINVAL; -+ usertask = get_pid_task(gtt->userpid, PIDTYPE_PID); -+ if (!usertask) -+ return -ESRCH; -+ if (usertask->mm != gtt->usermm) { -+ /* Happens during process shutdown */ -+ put_task_struct(usertask); -+ return -ESRCH; - } - - do { -@@ -758,13 +763,13 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) - pinned += r; - - } while (pinned < ttm->num_pages); -- -- up_read(&mm->mmap_sem); -+ -+ put_task_struct(usertask); - return 0; - - release_pages: - release_pages(pages, pinned, 0); -- up_read(&mm->mmap_sem); -+ put_task_struct(usertask); - return r; - } - -@@ -1016,6 +1021,9 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) - static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) - { - struct amdgpu_ttm_tt *gtt = (void *)ttm; -+ -+ if (gtt->userpid) -+ put_pid(gtt->userpid); - - ttm_dma_tt_fini(>t->ttm); - kfree(gtt); -@@ -1118,6 +1126,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, - - gtt->userptr = addr; - gtt->usertask = current->group_leader; -+ gtt->usermm = current->mm; -+ gtt->userpid = get_task_pid(current->group_leader, PIDTYPE_PID); - gtt->userflags = flags; - spin_lock_init(>t->guptasklock); - INIT_LIST_HEAD(>t->guptasks); -@@ -1133,11 +1143,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) - - if (gtt == NULL) - return NULL; -- -- if (gtt->usertask == NULL) -- return NULL; -- -- return gtt->usertask->mm; -+ -+ return gtt->usermm; - } - - bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, --- -2.7.4 - |