about summary refs log tree commit diff stats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch')
-rw-r--r-- meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch 328
1 files changed, 0 insertions, 328 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch
deleted file mode 100644
index 7c54cc0f..00000000
--- a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1580-drm-amdgpu-Fix-handling-of-userptrs-around-process-t.patch
+++ /dev/null
@@ -1,328 +0,0 @@
-From b1a00535b81c79595ef183036978c585518d59ab Mon Sep 17 00:00:00 2001
-From: Felix Kuehling <Felix.Kuehling@amd.com>
-Date: Tue, 24 Jan 2017 20:41:45 -0500
-Subject: [PATCH 1580/4131] drm/amdgpu: Fix handling of userptrs around process
- termination
-
-Delayed workers can update userptr BOs during or shortly after
-process termination. That means task_structs and mm_struct may
-be invalid when amdgpu_ttm_tt_get_user_pages is called.
-
-Instead of a task_struct pointer, store a struct pid reference and
-use it to get a safe task reference (or NULL if the process has
-already terminated). Return -ESRCH to indicate to the caller when
-process termination is detected.
-
-Increment the reference counter of the mm_struct while the restore
-delayed work is queued to ensure the mm_struct reference is valid
-until the worker is finished.
-
-Change-Id: I7c4c7745bc9da281f30ad02355f5c70de0a52823
-Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
-
- Conflicts:
- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
----
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 87 ++++++++++++++++++------
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 3 +-
- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 45 ++++++------
- 5 files changed, 98 insertions(+), 46 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-index 07150a6..05a627a 100755
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
-@@ -176,6 +176,43 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
- return r;
- }
-
-+/* Cancel any scheduled restore work or wait for it to finish. Must be
-+ * called with the mem->lock held. First drop the mm reference. If the
-+ * worker has already started, it will detect that mm was dropped and
-+ * cancel itself.
-+ *
-+ * If the worker has already started, it needs to take the
-+ * mem->lock. To prevent deadlocks, we need to briefly drop the lock
-+ * while waiting. During that time someone else may schedule another
-+ * restore. So repeat the process if necessary.
-+ *
-+ * mmput needs to be called without holding the lock to prevent
-+ * circular lock dependencies.
-+ */
-+static void cancel_restore_locked(struct kgd_mem *mem)
-+{
-+ struct mm_struct *mm;
-+
-+ while (mem->mm) {
-+ mm = mem->mm;
-+ mem->mm = NULL;
-+
-+ mutex_unlock(&mem->lock);
-+
-+ mmput(mm);
-+ cancel_delayed_work_sync(&mem->work);
-+
-+ mutex_lock(&mem->lock);
-+ }
-+}
-+
-+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem)
-+{
-+ mutex_lock(&mem->lock);
-+ cancel_restore_locked(mem);
-+ mutex_unlock(&mem->lock);
-+}
-+
- int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem,
- struct mm_struct *mm)
- {
-@@ -186,11 +223,12 @@ int amdgpu_amdkfd_evict_mem(struct amdgpu_device *adev, struct kgd_mem *mem,
-
- mutex_lock(&mem->lock);
-
-- if (mem->evicted == 1 && delayed_work_pending(&mem->work))
-- /* Cancelling a scheduled restoration */
-- cancel_delayed_work(&mem->work);
--
- if (++mem->evicted > 1) {
-+ /* Memory was already evicted. It may have been
-+ * scheduled for restoration, but that restoration
-+ * hasn't happened yet. When the worker starts it will
-+ * see mem->evicted != 1 and skip the restore.
-+ */
- mutex_unlock(&mem->lock);
- return 0;
- }
-@@ -223,14 +261,25 @@ static void amdgdu_amdkfd_restore_mem_worker(struct work_struct *work)
- adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
- mm = mem->mm;
-
-- /* Restoration may have been canceled by another eviction or
-- * could already be done by a restore scheduled earlier */
-+ /* Check if restore was canceled */
-+ if (!mm) {
-+ mutex_unlock(&mem->lock);
-+ return;
-+ }
-+
-+ /* Only restore if no other eviction happened since restore
-+ * was scheduled.
-+ */
- if (mem->evicted == 1) {
- amdgpu_amdkfd_gpuvm_restore_mem(mem, mm);
- mem->evicted = 0;
- }
-
-+ mem->mm = NULL;
-+
- mutex_unlock(&mem->lock);
-+
-+ mmput(mm);
- }
-
- int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
-@@ -256,27 +305,23 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
- return 0;
- }
-
-- /* mem->evicted is 1 after decrememting. Schedule
-- * restoration. */
-- if (delayed_work_pending(&mem->work))
-- cancel_delayed_work(&mem->work);
-- mem->mm = mm;
-- INIT_DELAYED_WORK(&mem->work,
-- amdgdu_amdkfd_restore_mem_worker);
-- schedule_delayed_work(&mem->work, delay);
-+ /* mem->evicted is 1 after decrementing. If a restoration was
-+ * already scheduled, just let it do its job. Otherwise
-+ * schedule another one.
-+ */
-+ if (!mem->mm) {
-+ mem->mm = mm;
-+ atomic_inc(&mm->mm_users);
-+ INIT_DELAYED_WORK(&mem->work,
-+ amdgdu_amdkfd_restore_mem_worker);
-+ schedule_delayed_work(&mem->work, delay);
-+ }
-
- mutex_unlock(&mem->lock);
-
- return r;
- }
-
--void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev,
-- struct kgd_mem *mem)
--{
-- if (delayed_work_pending(&mem->work))
-- cancel_delayed_work_sync(&mem->work);
--}
--
- int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
- uint32_t vmid, uint64_t gpu_addr,
- uint32_t *ib_cmd, uint32_t ib_len)
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-index a9f877a..9ac3b6b 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
-@@ -128,8 +128,7 @@ int amdgpu_amdkfd_schedule_restore_mem(struct amdgpu_device *adev,
- struct kgd_mem *mem,
- struct mm_struct *mm,
- unsigned long delay);
--void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev,
-- struct kgd_mem *mem);
-+void amdgpu_amdkfd_cancel_restore_mem(struct kgd_mem *mem);
- int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
- uint32_t vmid, uint64_t gpu_addr,
- uint32_t *ib_cmd, uint32_t ib_len);
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-index 034bf91..370daae 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
-@@ -1111,8 +1111,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- * be freed anyway */
-
- amdgpu_mn_unregister(mem->bo);
-- if (mem->work.work.func)
-- cancel_delayed_work_sync(&mem->work);
-+ amdgpu_amdkfd_cancel_restore_mem(mem);
-
- ret = reserve_bo_and_cond_vms(mem, NULL, VA_DO_NOT_CARE, &ctx);
- if (unlikely(ret != 0))
-@@ -1923,6 +1922,9 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
- have_pages = !ret;
- if (!have_pages) {
- unreserve_bo_and_vms(&ctx, false);
-+ if (ret == -ESRCH)
-+ /* process terminating, fail quietly and fast */
-+ return ret;
- pr_err("get_user_pages failed. Probably userptr is freed. %d\n",
- ret);
- }
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
-index 40e431a..990e6fe 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
-@@ -89,8 +89,7 @@ static void amdgpu_mn_destroy(struct work_struct *work)
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- if (rmn->type == AMDGPU_MN_TYPE_HSA)
-- amdgpu_amdkfd_cancel_restore_mem(
-- adev, bo->kfd_bo);
-+ amdgpu_amdkfd_cancel_restore_mem(bo->kfd_bo);
- }
- kfree(node);
- }
-diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-index b028f23..48e74d3 100644
---- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
-@@ -697,7 +697,8 @@ struct amdgpu_ttm_tt {
- struct amdgpu_device *adev;
- u64 offset;
- uint64_t userptr;
-- struct task_struct *usertask;
-+ struct mm_struct *usermm;
-+ struct pid *userpid;
- uint32_t userflags;
- spinlock_t guptasklock;
- struct list_head guptasks;
-@@ -709,30 +710,34 @@ struct amdgpu_ttm_tt {
- int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
- {
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-- struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
-+ struct task_struct *usertask;
- int r;
-
-- if (!mm) /* Happens during process shutdown */
-- return -ESRCH;
--
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
-
-- down_read(&mm->mmap_sem);
--
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /* check that we only use anonymous memory
- to prevent problems with writeback */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
-
-- vma = find_vma(mm, gtt->userptr);
-- if (!vma || vma->vm_file || vma->vm_end < end) {
-- up_read(&mm->mmap_sem);
-+ vma = find_vma(gtt->usermm, gtt->userptr);
-+ if (!vma || vma->vm_file || vma->vm_end < end)
- return -EPERM;
-- }
-+ }
-+
-+ if (!gtt->userpid)
-+ return -EINVAL;
-+ usertask = get_pid_task(gtt->userpid, PIDTYPE_PID);
-+ if (!usertask)
-+ return -ESRCH;
-+ if (usertask->mm != gtt->usermm) {
-+ /* Happens during process shutdown */
-+ put_task_struct(usertask);
-+ return -ESRCH;
- }
-
- do {
-@@ -758,13 +763,13 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
- pinned += r;
-
- } while (pinned < ttm->num_pages);
--
-- up_read(&mm->mmap_sem);
-+
-+ put_task_struct(usertask);
- return 0;
-
- release_pages:
- release_pages(pages, pinned, 0);
-- up_read(&mm->mmap_sem);
-+ put_task_struct(usertask);
- return r;
- }
-
-@@ -1016,6 +1021,9 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
- static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
- {
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
-+
-+ if (gtt->userpid)
-+ put_pid(gtt->userpid);
-
- ttm_dma_tt_fini(&gtt->ttm);
- kfree(gtt);
-@@ -1118,6 +1126,8 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
-
- gtt->userptr = addr;
- gtt->usertask = current->group_leader;
-+ gtt->usermm = current->mm;
-+ gtt->userpid = get_task_pid(current->group_leader, PIDTYPE_PID);
- gtt->userflags = flags;
- spin_lock_init(&gtt->guptasklock);
- INIT_LIST_HEAD(&gtt->guptasks);
-@@ -1133,11 +1143,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
-
- if (gtt == NULL)
- return NULL;
--
-- if (gtt->usertask == NULL)
-- return NULL;
--
-- return gtt->usertask->mm;
-+
-+ return gtt->usermm;
- }
-
- bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
---
-2.7.4
-