aboutsummaryrefslogtreecommitdiffstats
path: root/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch')
-rw-r--r--meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch156
1 files changed, 156 insertions, 0 deletions
diff --git a/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch
new file mode 100644
index 00000000..0f653977
--- /dev/null
+++ b/meta-v1000/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch
@@ -0,0 +1,156 @@
+From 4e615881865564e540f40efb96021093bd42c7a3 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Tue, 26 Jul 2016 17:30:54 -0400
+Subject: [PATCH 1477/4131] drm/amdkfd: Don't dereference kfd_process.mm
+
+The kfd_process doesn't own a reference to the mm_struct, so it can
+disappear without warning even while the kfd_process still exists.
+In fact, the delayed kfd_process teardown is triggered by an MMU
+notifier when the mm_struct is destroyed. Permanently holding a
+reference to the mm_struct would prevent this from happening.
+
+Therefore, avoid dereferencing the kfd_process.mm pointer and make
+it opaque. Use other ways to access the mm:
+ * In process context, use current->mm
+ * In calls that know the mm, use it directly
+ * Otherwise use get_task_mm to get a reference
+
+Change-Id: Idcea859d0eaa6d62978b3a8ee54d83cbcfc0d7cd
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 ++++++++-
+ drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++--------
+ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 17 ++++++++++++++---
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++++++-
+ 4 files changed, 30 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index af3790f..0111510 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -708,9 +708,16 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm)
+
+ r = -ENODEV;
+ pdd = kfd_get_process_device_data(kfd, p);
+- if (pdd)
++ if (pdd) {
++ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
++ down_read(&mm->mmap_sem);
++
+ r = process_restore_queues(kfd->dqm, &pdd->qpd);
+
++ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
++ up_read(&mm->mmap_sem);
++ }
++
+ up_read(&p->lock);
+ return r;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 1506597..df9b3f3 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -409,7 +409,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ BUG_ON(!dqm || !q || !q->mqd);
+
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+- down_read(&q->process->mm->mmap_sem);
++ down_read(&current->mm->mmap_sem);
+ mutex_lock(&dqm->lock);
+
+ pdd = kfd_get_process_device_data(q->device, q->process);
+@@ -466,7 +466,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+ out_unlock:
+ mutex_unlock(&dqm->lock);
+ if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+- up_read(&q->process->mm->mmap_sem);
++ up_read(&current->mm->mmap_sem);
+
+ return retval;
+ }
+@@ -541,14 +541,10 @@ int process_restore_queues(struct device_queue_manager *dqm,
+ {
+ struct queue *q, *next;
+ struct mqd_manager *mqd;
+- struct kfd_process_device *pdd =
+- container_of(qpd, struct kfd_process_device, qpd);
+ int retval = 0;
+
+ BUG_ON(!dqm || !qpd);
+
+- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+- down_read(&pdd->process->mm->mmap_sem);
+ mutex_lock(&dqm->lock);
+ if (qpd->evicted == 0) /* already restored, do nothing */
+ goto out_unlock;
+@@ -588,8 +584,6 @@ int process_restore_queues(struct device_queue_manager *dqm,
+
+ out_unlock:
+ mutex_unlock(&dqm->lock);
+- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+- up_read(&pdd->process->mm->mmap_sem);
+
+ return retval;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index eb51873..5f7aa78 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -1027,14 +1027,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
+ * running so the lookup function returns a read-locked process.
+ */
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
++ struct mm_struct *mm;
+
+ if (!p)
+ return; /* Presumably process exited. */
+
++ /* Take a safe reference to the mm_struct, which may otherwise
++ * disappear even while the kfd_process is still referenced.
++ */
++ mm = get_task_mm(p->lead_thread);
++ if (!mm) {
++ up_read(&p->lock);
++ return; /* Process is exiting */
++ }
++
+ memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+
+- down_read(&p->mm->mmap_sem);
+- vma = find_vma(p->mm, address);
++ down_read(&mm->mmap_sem);
++ vma = find_vma(mm, address);
+
+ memory_exception_data.gpu_id = dev->id;
+ memory_exception_data.va = address;
+@@ -1060,7 +1070,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
+ }
+ }
+
+- up_read(&p->mm->mmap_sem);
++ up_read(&mm->mmap_sem);
++ mmdrop(mm);
+
+ mutex_lock(&p->event_mutex);
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index f540931..7576799 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -607,7 +607,12 @@ struct kfd_process {
+ */
+ struct hlist_node kfd_processes;
+
+- struct mm_struct *mm;
++ /*
++ * Opaque pointer to mm_struct. We don't hold a reference to
++ * it so it should never be dereferenced from here. This is
++ * only used for looking up processes by their mm.
++ */
++ void *mm;
+
+ struct kref ref;
+ struct work_struct release_work;
+--
+2.7.4
+