diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch new file mode 100644 index 00000000..0f653977 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1477-drm-amdkfd-Don-t-dereference-kfd_process.mm.patch @@ -0,0 +1,156 @@ +From 4e615881865564e540f40efb96021093bd42c7a3 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Tue, 26 Jul 2016 17:30:54 -0400 +Subject: [PATCH 1477/4131] drm/amdkfd: Don't dereference kfd_process.mm + +The kfd_process doesn't own a reference to the mm_struct, so it can +disappear without warning even while the kfd_process still exists. +In fact, the delayed kfd_process teardown is triggered by an MMU +notifier when the mm_struct is destroyed. Permanently holding a +reference to the mm_struct would prevent this from happening. + +Therefore, avoid dereferencing the kfd_process.mm pointer and make +it opaque. Use other ways to access the mm: + * In process context, use current->mm + * In calls that know the mm, use it directly + * Otherwise use get_task_mm to get a reference + +Change-Id: Idcea859d0eaa6d62978b3a8ee54d83cbcfc0d7cd +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 9 ++++++++- + drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++-------- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 17 ++++++++++++++--- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++++++- + 4 files changed, 30 insertions(+), 13 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index af3790f..0111510 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -708,9 +708,16 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm) + + r = -ENODEV; + pdd = kfd_get_process_device_data(kfd, p); +- if (pdd) ++ if (pdd) { ++ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) ++ down_read(&mm->mmap_sem); ++ + r = process_restore_queues(kfd->dqm, &pdd->qpd); + ++ if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) ++ up_read(&mm->mmap_sem); ++ } ++ + up_read(&p->lock); + return r; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index 1506597..df9b3f3 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -409,7 +409,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + BUG_ON(!dqm || !q || !q->mqd); + + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) +- down_read(&q->process->mm->mmap_sem); ++ down_read(¤t->mm->mmap_sem); + mutex_lock(&dqm->lock); + + pdd = kfd_get_process_device_data(q->device, q->process); +@@ -466,7 +466,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) + out_unlock: + mutex_unlock(&dqm->lock); + if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) +- up_read(&q->process->mm->mmap_sem); ++ up_read(¤t->mm->mmap_sem); + + return retval; + } +@@ -541,14 +541,10 @@ int process_restore_queues(struct device_queue_manager *dqm, + { + struct queue *q, *next; + struct mqd_manager *mqd; +- struct kfd_process_device *pdd = +- container_of(qpd, struct kfd_process_device, qpd); + int retval = 0; + + BUG_ON(!dqm || !qpd); + +- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) +- down_read(&pdd->process->mm->mmap_sem); + mutex_lock(&dqm->lock); + if (qpd->evicted == 0) /* already restored, do nothing */ + goto out_unlock; +@@ -588,8 +584,6 @@ int process_restore_queues(struct device_queue_manager *dqm, + + out_unlock: + mutex_unlock(&dqm->lock); +- if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) +- up_read(&pdd->process->mm->mmap_sem); + + return retval; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index eb51873..5f7aa78 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -1027,14 +1027,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, + * running so the lookup function returns a read-locked process. + */ + struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); ++ struct mm_struct *mm; + + if (!p) + return; /* Presumably process exited. */ + ++ /* Take a safe reference to the mm_struct, which may otherwise ++ * disappear even while the kfd_process is still referenced. ++ */ ++ mm = get_task_mm(p->lead_thread); ++ if (!mm) { ++ up_read(&p->lock); ++ return; /* Process is exiting */ ++ } ++ + memset(&memory_exception_data, 0, sizeof(memory_exception_data)); + +- down_read(&p->mm->mmap_sem); +- vma = find_vma(p->mm, address); ++ down_read(&mm->mmap_sem); ++ vma = find_vma(mm, address); + + memory_exception_data.gpu_id = dev->id; + memory_exception_data.va = address; +@@ -1060,7 +1070,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, + } + } + +- up_read(&p->mm->mmap_sem); ++ up_read(&mm->mmap_sem); ++ mmdrop(mm); + + mutex_lock(&p->event_mutex); + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index f540931..7576799 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -607,7 +607,12 @@ struct kfd_process { + */ + struct hlist_node kfd_processes; + +- struct mm_struct *mm; ++ /* ++ * Opaque pointer to mm_struct. We don't hold a reference to ++ * it so it should never be dereferenced from here. This is ++ * only used for looking up processes by their mm. ++ */ ++ void *mm; + + struct kref ref; + struct work_struct release_work; +-- +2.7.4 + |