1 files changed, 164 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3408-drm-amdkfd-Make-kfd_process-reference-counted.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3408-drm-amdkfd-Make-kfd_process-reference-counted.patch
new file mode 100644
index 00000000..290d4532
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3408-drm-amdkfd-Make-kfd_process-reference-counted.patch
@@ -0,0 +1,164 @@
+From 22bc18ee39670854f9cb922ec84664400967d0ae Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Mon, 27 Nov 2017 18:29:51 -0500
+Subject: [PATCH 3408/4131] drm/amdkfd: Make kfd_process reference counted
+
+This will be used to elliminate the use of the process lock for
+preventing concurrent process destruction. This will simplify lock
+dependencies between KFD and KGD.
+
+This also simplifies the process destruction in a few ways:
+* Don't allocate work struct dynamically
+* Remove unnecessary hack that increments mm reference counter
+* Remove unnecessary process locking during destruction
+
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
+Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  4 +++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 58 ++++++++++++--------------------
+ 2 files changed, 26 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index dca493b..248e4f5 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -34,6 +34,7 @@
+ #include <linux/idr.h>
+ #include <linux/kfifo.h>
+ #include <linux/seq_file.h>
++#include <linux/kref.h>
+ #include <kgd_kfd_interface.h>
+ 
+ #include "amd_shared.h"
+@@ -537,6 +538,9 @@ struct kfd_process {
+ 	 */
+ 	void *mm;
+ 
++	struct kref ref;
++	struct work_struct release_work;
++
+ 	struct mutex mutex;
+ 
+ 	/*
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 660d8bc..e02e8a2 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -48,11 +48,6 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
+ 
+ static struct workqueue_struct *kfd_process_wq;
+ 
+-struct kfd_process_release_work {
+-	struct work_struct kfd_work;
+-	struct kfd_process *p;
+-};
+-
+ static struct kfd_process *find_process(const struct task_struct *thread);
+ static struct kfd_process *create_process(const struct task_struct *thread);
+ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
+@@ -151,21 +146,20 @@ static struct kfd_process *find_process(const struct task_struct *thread)
+ 	return p;
+ }
+ 
++/* No process locking is needed in this function, because the process
++ * is not findable any more. We must assume that no other thread is
++ * using it any more, otherwise we couldn't safely free the process
++ * structure in the end.
++ */
+ static void kfd_process_wq_release(struct work_struct *work)
+ {
+-	struct kfd_process_release_work *my_work;
++	struct kfd_process *p = container_of(work, struct kfd_process,
++					     release_work);
+ 	struct kfd_process_device *pdd, *temp;
+-	struct kfd_process *p;
+-
+-	my_work = (struct kfd_process_release_work *) work;
+-
+-	p = my_work->p;
+ 
+ 	pr_debug("Releasing process (pasid %d) in workqueue\n",
+ 			p->pasid);
+ 
+-	mutex_lock(&p->mutex);
+-
+ 	list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+ 							per_device_list) {
+ 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
+@@ -188,33 +182,26 @@ static void kfd_process_wq_release(struct work_struct *work)
+ 	kfd_pasid_free(p->pasid);
+ 	kfd_free_process_doorbells(p);
+ 
+-	mutex_unlock(&p->mutex);
+-
+ 	mutex_destroy(&p->mutex);
+ 
+ 	put_task_struct(p->lead_thread);
+ 
+ 	kfree(p);
+-
+-	kfree(work);
+ }
+ 
+-static void kfd_process_destroy_delayed(struct rcu_head *rcu)
++static void kfd_process_ref_release(struct kref *ref)
+ {
+-	struct kfd_process_release_work *work;
+-	struct kfd_process *p;
+-
+-	p = container_of(rcu, struct kfd_process, rcu);
++	struct kfd_process *p = container_of(ref, struct kfd_process, ref);
+ 
+-	mmdrop(p->mm);
++	INIT_WORK(&p->release_work, kfd_process_wq_release);
++	queue_work(kfd_process_wq, &p->release_work);
++}
+ 
+-	work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
++static void kfd_process_destroy_delayed(struct rcu_head *rcu)
++{
++	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
+ 
+-	if (work) {
+-		INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
+-		work->p = p;
+-		queue_work(kfd_process_wq, (struct work_struct *) work);
+-	}
++	kref_put(&p->ref, kfd_process_ref_release);
+ }
+ 
+ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+@@ -258,15 +245,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
+ 	kfd_process_dequeue_from_all_devices(p);
+ 	pqm_uninit(&p->pqm);
+ 
++	/* Indicate to other users that MM is no longer valid */
++	p->mm = NULL;
++
+ 	mutex_unlock(&p->mutex);
+ 
+-	/*
+-	 * Because we drop mm_count inside kfd_process_destroy_delayed
+-	 * and because the mmu_notifier_unregister function also drop
+-	 * mm_count we need to take an extra count here.
+-	 */
+-	mmgrab(p->mm);
+-	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
++	mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
+ 	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
+ }
+ 
+@@ -331,6 +315,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
+ 	if (kfd_alloc_process_doorbells(process) < 0)
+ 		goto err_alloc_doorbells;
+ 
++	kref_init(&process->ref);
++
+ 	mutex_init(&process->mutex);
+ 
+ 	process->mm = thread->mm;
+-- 
+2.7.4
+