1 files changed, 269 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2060-drm-amdgpu-Fix-deadlock-during-GPU-reset.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2060-drm-amdgpu-Fix-deadlock-during-GPU-reset.patch
new file mode 100644
index 00000000..11bdd2c4
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2060-drm-amdgpu-Fix-deadlock-during-GPU-reset.patch
@@ -0,0 +1,269 @@
+From 049ba4298fb3a66fe8e313237bec1601f8df6f6d Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
+Date: Thu, 12 Oct 2017 16:46:26 -0400
+Subject: [PATCH 2060/4131] drm/amdgpu: Fix deadlock during GPU reset.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Bug:
+Kfifo is limited in size. During GPU reset it would fill up to its limit,
+and the pushing thread (producer) would wait for the scheduler worker to
+consume the items in the fifo while holding the reservation lock
+on a BO. The GPU reset thread, on the other hand, blocks the scheduler
+during reset. Before it unblocks the scheduler it might want
+to recover VRAM and so will try to reserve the same BO the producer
+thread is already holding, creating a deadlock.
+
+Fix:
+Switch from kfifo to an SPSC queue, which is unlimited in size.
+
+Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+
+ Conflicts:
+        drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+        drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+
+Change-Id: I7ea1927ca40435db424952cd9848920f4290f0f0
+---
+ drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 68 +++++++++----------------
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
+ 3 files changed, 29 insertions(+), 47 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+index 283a0dc..705380e 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+@@ -29,8 +29,8 @@ TRACE_EVENT(amd_sched_job,
+ 			   __entry->id = sched_job->id;
+ 			   __entry->fence = &sched_job->s_fence->finished;
+ 			   __entry->name = sched_job->sched->name;
+-			   __entry->job_count = kfifo_len(
+-				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
++			   __entry->job_count = spsc_queue_count(
++				   &sched_job->s_entity->job_queue);
+ 			   __entry->hw_job_count = atomic_read(
+ 				   &sched_job->sched->hw_rq_count);
+ 			   ),
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+index fe4c4f66..9cbeade 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -28,9 +28,14 @@
+ #include <drm/drmP.h>
+ #include "gpu_scheduler.h"
+ 
++#include "spsc_queue.h"
++
+ #define CREATE_TRACE_POINTS
+ #include "gpu_sched_trace.h"
+ 
++#define to_amd_sched_job(sched_job)		\
++		container_of((sched_job), struct amd_sched_job, queue_node)
++
+ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
+ static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+ static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
+@@ -123,8 +128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
+ 			  struct amd_sched_rq *rq,
+ 			  uint32_t jobs, atomic_t *guilty)
+ {
+-	int r;
+-
+ 	if (!(sched && entity && rq))
+ 		return -EINVAL;
+ 
+@@ -136,9 +139,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
+ 
+ 	spin_lock_init(&entity->rq_lock);
+ 	spin_lock_init(&entity->queue_lock);
+-	r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
+-	if (r)
+-		return r;
++	spsc_queue_init(&entity->job_queue);
+ 
+ 	atomic_set(&entity->fence_seq, 0);
+ 	entity->fence_context = dma_fence_context_alloc(2);
+@@ -171,7 +172,7 @@ static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
+ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
+ {
+ 	rmb();
+-	if (kfifo_is_empty(&entity->job_queue))
++	if (spsc_queue_peek(&entity->job_queue) == NULL)
+ 		return true;
+ 
+ 	return false;
+@@ -186,7 +187,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
+  */
+ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
+ {
+-	if (kfifo_is_empty(&entity->job_queue))
++	if (spsc_queue_peek(&entity->job_queue) == NULL)
+ 		return false;
+ 
+ 	if (ACCESS_ONCE(entity->dependency))
+@@ -228,16 +229,14 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
+ 		 */
+ 		kthread_park(sched->thread);
+ 		kthread_unpark(sched->thread);
+-		while (kfifo_out(&entity->job_queue, &job, sizeof(job))) {
++		while ((job = to_amd_sched_job(spsc_queue_pop(&entity->job_queue)))) {
+ 			struct amd_sched_fence *s_fence = job->s_fence;
+ 
+ 			amd_sched_fence_scheduled(s_fence);
+ 			dma_fence_set_error(&s_fence->finished, -ESRCH);
+ 			amd_sched_fence_finished(s_fence);
+ 		}
+-	
+ 	}
+-	kfifo_free(&entity->job_queue);
+ }
+ 
+ static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
+@@ -332,40 +331,41 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
+ }
+ 
+ static struct amd_sched_job *
+-amd_sched_entity_peek_job(struct amd_sched_entity *entity)
++amd_sched_entity_pop_job(struct amd_sched_entity *entity)
+ {
+ 	struct amd_gpu_scheduler *sched = entity->sched;
+-	struct amd_sched_job *sched_job;
++	struct amd_sched_job *sched_job = to_amd_sched_job(
++						spsc_queue_peek(&entity->job_queue));
+ 
+-	if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
++	if (!sched_job)
+ 		return NULL;
+ 
+ 	while ((entity->dependency = sched->ops->dependency(sched_job)))
+ 		if (amd_sched_entity_add_dependency_cb(entity))
+ 			return NULL;
+ 
++	sched_job->s_entity = NULL;
++	spsc_queue_pop(&entity->job_queue);
+ 	return sched_job;
+ }
+ 
+ /**
+- * Helper to submit a job to the job queue
++ * Submit a job to the job queue
+  *
+  * @sched_job		The pointer to job required to submit
+  *
+- * Returns true if we could submit the job.
++ * Returns 0 for success, negative error code otherwise.
+  */
+-static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
++void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+ {
+ 	struct amd_gpu_scheduler *sched = sched_job->sched;
+ 	struct amd_sched_entity *entity = sched_job->s_entity;
+-	bool added, first = false;
++	bool first = false;
+ 
+-	spin_lock(&entity->queue_lock);
+-	added = kfifo_in(&entity->job_queue, &sched_job,
+-			sizeof(sched_job)) == sizeof(sched_job);
++	trace_amd_sched_job(sched_job);
+ 
+-	if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
+-		first = true;
++	spin_lock(&entity->queue_lock);
++	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
+ 
+ 	spin_unlock(&entity->queue_lock);
+ 
+@@ -377,7 +377,6 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
+ 		spin_unlock(&entity->rq_lock);
+ 		amd_sched_wakeup(sched);
+ 	}
+-	return added;
+ }
+ 
+ /* job_finish is called after hw fence signaled
+@@ -534,22 +533,6 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
+ 	spin_unlock(&sched->job_list_lock);
+ }
+ 
+-/**
+- * Submit a job to the job queue
+- *
+- * @sched_job		The pointer to job required to submit
+- *
+- * Returns 0 for success, negative error code otherwise.
+- */
+-void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
+-{
+-	struct amd_sched_entity *entity = sched_job->s_entity;
+-
+-	trace_amd_sched_job(sched_job);
+-	wait_event(entity->sched->job_scheduled,
+-		   amd_sched_entity_in(sched_job));
+-}
+-
+ /* init a sched_job with basic field */
+ int amd_sched_job_init(struct amd_sched_job *job,
+ 		       struct amd_gpu_scheduler *sched,
+@@ -640,7 +623,7 @@ static int amd_sched_main(void *param)
+ {
+ 	struct sched_param sparam = {.sched_priority = 1};
+ 	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
+-	int r, count;
++	int r;
+ 
+ 	sched_setscheduler(current, SCHED_FIFO, &sparam);
+ 
+@@ -658,7 +641,7 @@ static int amd_sched_main(void *param)
+ 		if (!entity)
+ 			continue;
+ 
+-		sched_job = amd_sched_entity_peek_job(entity);
++		sched_job = amd_sched_entity_pop_job(entity);
+ 		if (!sched_job)
+ 			continue;
+ 
+@@ -685,9 +668,6 @@ static int amd_sched_main(void *param)
+ 			amd_sched_process_job(NULL, &s_fence->cb);
+ 		}
+ 
+-		count = kfifo_out(&entity->job_queue, &sched_job,
+-				sizeof(sched_job));
+-		WARN_ON(count != sizeof(sched_job));
+ 		wake_up(&sched->job_scheduled);
+ 	}
+ 	return 0;
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+index 64c887f..4fbbbc8 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+@@ -25,6 +25,7 @@
+ #define _GPU_SCHEDULER_H_
+ 
+ #include <linux/kfifo.h>
++#include "spsc_queue.h"
+ 
+ struct amd_gpu_scheduler;
+ struct amd_sched_rq;
+@@ -55,7 +56,7 @@ struct amd_sched_entity {
+ 	struct amd_gpu_scheduler	*sched;
+ 
+ 	spinlock_t			queue_lock;
+-	struct kfifo                    job_queue;
++	struct spsc_queue	job_queue;
+ 
+ 	atomic_t			fence_seq;
+ 	uint64_t                        fence_context;
+@@ -87,6 +88,7 @@ struct amd_sched_fence {
+ };
+ 
+ struct amd_sched_job {
++	struct spsc_node queue_node;
+ 	struct amd_gpu_scheduler        *sched;
+ 	struct amd_sched_entity         *s_entity;
+ 	struct amd_sched_fence          *s_fence;
+-- 
+2.7.4
+