1 files changed, 153 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0480-drm-amdgpu-rework-scheduler-submission-handling.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0480-drm-amdgpu-rework-scheduler-submission-handling.patch
new file mode 100644
index 00000000..5577872f
--- /dev/null
+++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0480-drm-amdgpu-rework-scheduler-submission-handling.patch
@@ -0,0 +1,153 @@
+From c746ba222363d2e0b0316b1c0bda21e2646b66f2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Wed, 19 Aug 2015 16:12:15 +0200
+Subject: [PATCH 0480/1050] drm/amdgpu: rework scheduler submission handling.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Remove active_hw_rq and it's protecting queue_lock, they are unused.
+
+User 32bit atomic for hw_rq_count, 64bits for counting to three is a bit
+overkill.
+
+Cleanup the function name and remove incorrect comments.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
+---
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 43 +++++++--------------------
+ drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  5 +---
+ 2 files changed, 11 insertions(+), 37 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+index f8d46b0..981420e 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+@@ -94,25 +94,12 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
+ }
+ 
+ /**
+- * Note: This function should only been called inside scheduler main
+- * function for thread safety, there is no other protection here.
+- * return ture if scheduler has something ready to run.
+- *
+- * For active_hw_rq, there is only one producer(scheduler thread) and
+- * one consumer(ISR). It should be safe to use this function in scheduler
+- * main thread to decide whether to continue emit more IBs.
+-*/
+-static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
++ * Return ture if we can push more jobs to the hw.
++ */
++static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
+ {
+-	unsigned long flags;
+-	bool full;
+-
+-	spin_lock_irqsave(&sched->queue_lock, flags);
+-	full = atomic64_read(&sched->hw_rq_count) <
+-		sched->hw_submission_limit ? true : false;
+-	spin_unlock_irqrestore(&sched->queue_lock, flags);
+-
+-	return full;
++	return atomic_read(&sched->hw_rq_count) <
++		sched->hw_submission_limit;
+ }
+ 
+ /**
+@@ -124,7 +111,7 @@ select_context(struct amd_gpu_scheduler *sched)
+ 	struct amd_sched_entity *wake_entity = NULL;
+ 	struct amd_sched_entity *tmp;
+ 
+-	if (!is_scheduler_ready(sched))
++	if (!amd_sched_ready(sched))
+ 		return NULL;
+ 
+ 	/* Kernel run queue has higher priority than normal run queue*/
+@@ -293,14 +280,10 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
+ 	struct amd_sched_job *sched_job =
+ 		container_of(cb, struct amd_sched_job, cb);
+ 	struct amd_gpu_scheduler *sched;
+-	unsigned long flags;
+ 
+ 	sched = sched_job->sched;
+ 	amd_sched_fence_signal(sched_job->s_fence);
+-	spin_lock_irqsave(&sched->queue_lock, flags);
+-	list_del(&sched_job->list);
+-	atomic64_dec(&sched->hw_rq_count);
+-	spin_unlock_irqrestore(&sched->queue_lock, flags);
++	atomic_dec(&sched->hw_rq_count);
+ 	fence_put(&sched_job->s_fence->base);
+ 	sched->ops->process_job(sched, sched_job);
+ 	wake_up_interruptible(&sched->wait_queue);
+@@ -320,7 +303,7 @@ static int amd_sched_main(void *param)
+ 		struct fence *fence;
+ 
+ 		wait_event_interruptible(sched->wait_queue,
+-					 is_scheduler_ready(sched) &&
++					 amd_sched_ready(sched) &&
+ 					 (c_entity = select_context(sched)));
+ 		r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
+ 		if (r != sizeof(void *))
+@@ -329,11 +312,7 @@ static int amd_sched_main(void *param)
+ 		if (sched->ops->prepare_job)
+ 			r = sched->ops->prepare_job(sched, c_entity, job);
+ 		if (!r) {
+-			unsigned long flags;
+-			spin_lock_irqsave(&sched->queue_lock, flags);
+-			list_add_tail(&job->list, &sched->active_hw_rq);
+-			atomic64_inc(&sched->hw_rq_count);
+-			spin_unlock_irqrestore(&sched->queue_lock, flags);
++			atomic_inc(&sched->hw_rq_count);
+ 		}
+ 		mutex_lock(&sched->sched_lock);
+ 		fence = sched->ops->run_job(sched, c_entity, job);
+@@ -384,13 +363,11 @@ struct amd_gpu_scheduler *amd_sched_create(void *device,
+ 	sched->hw_submission_limit = hw_submission;
+ 	snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
+ 	mutex_init(&sched->sched_lock);
+-	spin_lock_init(&sched->queue_lock);
+ 	amd_sched_rq_init(&sched->sched_rq);
+ 	amd_sched_rq_init(&sched->kernel_rq);
+ 
+ 	init_waitqueue_head(&sched->wait_queue);
+-	INIT_LIST_HEAD(&sched->active_hw_rq);
+-	atomic64_set(&sched->hw_rq_count, 0);
++	atomic_set(&sched->hw_rq_count, 0);
+ 	/* Each scheduler will run on a seperate kernel thread */
+ 	sched->thread = kthread_create(amd_sched_main, sched, name);
+ 	if (sched->thread) {
+diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+index d328e96..81c00da 100644
+--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
++++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+@@ -74,7 +74,6 @@ struct amd_sched_fence {
+ };
+ 
+ struct amd_sched_job {
+-	struct list_head		list;
+ 	struct fence_cb                 cb;
+ 	struct amd_gpu_scheduler        *sched;
+ 	struct amd_sched_entity         *s_entity;
+@@ -115,8 +114,7 @@ struct amd_gpu_scheduler {
+ 	struct task_struct		*thread;
+ 	struct amd_sched_rq		sched_rq;
+ 	struct amd_sched_rq		kernel_rq;
+-	struct list_head		active_hw_rq;
+-	atomic64_t			hw_rq_count;
++	atomic_t			hw_rq_count;
+ 	struct amd_sched_backend_ops	*ops;
+ 	uint32_t			ring_id;
+ 	uint32_t			granularity; /* in ms unit */
+@@ -124,7 +122,6 @@ struct amd_gpu_scheduler {
+ 	wait_queue_head_t		wait_queue;
+ 	struct amd_sched_entity	*current_entity;
+ 	struct mutex			sched_lock;
+-	spinlock_t			queue_lock;
+ 	uint32_t                        hw_submission_limit;
+ };
+ 
+-- 
+1.9.1
+