Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch  203
1 file changed, 203 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch
new file mode 100644
index 00000000..840bf54c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch
@@ -0,0 +1,203 @@
+From e34563308836759fd758d2f8507207acac7cfaf8 Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Date: Wed, 5 Dec 2018 14:21:28 -0500
+Subject: [PATCH 1278/2940] drm/sched: Rework HW fence processing.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Expedite job deletion from the ring mirror list to the HW fence signal
+callback instead of doing it from finish_work. Together with waiting
+for all such fences to signal in drm_sched_stop, this guarantees that
+an already signaled job will not be processed twice.
+Remove the sched finish fence callback and just submit finish_work
+directly from the HW fence callback.
+
+v2: Fix comments.
+v3: Attach hw fence cb to sched_job
+v5: Rebase
+
+Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
+Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com>
+---
+ drivers/gpu/drm/scheduler/sched_main.c | 56 +++++++++++++-------------
+ include/drm/gpu_scheduler.h | 6 +--
+ 2 files changed, 30 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
+index 0b20d7180752..0dadc5798478 100644
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -286,8 +286,6 @@ static void drm_sched_job_finish(struct work_struct *work)
+ cancel_delayed_work_sync(&sched->work_tdr);
+
+ spin_lock_irqsave(&sched->job_list_lock, flags);
+- /* remove job from ring_mirror_list */
+- list_del_init(&s_job->node);
+ /* queue TDR for next job */
+ drm_sched_start_timeout(sched);
+ spin_unlock_irqrestore(&sched->job_list_lock, flags);
+@@ -295,22 +293,11 @@ static void drm_sched_job_finish(struct work_struct *work)
+ sched->ops->free_job(s_job);
+ }
+
+-static void drm_sched_job_finish_cb(struct dma_fence *f,
+- struct dma_fence_cb *cb)
+-{
+- struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
+- finish_cb);
+- schedule_work(&job->finish_work);
+-}
+-
+ static void drm_sched_job_begin(struct drm_sched_job *s_job)
+ {
+ struct drm_gpu_scheduler *sched = s_job->sched;
+ unsigned long flags;
+
+- dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
+- drm_sched_job_finish_cb);
+-
+ spin_lock_irqsave(&sched->job_list_lock, flags);
+ list_add_tail(&s_job->node, &sched->ring_mirror_list);
+ drm_sched_start_timeout(sched);
+@@ -407,7 +394,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched)
+ list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+ if (s_job->s_fence->parent &&
+ dma_fence_remove_callback(s_job->s_fence->parent,
+- &s_job->s_fence->cb)) {
++ &s_job->cb)) {
+ dma_fence_put(s_job->s_fence->parent);
+ s_job->s_fence->parent = NULL;
+ atomic_dec(&sched->hw_rq_count);
+@@ -435,29 +422,33 @@ EXPORT_SYMBOL(drm_sched_stop);
+ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
+ {
+ struct drm_sched_job *s_job, *tmp;
+- unsigned long flags;
+ int r;
+
+ if (!full_recovery)
+ goto unpark;
+
+- spin_lock_irqsave(&sched->job_list_lock, flags);
++ /*
++	 * Locking the list is not required here as the sched thread is parked,
++	 * so no new jobs are being pushed to HW, and in drm_sched_stop we
++	 * flushed all the jobs that were still in the mirror list but had
++	 * already signaled and removed themselves from the list. Also,
++	 * concurrent GPU recoveries can't run in parallel.
++ */
++
+ list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+- struct drm_sched_fence *s_fence = s_job->s_fence;
+ struct dma_fence *fence = s_job->s_fence->parent;
+ if (fence) {
+- r = dma_fence_add_callback(fence, &s_fence->cb,
++ r = dma_fence_add_callback(fence, &s_job->cb,
+ drm_sched_process_job);
+ if (r == -ENOENT)
+- drm_sched_process_job(fence, &s_fence->cb);
++ drm_sched_process_job(fence, &s_job->cb);
+ else if (r)
+ DRM_ERROR("fence add callback failed (%d)\n",
+ r);
+ } else
+- drm_sched_process_job(NULL, &s_fence->cb);
++ drm_sched_process_job(NULL, &s_job->cb);
+ }
+ drm_sched_start_timeout(sched);
+- spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+ unpark:
+ kthread_unpark(sched->thread);
+@@ -606,18 +597,27 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
+ */
+ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
+ {
+- struct drm_sched_fence *s_fence =
+- container_of(cb, struct drm_sched_fence, cb);
++ struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
++ struct drm_sched_fence *s_fence = s_job->s_fence;
+ struct drm_gpu_scheduler *sched = s_fence->sched;
++ unsigned long flags;
++
++ cancel_delayed_work(&sched->work_tdr);
+
+- dma_fence_get(&s_fence->finished);
+ atomic_dec(&sched->hw_rq_count);
+ atomic_dec(&sched->num_jobs);
++
++ spin_lock_irqsave(&sched->job_list_lock, flags);
++ /* remove job from ring_mirror_list */
++ list_del_init(&s_job->node);
++ spin_unlock_irqrestore(&sched->job_list_lock, flags);
++
+ drm_sched_fence_finished(s_fence);
+
+ trace_drm_sched_process_job(s_fence);
+- dma_fence_put(&s_fence->finished);
+ wake_up_interruptible(&sched->wake_up_worker);
++
++ schedule_work(&s_job->finish_work);
+ }
+
+ /**
+@@ -680,10 +680,10 @@ static int drm_sched_main(void *param)
+
+ if (fence) {
+ s_fence->parent = dma_fence_get(fence);
+- r = dma_fence_add_callback(fence, &s_fence->cb,
++ r = dma_fence_add_callback(fence, &sched_job->cb,
+ drm_sched_process_job);
+ if (r == -ENOENT)
+- drm_sched_process_job(fence, &s_fence->cb);
++ drm_sched_process_job(fence, &sched_job->cb);
+ else if (r)
+ DRM_ERROR("fence add callback failed (%d)\n",
+ r);
+@@ -691,7 +691,7 @@ static int drm_sched_main(void *param)
+ } else {
+ if (s_fence->finished.error < 0)
+ drm_sched_expel_job_unlocked(s_job);
+- drm_sched_process_job(NULL, &s_fence->cb);
++ drm_sched_process_job(NULL, &sched_job->cb);
+ }
+
+ wake_up(&sched->job_scheduled);
+diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
+index 046620acd6de..d1d7fe467787 100644
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -134,10 +134,6 @@ struct drm_sched_fence {
+ */
+ struct dma_fence finished;
+
+- /**
+- * @cb: the callback for the parent fence below.
+- */
+- struct dma_fence_cb cb;
+ /**
+ * @parent: the fence returned by &drm_sched_backend_ops.run_job
+ * when scheduling the job on hardware. We signal the
+@@ -178,6 +174,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
+ * be scheduled further.
+ * @s_priority: the priority of the job.
+ * @entity: the entity to which this job belongs.
++ * @cb: the callback for the parent fence in s_fence.
+ *
+ * A job is created by the driver using drm_sched_job_init(), and
+ * should call drm_sched_entity_push_job() once it wants the scheduler
+@@ -194,6 +191,7 @@ struct drm_sched_job {
+ atomic_t karma;
+ enum drm_sched_priority s_priority;
+ struct drm_sched_entity *entity;
++ struct dma_fence_cb cb;
+ };
+
+ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
+--
+2.17.1
+
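For context outside the patch itself: the core of the rework is that the dma_fence_cb used for the HW fence now lives in struct drm_sched_job rather than in struct drm_sched_fence, so drm_sched_process_job() can recover the job via container_of() and drop it from the ring mirror list as soon as the fence signals. Below is a minimal, self-contained userspace C sketch of that pattern only; it is not the kernel code, and all names here (mock_cb, mock_job, process_job_cb, mirror_list) are invented for illustration.

/*
 * Minimal userspace sketch (assumed/simplified, not the kernel code):
 * the fence callback is embedded in the job, so the callback handler
 * can recover the job with container_of() and remove it from the ring
 * mirror list as soon as the fence signals.
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mock_cb;
typedef void (*mock_cb_func_t)(struct mock_cb *cb);

/* Stand-in for struct dma_fence_cb. */
struct mock_cb {
	mock_cb_func_t func;
};

/* Stand-in for struct list_head. */
struct mock_node {
	struct mock_node *prev, *next;
};

static void list_init(struct mock_node *n) { n->prev = n->next = n; }

static void list_add_tail(struct mock_node *n, struct mock_node *head)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

static void list_del_init(struct mock_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
	list_init(n);
}

/* Stand-in for struct drm_sched_job: the cb now lives in the job. */
struct mock_job {
	struct mock_node node;  /* entry in the ring mirror list */
	struct mock_cb cb;      /* HW fence callback, moved out of the fence */
	int id;
};

static struct mock_node mirror_list;

/* Analogue of drm_sched_process_job(): container_of() on the embedded
 * cb yields the job, which is dropped from the mirror list right away. */
static void process_job_cb(struct mock_cb *cb)
{
	struct mock_job *job = container_of(cb, struct mock_job, cb);

	list_del_init(&job->node);
	printf("job %d signaled, removed from mirror list\n", job->id);
}

int main(void)
{
	struct mock_job job = { .id = 1 };

	list_init(&mirror_list);
	list_init(&job.node);
	job.cb.func = process_job_cb;
	list_add_tail(&job.node, &mirror_list);

	/* Simulate the HW fence signaling: fire the embedded callback. */
	job.cb.func(&job.cb);
	return 0;
}

In the patch itself, the same idea appears as the new struct dma_fence_cb cb member added to struct drm_sched_job in include/drm/gpu_scheduler.h and the container_of(cb, struct drm_sched_job, cb) lookup in drm_sched_process_job().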