Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch | 203
1 file changed, 203 insertions(+), 0 deletions(-)
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch
new file mode 100644
index 00000000..840bf54c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1278-drm-sched-Rework-HW-fence-processing.patch
@@ -0,0 +1,203 @@
+From e34563308836759fd758d2f8507207acac7cfaf8 Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Date: Wed, 5 Dec 2018 14:21:28 -0500
+Subject: [PATCH 1278/2940] drm/sched: Rework HW fence processing.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Expedite job deletion from the ring mirror list to the HW fence signal
+callback instead of from finish_work; together with waiting for all
+such fences to signal in drm_sched_stop, this guarantees that an
+already signaled job will not be processed twice.
+Remove the sched finish fence callback and just submit finish_work
+directly from the HW fence callback.
+
+v2: Fix comments.
+v3: Attach hw fence cb to sched_job
+v5: Rebase
+
+Suggested-by: Christian Koenig <Christian.Koenig@amd.com>
+Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com>
+---
+ drivers/gpu/drm/scheduler/sched_main.c | 56 +++++++++++++-------------
+ include/drm/gpu_scheduler.h            |  6 +--
+ 2 files changed, 30 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
+index 0b20d7180752..0dadc5798478 100644
+--- a/drivers/gpu/drm/scheduler/sched_main.c
++++ b/drivers/gpu/drm/scheduler/sched_main.c
+@@ -286,8 +286,6 @@ static void drm_sched_job_finish(struct work_struct *work)
+ 	cancel_delayed_work_sync(&sched->work_tdr);
+ 
+ 	spin_lock_irqsave(&sched->job_list_lock, flags);
+-	/* remove job from ring_mirror_list */
+-	list_del_init(&s_job->node);
+ 	/* queue TDR for next job */
+ 	drm_sched_start_timeout(sched);
+ 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+@@ -295,22 +293,11 @@ static void drm_sched_job_finish(struct work_struct *work)
+ 	sched->ops->free_job(s_job);
+ }
+ 
+-static void drm_sched_job_finish_cb(struct dma_fence *f,
+-				    struct dma_fence_cb *cb)
+-{
+-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
+-						 finish_cb);
+-	schedule_work(&job->finish_work);
+-}
+-
+ static void drm_sched_job_begin(struct drm_sched_job *s_job)
+ {
+ 	struct drm_gpu_scheduler *sched = s_job->sched;
+ 	unsigned long flags;
+ 
+-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
+-			       drm_sched_job_finish_cb);
+-
+ 	spin_lock_irqsave(&sched->job_list_lock, flags);
+ 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
+ 	drm_sched_start_timeout(sched);
+@@ -407,7 +394,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched)
+ 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+ 		if (s_job->s_fence->parent &&
+ 		    dma_fence_remove_callback(s_job->s_fence->parent,
+-					      &s_job->s_fence->cb)) {
++					      &s_job->cb)) {
+ 			dma_fence_put(s_job->s_fence->parent);
+ 			s_job->s_fence->parent = NULL;
+ 			atomic_dec(&sched->hw_rq_count);
+@@ -435,29 +422,33 @@ EXPORT_SYMBOL(drm_sched_stop);
+ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
+ {
+ 	struct drm_sched_job *s_job, *tmp;
+-	unsigned long flags;
+ 	int r;
+ 
+ 	if (!full_recovery)
+ 		goto unpark;
+ 
+-	spin_lock_irqsave(&sched->job_list_lock, flags);
++	/*
++	 * Locking the list is not required here as the sched thread is parked,
++	 * so no new jobs are being pushed into HW, and in drm_sched_stop we
++	 * flushed all the jobs that were still in the mirror list but had
++	 * already signaled and removed themselves from the list. Also,
++	 * concurrent GPU recoveries can't run in parallel.
++	 */
++
+ 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+-		struct drm_sched_fence *s_fence = s_job->s_fence;
+ 		struct dma_fence *fence = s_job->s_fence->parent;
+ 		if (fence) {
+-			r = dma_fence_add_callback(fence, &s_fence->cb,
++			r = dma_fence_add_callback(fence, &s_job->cb,
+ 						   drm_sched_process_job);
+ 			if (r == -ENOENT)
+-				drm_sched_process_job(fence, &s_fence->cb);
++				drm_sched_process_job(fence, &s_job->cb);
+ 			else if (r)
+ 				DRM_ERROR("fence add callback failed (%d)\n",
+ 					  r);
+ 		} else
+-			drm_sched_process_job(NULL, &s_fence->cb);
++			drm_sched_process_job(NULL, &s_job->cb);
+ 	}
+ 	drm_sched_start_timeout(sched);
+-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+ 
+ unpark:
+ 	kthread_unpark(sched->thread);
+@@ -606,18 +597,27 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
+  */
+ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
+ {
+-	struct drm_sched_fence *s_fence =
+-		container_of(cb, struct drm_sched_fence, cb);
++	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
++	struct drm_sched_fence *s_fence = s_job->s_fence;
+ 	struct drm_gpu_scheduler *sched = s_fence->sched;
++	unsigned long flags;
++
++	cancel_delayed_work(&sched->work_tdr);
+ 
+-	dma_fence_get(&s_fence->finished);
+ 	atomic_dec(&sched->hw_rq_count);
+ 	atomic_dec(&sched->num_jobs);
++
++	spin_lock_irqsave(&sched->job_list_lock, flags);
++	/* remove job from ring_mirror_list */
++	list_del_init(&s_job->node);
++	spin_unlock_irqrestore(&sched->job_list_lock, flags);
++
+ 	drm_sched_fence_finished(s_fence);
+ 
+ 	trace_drm_sched_process_job(s_fence);
+-	dma_fence_put(&s_fence->finished);
+ 	wake_up_interruptible(&sched->wake_up_worker);
++
++	schedule_work(&s_job->finish_work);
+ }
+ 
+ /**
+@@ -680,10 +680,10 @@ static int drm_sched_main(void *param)
+ 
+ 		if (fence) {
+ 			s_fence->parent = dma_fence_get(fence);
+-			r = dma_fence_add_callback(fence, &s_fence->cb,
++			r = dma_fence_add_callback(fence, &sched_job->cb,
+ 						   drm_sched_process_job);
+ 			if (r == -ENOENT)
+-				drm_sched_process_job(fence, &s_fence->cb);
++				drm_sched_process_job(fence, &sched_job->cb);
+ 			else if (r)
+ 				DRM_ERROR("fence add callback failed (%d)\n",
+ 					  r);
+@@ -691,7 +691,7 @@ static int drm_sched_main(void *param)
+ 		} else {
+ 			if (s_fence->finished.error < 0)
+ 				drm_sched_expel_job_unlocked(s_job);
+-			drm_sched_process_job(NULL, &s_fence->cb);
++			drm_sched_process_job(NULL, &sched_job->cb);
+ 		}
+ 
+ 		wake_up(&sched->job_scheduled);
+diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
+index 046620acd6de..d1d7fe467787 100644
+--- a/include/drm/gpu_scheduler.h
++++ b/include/drm/gpu_scheduler.h
+@@ -134,10 +134,6 @@ struct drm_sched_fence {
+ 	 */
+ 	struct dma_fence		finished;
+ 
+-	/**
+-	 * @cb: the callback for the parent fence below.
+-	 */
+-	struct dma_fence_cb		cb;
+ 	/**
+ 	 * @parent: the fence returned by &drm_sched_backend_ops.run_job
+ 	 * when scheduling the job on hardware. We signal the
+@@ -178,6 +174,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
+  *         be scheduled further.
+  * @s_priority: the priority of the job.
+  * @entity: the entity to which this job belongs.
++ * @cb: the callback for the parent fence in s_fence.
+  *
+  * A job is created by the driver using drm_sched_job_init(), and
+  * should call drm_sched_entity_push_job() once it wants the scheduler
+@@ -194,6 +191,7 @@ struct drm_sched_job {
+ 	atomic_t			karma;
+ 	enum drm_sched_priority		s_priority;
+ 	struct drm_sched_entity		*entity;
++	struct dma_fence_cb		cb;
+ };
+ 
+ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
+-- 
+2.17.1
+
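The essence of the change: job teardown moves from the deferred finish_work into the fence-signal callback itself, so a signaled job unlinks itself from the ring mirror list before any recovery path can walk it. The standalone C sketch below models that ordering guarantee in user space; every name in it (toy_fence, toy_job, fence_remove_callback, ...) is a hypothetical stand-in, not the kernel API.

/*
 * Minimal userspace sketch of the pattern this patch adopts: the job is
 * removed from the mirror list inside the fence-signal callback, so a
 * stop/recovery path that fails to detach the callback (because the fence
 * already signaled) knows the job is gone and never processes it twice.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_job;

struct toy_fence {
	bool signaled;
	void (*cb)(struct toy_job *job);	/* callback armed on the HW fence */
	struct toy_job *cb_arg;
};

struct toy_job {
	int id;
	struct toy_fence *parent;		/* the HW fence for this job */
	struct toy_job *prev, *next;		/* node on the ring mirror list */
};

static struct toy_job mirror_head = { .prev = &mirror_head, .next = &mirror_head };

static void list_add_tail_job(struct toy_job *job)
{
	job->prev = mirror_head.prev;
	job->next = &mirror_head;
	mirror_head.prev->next = job;
	mirror_head.prev = job;
}

static void list_del_job(struct toy_job *job)
{
	job->prev->next = job->next;
	job->next->prev = job->prev;
}

/* Analogue of the reworked drm_sched_process_job(): runs on fence signal. */
static void process_job_cb(struct toy_job *job)
{
	list_del_job(job);			/* expedited; used to live in finish_work */
	printf("job %d processed and unlinked\n", job->id);
	free(job->parent);
	free(job);
}

static void fence_signal(struct toy_fence *f)
{
	f->signaled = true;
	if (f->cb)
		f->cb(f->cb_arg);
}

/* Analogue of dma_fence_remove_callback(): fails once the fence signaled. */
static bool fence_remove_callback(struct toy_fence *f)
{
	if (f->signaled)
		return false;
	f->cb = NULL;
	return true;
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct toy_job *job = calloc(1, sizeof(*job));
		job->id = i;
		job->parent = calloc(1, sizeof(*job->parent));
		job->parent->cb = process_job_cb;
		job->parent->cb_arg = job;
		list_add_tail_job(job);
	}

	/* Job 0's fence signals: its callback unlinks and frees it. */
	fence_signal(mirror_head.next->parent);

	/*
	 * Recovery path, like drm_sched_stop(): only jobs whose callback is
	 * still armed remain on the list; signaled jobs already left it.
	 * For brevity the sketch frees pending jobs here, which the real
	 * drm_sched_stop() does not do.
	 */
	for (struct toy_job *j = mirror_head.next, *n = j->next;
	     j != &mirror_head; j = n, n = j->next) {
		if (fence_remove_callback(j->parent)) {
			printf("job %d still pending, callback detached\n", j->id);
			list_del_job(j);
			free(j->parent);
			free(j);
		}
	}
	return 0;
}

The guarantee drm_sched_stop() relies on falls out of fence_remove_callback(): it fails exactly when the fence has already signaled, and in that case the job has already removed itself from the list, so no path touches it twice.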