diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4643-drm-scheduler-Avoid-accessing-freed-bad-job.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4643-drm-scheduler-Avoid-accessing-freed-bad-job.patch | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4643-drm-scheduler-Avoid-accessing-freed-bad-job.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4643-drm-scheduler-Avoid-accessing-freed-bad-job.patch new file mode 100644 index 00000000..a2a0a85d --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4643-drm-scheduler-Avoid-accessing-freed-bad-job.patch @@ -0,0 +1,97 @@ +From de6e01403013d5d7ec970b8b4eb4ca07cc736d19 Mon Sep 17 00:00:00 2001 +From: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Date: Mon, 25 Nov 2019 15:51:29 -0500 +Subject: [PATCH 4643/4736] drm/scheduler: Avoid accessing freed bad job. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Problem: +Due to a race between drm_sched_cleanup_jobs in sched thread and +drm_sched_job_timedout in timeout work there is a possibility that +bad job was already freed while still being accessed from the +timeout thread. + +Fix: +Instead of just peeking at the bad job in the mirror list +remove it from the list under lock and then put it back later when +we are guaranteed no race with main sched thread is possible which +is after the thread is parked. + +v2: Lock around processing ring_mirror_list in drm_sched_cleanup_jobs. + +v3: Rebase on top of drm-misc-next. v2 is not needed anymore as +drm_sched_get_cleanup_job already has a lock there. + +v4: Fix comments to reflect latest code in drm-misc. 
+ +Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +Reviewed-by: Emily Deng <Emily.Deng@amd.com> +Tested-by: Emily Deng <Emily.Deng@amd.com> +Signed-off-by: Christian König <christian.koenig@amd.com> +Link: https://patchwork.freedesktop.org/patch/342356 +Signed-off-by: Rahul Kumar <rahul.kumar1@amd.com> +--- + drivers/gpu/drm/scheduler/sched_main.c | 27 ++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c +index 108bac88dedb..0ccce80513e5 100644 +--- a/drivers/gpu/drm/scheduler/sched_main.c ++++ b/drivers/gpu/drm/scheduler/sched_main.c +@@ -288,10 +288,21 @@ static void drm_sched_job_timedout(struct work_struct *work) + unsigned long flags; + + sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); ++ ++ /* Protects against concurrent deletion in drm_sched_get_cleanup_job */ ++ spin_lock_irqsave(&sched->job_list_lock, flags); + job = list_first_entry_or_null(&sched->ring_mirror_list, + struct drm_sched_job, node); + + if (job) { ++ /* ++ * Remove the bad job so it cannot be freed by concurrent ++ * drm_sched_cleanup_jobs. It will be reinserted back after sched->thread ++ * is parked at which point it's safe. 
++ */ ++ list_del_init(&job->node); ++ spin_unlock_irqrestore(&sched->job_list_lock, flags); ++ + job->sched->ops->timedout_job(job); + + /* +@@ -302,6 +313,8 @@ static void drm_sched_job_timedout(struct work_struct *work) + job->sched->ops->free_job(job); + sched->free_guilty = false; + } ++ } else { ++ spin_unlock_irqrestore(&sched->job_list_lock, flags); + } + + spin_lock_irqsave(&sched->job_list_lock, flags); +@@ -413,6 +426,20 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) + } + } + ++ /* ++ * Reinsert back the bad job here - now it's safe as ++ * drm_sched_get_cleanup_job cannot race against us and release the ++ * bad job at this point - we parked (waited for) any in progress ++ * (earlier) cleanups and drm_sched_get_cleanup_job will not be called ++ * now until the scheduler thread is unparked. ++ */ ++ if (bad && bad->sched == sched) ++ /* ++ * Add at the head of the queue to reflect it was the earliest ++ * job extracted. ++ */ ++ list_add(&bad->node, &sched->ring_mirror_list); ++ + /* + * Stop pending timer in flight as we rearm it in drm_sched_start. This + * avoids the pending timeout work in progress to fire right away after +-- +2.17.1 + |