common/recipes-kernel/linux/linux-yocto-4.19.8/1851-drm-scheduler-Add-flag-to-hint-the-release-of-guilty.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

From 854071a4ef27a0f5956e688dafef059099be04d0 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Thu, 18 Apr 2019 10:48:29 -0400
Subject: [PATCH 1851/2940] drm/scheduler: Add flag to hint the release of
 guilty job.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
The sched thread's cleanup function races against the timeout (TO)
handler and removes the guilty job from the mirror list, so we have no
way of telling whether the job was removed from within the TO handler
or from the sched thread's cleanup function.

Fix:
Add a flag to the scheduler to hint to the timeout handler that the
guilty job needs to be explicitly released.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Chaudhary Amit Kumar <Chaudharyamit.Kumar@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 9 +++++++--
 include/drm/gpu_scheduler.h            | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 30565d17e82e..ce614457b884 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -295,8 +295,10 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	 * Guilty job did complete and hence needs to be manually removed
 	 * See drm_sched_stop doc.
 	 */
-	if (list_empty(&job->node))
+	if (sched->free_guilty) {
 		job->sched->ops->free_job(job);
+		sched->free_guilty = false;
+	}
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	drm_sched_start_timeout(sched);
@@ -397,10 +399,13 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 
                         /*
                          * We must keep bad job alive for later use during
-                         * recovery by some of the drivers
+                         * recovery by some of the drivers but leave a hint
+                         * that the guilty job must be released.
                          */
                         if (bad != s_job)
                                 sched->ops->free_job(s_job);
+			else
+                                sched->free_guilty = true;
 		}
 	}
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index ca71df7da495..4d877441145e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -256,6 +256,7 @@ struct drm_sched_backend_ops {
  *              guilty and it will be considered for scheduling further.
  * @num_jobs: the number of jobs in queue in the scheduler
  * @ready: marks if the underlying HW is ready to work
+ * @free_guilty: A hint to the timeout handler to free the guilty job.
  *
  * One scheduler is implemented for each hardware ring.
  */
@@ -276,6 +277,7 @@ struct drm_gpu_scheduler {
 	int				hang_limit;
 	atomic_t                        num_jobs;
 	bool			ready;
+	bool 				free_guilty;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
-- 
2.17.1