meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0809-drm-scheduler-remove-timeout-work_struct-from-drm_sc.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

From 575034e093dce49b6c2f164cda1e6faaf7017a8c Mon Sep 17 00:00:00 2001
From: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Date: Wed, 26 Sep 2018 02:09:02 +0900
Subject: [PATCH 0809/2940] drm/scheduler: remove timeout work_struct from
 drm_sched_job (v3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

having a delayed work item per job is redundant as we only need one
per scheduler to track the time out the currently executing job.

v2: the first element of the ring mirror list is the currently
executing job so we don't need a additional variable for it

v3: squash in fixes for v3d and etnaviv

Signed-off-by: Nayan Deshmukh <nayan26deshmukh@gmail.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_sched.c |  2 +-
 drivers/gpu/drm/scheduler/sched_main.c  | 31 +++++++++++++------------
 drivers/gpu/drm/v3d/v3d_sched.c         |  2 +-
 include/drm/gpu_scheduler.h             |  6 ++---
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index e5a9fae31ab7..9b476368aa31 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -105,7 +105,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 	change = dma_addr - gpu->hangcheck_dma_addr;
 	if (change < 0 || change > 16) {
 		gpu->hangcheck_dma_addr = dma_addr;
-		schedule_delayed_work(&sched_job->work_tdr,
+		schedule_delayed_work(&sched_job->sched->work_tdr,
 				      sched_job->sched->timeout);
 		return;
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 2e416c3fd206..10403a232729 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -210,19 +210,15 @@ static void drm_sched_job_finish(struct work_struct *work)
 	 * manages to find this job as the next job in the list, the fence
 	 * signaled check below will prevent the timeout to be restarted.
 	 */
-	cancel_delayed_work_sync(&s_job->work_tdr);
+	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock(&sched->job_list_lock);
-	/* queue TDR for next job */
-	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-	    !list_is_last(&s_job->node, &sched->ring_mirror_list)) {
-		struct drm_sched_job *next = list_next_entry(s_job, node);
-
-		if (!dma_fence_is_signaled(&next->s_fence->finished))
-			schedule_delayed_work(&next->work_tdr, sched->timeout);
-	}
 	/* remove job from ring_mirror_list */
 	list_del(&s_job->node);
+	/* queue TDR for next job */
+	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
+	    !list_empty(&sched->ring_mirror_list))
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	spin_unlock(&sched->job_list_lock);
 
 	dma_fence_put(&s_job->s_fence->finished);
@@ -249,16 +245,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    list_first_entry_or_null(&sched->ring_mirror_list,
 				     struct drm_sched_job, node) == s_job)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 	spin_unlock(&sched->job_list_lock);
 }
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
-	struct drm_sched_job *job = container_of(work, struct drm_sched_job,
-						 work_tdr.work);
+	struct drm_gpu_scheduler *sched;
+	struct drm_sched_job *job;
+
+	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
+	job = list_first_entry_or_null(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);
 
-	job->sched->ops->timedout_job(job);
+	if (job)
+		job->sched->ops->timedout_job(job);
 }
 
 /**
@@ -328,7 +329,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
 					 struct drm_sched_job, node);
 	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+		schedule_delayed_work(&sched->work_tdr, sched->timeout);
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -400,7 +401,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 
 	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
-	INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);
 
 	return 0;
 }
@@ -591,6 +591,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	INIT_LIST_HEAD(&sched->ring_mirror_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
+	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	atomic_set(&sched->num_jobs, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index a5501581d96b..9243dea6e6ad 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -168,7 +168,7 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 		job->timedout_ctca = ctca;
 		job->timedout_ctra = ctra;
 
-		schedule_delayed_work(&job->base.work_tdr,
+		schedule_delayed_work(&job->base.sched->work_tdr,
 				      job->base.sched->timeout);
 		return;
 	}
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index b0646520b971..8799458a4f23 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -172,8 +172,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  *               finished to remove the job from the
  *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
- * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout
- *            interval is over.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
  *         limit of the scheduler then the job is marked guilty and will not
@@ -192,7 +190,6 @@ struct drm_sched_job {
 	struct dma_fence_cb		finish_cb;
 	struct work_struct		finish_work;
 	struct list_head		node;
-	struct delayed_work		work_tdr;
 	uint64_t			id;
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
@@ -256,6 +253,8 @@ struct drm_sched_backend_ops {
  *                 finished.
  * @hw_rq_count: the number of jobs currently in the hardware queue.
  * @job_id_count: used to assign unique id to the each job.
+ * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
+ *            timeout interval is over.
  * @thread: the kthread on which the scheduler which run.
  * @ring_mirror_list: the list of jobs which are currently in the job queue.
  * @job_list_lock: lock to protect the ring_mirror_list.
@@ -276,6 +275,7 @@ struct drm_gpu_scheduler {
 	wait_queue_head_t		job_scheduled;
 	atomic_t			hw_rq_count;
 	atomic64_t			job_id_count;
+	struct delayed_work		work_tdr;
 	struct task_struct		*thread;
 	struct list_head		ring_mirror_list;
 	spinlock_t			job_list_lock;
-- 
2.17.1