common/recipes-kernel/linux/files/0480-drm-amdgpu-rework-scheduler-submission-handling.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

From c746ba222363d2e0b0316b1c0bda21e2646b66f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 19 Aug 2015 16:12:15 +0200
Subject: [PATCH 0480/1050] drm/amdgpu: rework scheduler submission handling.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove active_hw_rq and it's protecting queue_lock, they are unused.

User 32bit atomic for hw_rq_count, 64bits for counting to three is a bit
overkill.

Cleanup the function name and remove incorrect comments.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 43 +++++++--------------------
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  5 +---
 2 files changed, 11 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index f8d46b0..981420e 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -94,25 +94,12 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
 }
 
 /**
- * Note: This function should only been called inside scheduler main
- * function for thread safety, there is no other protection here.
- * return ture if scheduler has something ready to run.
- *
- * For active_hw_rq, there is only one producer(scheduler thread) and
- * one consumer(ISR). It should be safe to use this function in scheduler
- * main thread to decide whether to continue emit more IBs.
-*/
-static bool is_scheduler_ready(struct amd_gpu_scheduler *sched)
+ * Return ture if we can push more jobs to the hw.
+ */
+static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
 {
-	unsigned long flags;
-	bool full;
-
-	spin_lock_irqsave(&sched->queue_lock, flags);
-	full = atomic64_read(&sched->hw_rq_count) <
-		sched->hw_submission_limit ? true : false;
-	spin_unlock_irqrestore(&sched->queue_lock, flags);
-
-	return full;
+	return atomic_read(&sched->hw_rq_count) <
+		sched->hw_submission_limit;
 }
 
 /**
@@ -124,7 +111,7 @@ select_context(struct amd_gpu_scheduler *sched)
 	struct amd_sched_entity *wake_entity = NULL;
 	struct amd_sched_entity *tmp;
 
-	if (!is_scheduler_ready(sched))
+	if (!amd_sched_ready(sched))
 		return NULL;
 
 	/* Kernel run queue has higher priority than normal run queue*/
@@ -293,14 +280,10 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_job *sched_job =
 		container_of(cb, struct amd_sched_job, cb);
 	struct amd_gpu_scheduler *sched;
-	unsigned long flags;
 
 	sched = sched_job->sched;
 	amd_sched_fence_signal(sched_job->s_fence);
-	spin_lock_irqsave(&sched->queue_lock, flags);
-	list_del(&sched_job->list);
-	atomic64_dec(&sched->hw_rq_count);
-	spin_unlock_irqrestore(&sched->queue_lock, flags);
+	atomic_dec(&sched->hw_rq_count);
 	fence_put(&sched_job->s_fence->base);
 	sched->ops->process_job(sched, sched_job);
 	wake_up_interruptible(&sched->wait_queue);
@@ -320,7 +303,7 @@ static int amd_sched_main(void *param)
 		struct fence *fence;
 
 		wait_event_interruptible(sched->wait_queue,
-					 is_scheduler_ready(sched) &&
+					 amd_sched_ready(sched) &&
 					 (c_entity = select_context(sched)));
 		r = kfifo_out(&c_entity->job_queue, &job, sizeof(void *));
 		if (r != sizeof(void *))
@@ -329,11 +312,7 @@ static int amd_sched_main(void *param)
 		if (sched->ops->prepare_job)
 			r = sched->ops->prepare_job(sched, c_entity, job);
 		if (!r) {
-			unsigned long flags;
-			spin_lock_irqsave(&sched->queue_lock, flags);
-			list_add_tail(&job->list, &sched->active_hw_rq);
-			atomic64_inc(&sched->hw_rq_count);
-			spin_unlock_irqrestore(&sched->queue_lock, flags);
+			atomic_inc(&sched->hw_rq_count);
 		}
 		mutex_lock(&sched->sched_lock);
 		fence = sched->ops->run_job(sched, c_entity, job);
@@ -384,13 +363,11 @@ struct amd_gpu_scheduler *amd_sched_create(void *device,
 	sched->hw_submission_limit = hw_submission;
 	snprintf(name, sizeof(name), "gpu_sched[%d]", ring);
 	mutex_init(&sched->sched_lock);
-	spin_lock_init(&sched->queue_lock);
 	amd_sched_rq_init(&sched->sched_rq);
 	amd_sched_rq_init(&sched->kernel_rq);
 
 	init_waitqueue_head(&sched->wait_queue);
-	INIT_LIST_HEAD(&sched->active_hw_rq);
-	atomic64_set(&sched->hw_rq_count, 0);
+	atomic_set(&sched->hw_rq_count, 0);
 	/* Each scheduler will run on a seperate kernel thread */
 	sched->thread = kthread_create(amd_sched_main, sched, name);
 	if (sched->thread) {
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index d328e96..81c00da 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -74,7 +74,6 @@ struct amd_sched_fence {
 };
 
 struct amd_sched_job {
-	struct list_head		list;
 	struct fence_cb                 cb;
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
@@ -115,8 +114,7 @@ struct amd_gpu_scheduler {
 	struct task_struct		*thread;
 	struct amd_sched_rq		sched_rq;
 	struct amd_sched_rq		kernel_rq;
-	struct list_head		active_hw_rq;
-	atomic64_t			hw_rq_count;
+	atomic_t			hw_rq_count;
 	struct amd_sched_backend_ops	*ops;
 	uint32_t			ring_id;
 	uint32_t			granularity; /* in ms unit */
@@ -124,7 +122,6 @@ struct amd_gpu_scheduler {
 	wait_queue_head_t		wait_queue;
 	struct amd_sched_entity	*current_entity;
 	struct mutex			sched_lock;
-	spinlock_t			queue_lock;
 	uint32_t                        hw_submission_limit;
 };
 
-- 
1.9.1