about | summary | refs | log | tree | commit | diff | stats
path: root/common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch
blob: 1c7c11588ea43e12890f81079c913a43465b0b05 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
From b7071d88be433418c213ae57acff2e3ee9fe93d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 14 Mar 2016 14:29:46 +0100
Subject: [PATCH 0401/1110] drm/amdgpu: signal fences directly in
 amdgpu_fence_process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because of the scheduler we need to signal all fences immediately
anyway, so try to avoid the waitqueue overhead.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 97 ++++++++++---------------------
 2 files changed, 31 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 43c948d..05a0ffb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -353,8 +353,8 @@ struct amdgpu_fence_driver {
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
 	struct timer_list		fallback_timer;
-	wait_queue_head_t		fence_queue;
 	unsigned			num_fences_mask;
+	spinlock_t			lock;
 	struct fence			**fences;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d5bdd96..c5980c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -53,8 +53,6 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
 	uint64_t			seq;
-
-	wait_queue_t			fence_wake;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -124,7 +122,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence *old, **ptr;
+	struct fence **ptr;
 	unsigned idx;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -134,7 +132,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	fence->seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
 	fence_init(&fence->base, &amdgpu_fence_ops,
-		   &ring->fence_drv.fence_queue.lock,
+		   &ring->fence_drv.lock,
 		   adev->fence_context + ring->idx,
 		   fence->seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -145,13 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
-	old = rcu_dereference_protected(*ptr, 1);
+	BUG_ON(rcu_dereference_protected(*ptr, 1));
 
 	rcu_assign_pointer(*ptr, fence_get(&fence->base));
 
-	BUG_ON(old && !fence_is_signaled(old));
-	fence_put(old);
-
 	*f = &fence->base;
 
 	return 0;
@@ -181,11 +176,12 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
  */
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	uint64_t seq, last_seq, last_emitted;
-	bool wake = false;
+	int r;
 
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
 	do {
+		last_seq = atomic64_read(&ring->fence_drv.last_seq);
 		last_emitted = ring->fence_drv.sync_seq;
 		seq = amdgpu_fence_read(ring);
 		seq |= last_seq & 0xffffffff00000000LL;
@@ -195,22 +191,32 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 		}
 
 		if (seq <= last_seq || seq > last_emitted)
-			break;
+			return;
 
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
 	if (seq < last_emitted)
 		amdgpu_fence_schedule_fallback(ring);
 
-	if (wake)
-		wake_up_all(&ring->fence_drv.fence_queue);
+	while (last_seq != seq) {
+		struct fence *fence, **ptr;
+
+		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+		/* There is always exactly one thread signaling this fence slot */
+		fence = rcu_dereference_protected(*ptr, 1);
+		rcu_assign_pointer(*ptr, NULL);
+
+		BUG_ON(!fence);
+
+		r = fence_signal(fence);
+		if (!r)
+			FENCE_TRACE(fence, "signaled from irq context\n");
+		else
+			BUG();
+
+		fence_put(fence);
+	}
 }
 
 /**
@@ -356,8 +362,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
 		    (unsigned long)ring);
 
-	init_waitqueue_head(&ring->fence_drv.fence_queue);
 	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+	spin_lock_init(&ring->fence_drv.lock);
 	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
 					 GFP_KERNEL);
 	if (!ring->fence_drv.fences)
@@ -436,7 +442,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&ring->fence_drv.fence_queue);
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
 			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
@@ -569,42 +574,6 @@ static bool amdgpu_fence_is_signaled(struct fence *f)
 }
 
 /**
- * amdgpu_fence_check_signaled - callback from fence_queue
- *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
- */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
-{
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
-}
-
-/**
  * amdgpu_fence_enable_signaling - enable signalling on fence
  * @fence: fence
  *
@@ -617,17 +586,11 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
 	if (!timer_pending(&ring->fence_drv.fallback_timer))
 		amdgpu_fence_schedule_fallback(ring);
+
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+
 	return true;
 }
 
-- 
2.7.4