aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch
blob: 5dca95454a684f2b8280de5f380dbf4ab7f3b2dc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
From 1ba674acd69d4537e78b893d2de7cd9f24c7b146 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 14 Mar 2016 15:46:06 +0100
Subject: [PATCH 0404/1110] drm/amdgpu: switch back to 32bit hw fences v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We don't need to extend them to 64bits any more, so avoid the extra overhead.

v2: update commit message.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 49 ++++++++++++-------------------
 2 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 05a0ffb..f1b23e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -347,8 +347,8 @@ struct amdgpu_fence_driver {
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
-	uint64_t			sync_seq;
-	atomic64_t			last_seq;
+	uint32_t			sync_seq;
+	atomic_t			last_seq;
 	bool				initialized;
 	struct amdgpu_irq_src		*irq_src;
 	unsigned			irq_type;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index da9a155..4303b44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -52,7 +52,6 @@ struct amdgpu_fence {
 
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
-	uint64_t			seq;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
 	if (drv->cpu_addr)
 		seq = le32_to_cpu(*drv->cpu_addr);
 	else
-		seq = lower_32_bits(atomic64_read(&drv->last_seq));
+		seq = atomic_read(&drv->last_seq);
 
 	return seq;
 }
@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
 	struct fence **ptr;
-	unsigned idx;
+	uint32_t seq;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
 	if (fence == NULL)
 		return -ENOMEM;
 
-	fence->seq = ++ring->fence_drv.sync_seq;
+	seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
 	fence_init(&fence->base, &amdgpu_fence_ops,
 		   &ring->fence_drv.lock,
 		   adev->fence_context + ring->idx,
-		   fence->seq);
+		   seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
-			       fence->seq, AMDGPU_FENCE_FLAG_INT);
+			       seq, AMDGPU_FENCE_FLAG_INT);
 
-	idx = fence->seq & ring->fence_drv.num_fences_mask;
-	ptr = &ring->fence_drv.fences[idx];
+	ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
-	uint64_t seq, last_seq, last_emitted;
+	uint32_t seq, last_seq;
 	int r;
 
 	do {
-		last_seq = atomic64_read(&ring->fence_drv.last_seq);
-		last_emitted = ring->fence_drv.sync_seq;
+		last_seq = atomic_read(&ring->fence_drv.last_seq);
 		seq = amdgpu_fence_read(ring);
-		seq |= last_seq & 0xffffffff00000000LL;
-		if (seq < last_seq) {
-			seq &= 0xffffffff;
-			seq |= last_emitted & 0xffffffff00000000LL;
-		}
 
-	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+	} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
-	if (seq < last_emitted)
+	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
 	while (last_seq != seq) {
@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 	 * but it's ok to report slightly wrong fence count here.
 	 */
 	amdgpu_fence_process(ring);
-	emitted = ring->fence_drv.sync_seq
-		- atomic64_read(&ring->fence_drv.last_seq);
-	/* to avoid 32bits warp around */
-	if (emitted > 0x10000000)
-		emitted = 0x10000000;
-
-	return (unsigned)emitted;
+	emitted = 0x100000000ull;
+	emitted -= atomic_read(&ring->fence_drv.last_seq);
+	emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+	return lower_32_bits(emitted);
 }
 
 /**
@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 		ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
 		ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
 	}
-	amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+	amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
 	amdgpu_irq_get(adev, irq_src, irq_type);
 
 	ring->fence_drv.irq_src = irq_src;
@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	ring->fence_drv.cpu_addr = NULL;
 	ring->fence_drv.gpu_addr = 0;
 	ring->fence_drv.sync_seq = 0;
-	atomic64_set(&ring->fence_drv.last_seq, 0);
+	atomic_set(&ring->fence_drv.last_seq, 0);
 	ring->fence_drv.initialized = false;
 
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
 		amdgpu_fence_process(ring);
 
 		seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
-		seq_printf(m, "Last signaled fence 0x%016llx\n",
-			   (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
-		seq_printf(m, "Last emitted        0x%016llx\n",
+		seq_printf(m, "Last signaled fence 0x%08x\n",
+			   atomic_read(&ring->fence_drv.last_seq));
+		seq_printf(m, "Last emitted        0x%08x\n",
 			   ring->fence_drv.sync_seq);
 	}
 	return 0;
-- 
2.7.4