aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch')
-rw-r--r--common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch164
1 files changed, 164 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch b/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch
new file mode 100644
index 00000000..5dca9545
--- /dev/null
+++ b/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch
@@ -0,0 +1,164 @@
+From 1ba674acd69d4537e78b893d2de7cd9f24c7b146 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Mon, 14 Mar 2016 15:46:06 +0100
+Subject: [PATCH 0404/1110] drm/amdgpu: switch back to 32bit hw fences v2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We don't need to extend them to 64bits any more, so avoid the extra overhead.
+
+v2: update commit message.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +--
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 49 ++++++++++++-------------------
+ 2 files changed, 21 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 05a0ffb..f1b23e5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -347,8 +347,8 @@ struct amdgpu_fence_driver {
+ uint64_t gpu_addr;
+ volatile uint32_t *cpu_addr;
+ /* sync_seq is protected by ring emission lock */
+- uint64_t sync_seq;
+- atomic64_t last_seq;
++ uint32_t sync_seq;
++ atomic_t last_seq;
+ bool initialized;
+ struct amdgpu_irq_src *irq_src;
+ unsigned irq_type;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index da9a155..4303b44 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -52,7 +52,6 @@ struct amdgpu_fence {
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+- uint64_t seq;
+ };
+
+ static struct kmem_cache *amdgpu_fence_slab;
+@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
+ if (drv->cpu_addr)
+ seq = le32_to_cpu(*drv->cpu_addr);
+ else
+- seq = lower_32_bits(atomic64_read(&drv->last_seq));
++ seq = atomic_read(&drv->last_seq);
+
+ return seq;
+ }
+@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_fence *fence;
+ struct fence **ptr;
+- unsigned idx;
++ uint32_t seq;
+
+ fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+ if (fence == NULL)
+ return -ENOMEM;
+
+- fence->seq = ++ring->fence_drv.sync_seq;
++ seq = ++ring->fence_drv.sync_seq;
+ fence->ring = ring;
+ fence_init(&fence->base, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx,
+- fence->seq);
++ seq);
+ amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+- fence->seq, AMDGPU_FENCE_FLAG_INT);
++ seq, AMDGPU_FENCE_FLAG_INT);
+
+- idx = fence->seq & ring->fence_drv.num_fences_mask;
+- ptr = &ring->fence_drv.fences[idx];
++ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+ void amdgpu_fence_process(struct amdgpu_ring *ring)
+ {
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+- uint64_t seq, last_seq, last_emitted;
++ uint32_t seq, last_seq;
+ int r;
+
+ do {
+- last_seq = atomic64_read(&ring->fence_drv.last_seq);
+- last_emitted = ring->fence_drv.sync_seq;
++ last_seq = atomic_read(&ring->fence_drv.last_seq);
+ seq = amdgpu_fence_read(ring);
+- seq |= last_seq & 0xffffffff00000000LL;
+- if (seq < last_seq) {
+- seq &= 0xffffffff;
+- seq |= last_emitted & 0xffffffff00000000LL;
+- }
+
+- } while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
++ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+
+- if (seq < last_emitted)
++ if (seq != ring->fence_drv.sync_seq)
+ amdgpu_fence_schedule_fallback(ring);
+
+ while (last_seq != seq) {
+@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
+ * but it's ok to report slightly wrong fence count here.
+ */
+ amdgpu_fence_process(ring);
+- emitted = ring->fence_drv.sync_seq
+- - atomic64_read(&ring->fence_drv.last_seq);
+- /* to avoid 32bits warp around */
+- if (emitted > 0x10000000)
+- emitted = 0x10000000;
+-
+- return (unsigned)emitted;
++ emitted = 0x100000000ull;
++ emitted -= atomic_read(&ring->fence_drv.last_seq);
++ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
++ return lower_32_bits(emitted);
+ }
+
+ /**
+@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
+ ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
+ ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
+ }
+- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
++ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
+ amdgpu_irq_get(adev, irq_src, irq_type);
+
+ ring->fence_drv.irq_src = irq_src;
+@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ ring->fence_drv.cpu_addr = NULL;
+ ring->fence_drv.gpu_addr = 0;
+ ring->fence_drv.sync_seq = 0;
+- atomic64_set(&ring->fence_drv.last_seq, 0);
++ atomic_set(&ring->fence_drv.last_seq, 0);
+ ring->fence_drv.initialized = false;
+
+ setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
+ amdgpu_fence_process(ring);
+
+ seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
+- seq_printf(m, "Last signaled fence 0x%016llx\n",
+- (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
+- seq_printf(m, "Last emitted 0x%016llx\n",
++ seq_printf(m, "Last signaled fence 0x%08x\n",
++ atomic_read(&ring->fence_drv.last_seq));
++ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->fence_drv.sync_seq);
+ }
+ return 0;
+--
+2.7.4
+