diff options
Diffstat (limited to 'common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch')
-rw-r--r-- | common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch b/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch new file mode 100644 index 00000000..5dca9545 --- /dev/null +++ b/common/recipes-kernel/linux/files/0404-drm-amdgpu-switch-back-to-32bit-hw-fences-v2.patch @@ -0,0 +1,164 @@ +From 1ba674acd69d4537e78b893d2de7cd9f24c7b146 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Mon, 14 Mar 2016 15:46:06 +0100 +Subject: [PATCH 0404/1110] drm/amdgpu: switch back to 32bit hw fences v2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We don't need to extend them to 64bits any more, so avoid the extra overhead. + +v2: update commit message. + +Signed-off-by: Christian König <christian.koenig@amd.com> +Acked-by: Alex Deucher <alexander.deucher@amd.com> +Reviewed-by: Chunming Zhou <david1.zhou@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +-- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 49 ++++++++++++------------------- + 2 files changed, 21 insertions(+), 32 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 05a0ffb..f1b23e5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -347,8 +347,8 @@ struct amdgpu_fence_driver { + uint64_t gpu_addr; + volatile uint32_t *cpu_addr; + /* sync_seq is protected by ring emission lock */ +- uint64_t sync_seq; +- atomic64_t last_seq; ++ uint32_t sync_seq; ++ atomic_t last_seq; + bool initialized; + struct amdgpu_irq_src *irq_src; + unsigned irq_type; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index da9a155..4303b44 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -52,7 +52,6 @@ struct amdgpu_fence { + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; +- uint64_t seq; + }; + + static struct kmem_cache *amdgpu_fence_slab; +@@ -104,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) + if (drv->cpu_addr) + seq = le32_to_cpu(*drv->cpu_addr); + else +- seq = lower_32_bits(atomic64_read(&drv->last_seq)); ++ seq = atomic_read(&drv->last_seq); + + return seq; + } +@@ -123,23 +122,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f) + struct amdgpu_device *adev = ring->adev; + struct amdgpu_fence *fence; + struct fence **ptr; +- unsigned idx; ++ uint32_t seq; + + fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL); + if (fence == NULL) + return -ENOMEM; + +- fence->seq = ++ring->fence_drv.sync_seq; ++ seq = ++ring->fence_drv.sync_seq; + fence->ring = ring; + fence_init(&fence->base, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, +- fence->seq); ++ seq); + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, +- fence->seq, AMDGPU_FENCE_FLAG_INT); ++ seq, AMDGPU_FENCE_FLAG_INT); + +- idx = fence->seq & ring->fence_drv.num_fences_mask; +- ptr = &ring->fence_drv.fences[idx]; ++ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; + /* This function can't be called concurrently anyway, otherwise + * emitting the fence would mess up the hardware ring buffer. + */ +@@ -177,22 +175,16 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) + void amdgpu_fence_process(struct amdgpu_ring *ring) + { + struct amdgpu_fence_driver *drv = &ring->fence_drv; +- uint64_t seq, last_seq, last_emitted; ++ uint32_t seq, last_seq; + int r; + + do { +- last_seq = atomic64_read(&ring->fence_drv.last_seq); +- last_emitted = ring->fence_drv.sync_seq; ++ last_seq = atomic_read(&ring->fence_drv.last_seq); + seq = amdgpu_fence_read(ring); +- seq |= last_seq & 0xffffffff00000000LL; +- if (seq < last_seq) { +- seq &= 0xffffffff; +- seq |= last_emitted & 0xffffffff00000000LL; +- } + +- } while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); ++ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); + +- if (seq < last_emitted) ++ if (seq != ring->fence_drv.sync_seq) + amdgpu_fence_schedule_fallback(ring); + + while (last_seq != seq) { +@@ -279,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) + * but it's ok to report slightly wrong fence count here. + */ + amdgpu_fence_process(ring); +- emitted = ring->fence_drv.sync_seq +- - atomic64_read(&ring->fence_drv.last_seq); +- /* to avoid 32bits warp around */ +- if (emitted > 0x10000000) +- emitted = 0x10000000; +- +- return (unsigned)emitted; ++ emitted = 0x100000000ull; ++ emitted -= atomic_read(&ring->fence_drv.last_seq); ++ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq); ++ return lower_32_bits(emitted); + } + + /** +@@ -317,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, + ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index; + ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index; + } +- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq)); ++ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq)); + amdgpu_irq_get(adev, irq_src, irq_type); + + ring->fence_drv.irq_src = irq_src; +@@ -353,7 +342,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, + ring->fence_drv.cpu_addr = NULL; + ring->fence_drv.gpu_addr = 0; + ring->fence_drv.sync_seq = 0; +- atomic64_set(&ring->fence_drv.last_seq, 0); ++ atomic_set(&ring->fence_drv.last_seq, 0); + ring->fence_drv.initialized = false; + + setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, +@@ -621,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) + amdgpu_fence_process(ring); + + seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name); +- seq_printf(m, "Last signaled fence 0x%016llx\n", +- (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); +- seq_printf(m, "Last emitted 0x%016llx\n", ++ seq_printf(m, "Last signaled fence 0x%08x\n", ++ atomic_read(&ring->fence_drv.last_seq)); ++ seq_printf(m, "Last emitted 0x%08x\n", + ring->fence_drv.sync_seq); + } + return 0; +-- +2.7.4 + |