Diffstat (limited to 'common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch')
-rw-r--r-- common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch | 223
1 file changed, 223 insertions(+), 0 deletions(-)
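
This file carries patch 0401 of the kernel recipe's patch series: Christian König's amdgpu fence rework. Instead of waking a waitqueue from the fence interrupt, amdgpu_fence_process() now claims the range of newly completed sequence numbers with a lock-free cmpxchg on last_seq and directly signals the fences stored in the matching slots of the per-ring fences array. A minimal standalone sketch of that loop is appended after the patch.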
diff --git a/common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch b/common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch
new file mode 100644
index 00000000..1c7c1158
--- /dev/null
+++ b/common/recipes-kernel/linux/files/0401-drm-amdgpu-signal-fences-directly-in-amdgpu_fence_pr.patch
@@ -0,0 +1,223 @@
+From b7071d88be433418c213ae57acff2e3ee9fe93d2 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Mon, 14 Mar 2016 14:29:46 +0100
+Subject: [PATCH 0401/1110] drm/amdgpu: signal fences directly in
+ amdgpu_fence_process
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Because of the scheduler we need to signal all fences immediately
+anyway, so try to avoid the waitqueue overhead.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 97 ++++++++++---------------------
+ 2 files changed, 31 insertions(+), 68 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 43c948d..05a0ffb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -353,8 +353,8 @@ struct amdgpu_fence_driver {
+ 	struct amdgpu_irq_src *irq_src;
+ 	unsigned irq_type;
+ 	struct timer_list fallback_timer;
+-	wait_queue_head_t fence_queue;
+ 	unsigned num_fences_mask;
++	spinlock_t lock;
+ 	struct fence **fences;
+ };
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index d5bdd96..c5980c4 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -53,8 +53,6 @@ struct amdgpu_fence {
+ 	/* RB, DMA, etc. */
+ 	struct amdgpu_ring *ring;
+ 	uint64_t seq;
+-
+-	wait_queue_t fence_wake;
+ };
+
+ static struct kmem_cache *amdgpu_fence_slab;
+@@ -124,7 +122,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ {
+ 	struct amdgpu_device *adev = ring->adev;
+ 	struct amdgpu_fence *fence;
+-	struct fence *old, **ptr;
++	struct fence **ptr;
+ 	unsigned idx;
+
+ 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+@@ -134,7 +132,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ 	fence->seq = ++ring->fence_drv.sync_seq;
+ 	fence->ring = ring;
+ 	fence_init(&fence->base, &amdgpu_fence_ops,
+-		   &ring->fence_drv.fence_queue.lock,
++		   &ring->fence_drv.lock,
+ 		   adev->fence_context + ring->idx,
+ 		   fence->seq);
+ 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
+@@ -145,13 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
+ 	/* This function can't be called concurrently anyway, otherwise
+ 	 * emitting the fence would mess up the hardware ring buffer.
+ 	 */
+-	old = rcu_dereference_protected(*ptr, 1);
++	BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+ 	rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+-	BUG_ON(old && !fence_is_signaled(old));
+-	fence_put(old);
+-
+ 	*f = &fence->base;
+
+ 	return 0;
+@@ -181,11 +176,12 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
+  */
+ void amdgpu_fence_process(struct amdgpu_ring *ring)
+ {
++	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ 	uint64_t seq, last_seq, last_emitted;
+-	bool wake = false;
++	int r;
+
+-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ 	do {
++		last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ 		last_emitted = ring->fence_drv.sync_seq;
+ 		seq = amdgpu_fence_read(ring);
+ 		seq |= last_seq & 0xffffffff00000000LL;
+@@ -195,22 +191,32 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
+ 		}
+
+ 		if (seq <= last_seq || seq > last_emitted)
+-			break;
++			return;
+
+-		/* If we loop over we don't want to return without
+-		 * checking if a fence is signaled as it means that the
+-		 * seq we just read is different from the previous on.
+-		 */
+-		wake = true;
+-		last_seq = seq;
+-
+-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
++	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
+
+ 	if (seq < last_emitted)
+ 		amdgpu_fence_schedule_fallback(ring);
+
+-	if (wake)
+-		wake_up_all(&ring->fence_drv.fence_queue);
++	while (last_seq != seq) {
++		struct fence *fence, **ptr;
++
++		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
++
++		/* There is always exactly one thread signaling this fence slot */
++		fence = rcu_dereference_protected(*ptr, 1);
++		rcu_assign_pointer(*ptr, NULL);
++
++		BUG_ON(!fence);
++
++		r = fence_signal(fence);
++		if (!r)
++			FENCE_TRACE(fence, "signaled from irq context\n");
++		else
++			BUG();
++
++		fence_put(fence);
++	}
+ }
+
+ /**
+@@ -356,8 +362,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
+ 		    (unsigned long)ring);
+
+-	init_waitqueue_head(&ring->fence_drv.fence_queue);
+ 	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
++	spin_lock_init(&ring->fence_drv.lock);
+ 	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+ 					 GFP_KERNEL);
+ 	if (!ring->fence_drv.fences)
+@@ -436,7 +442,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
+ 			/* no need to trigger GPU reset as we are unloading */
+ 			amdgpu_fence_driver_force_completion(adev);
+ 		}
+-		wake_up_all(&ring->fence_drv.fence_queue);
+ 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
+ 			       ring->fence_drv.irq_type);
+ 		amd_sched_fini(&ring->sched);
+@@ -569,42 +574,6 @@ static bool amdgpu_fence_is_signaled(struct fence *f)
+ }
+
+ /**
+- * amdgpu_fence_check_signaled - callback from fence_queue
+- *
+- * this function is called with fence_queue lock held, which is also used
+- * for the fence locking itself, so unlocked variants are used for
+- * fence_signal, and remove_wait_queue.
+- */
+-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+-{
+-	struct amdgpu_fence *fence;
+-	struct amdgpu_device *adev;
+-	u64 seq;
+-	int ret;
+-
+-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
+-	adev = fence->ring->adev;
+-
+-	/*
+-	 * We cannot use amdgpu_fence_process here because we're already
+-	 * in the waitqueue, in a call from wake_up_all.
+-	 */
+-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
+-	if (seq >= fence->seq) {
+-		ret = fence_signal_locked(&fence->base);
+-		if (!ret)
+-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
+-		else
+-			FENCE_TRACE(&fence->base, "was already signaled\n");
+-
+-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
+-		fence_put(&fence->base);
+-	} else
+-		FENCE_TRACE(&fence->base, "pending\n");
+-	return 0;
+-}
+-
+-/**
+  * amdgpu_fence_enable_signaling - enable signalling on fence
+  * @fence: fence
+  *
+@@ -617,17 +586,11 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
+ 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ 	struct amdgpu_ring *ring = fence->ring;
+
+-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+-		return false;
+-
+-	fence->fence_wake.flags = 0;
+-	fence->fence_wake.private = NULL;
+-	fence->fence_wake.func = amdgpu_fence_check_signaled;
+-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
+-	fence_get(f);
+ 	if (!timer_pending(&ring->fence_drv.fallback_timer))
+ 		amdgpu_fence_schedule_fallback(ring);
++
+ 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
++
+ 	return true;
+ }
+
+--
+2.7.4
+
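
For reference, the signaling scheme the patch introduces reduces to the standalone userspace sketch below. As in the patch, num_hw_submission is assumed to be a power of two, so a sequence number selects its fence slot with a single mask (fences[seq & num_fences_mask]). struct fence, fence_driver, hw_seq and the printf are simplified stand-ins for the kernel's fence objects, fence driver, hardware sequence readback and FENCE_TRACE; the RCU accessors and the fallback timer are omitted. This is an illustrative sketch, not the driver code:

/* sketch.c - illustration of the signaling loop this patch introduces.
 * Build with: cc -std=c11 sketch.c
 * Names mirror the kernel code; the types are plain stand-ins. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_HW_SUBMISSION 4	/* must be a power of two */

struct fence {
	uint64_t seq;
};

struct fence_driver {
	_Atomic uint64_t last_seq;	/* last sequence number signaled */
	uint64_t sync_seq;		/* last sequence number emitted */
	unsigned num_fences_mask;	/* NUM_HW_SUBMISSION - 1 */
	struct fence *fences[NUM_HW_SUBMISSION];
};

/* Stand-in for amdgpu_fence_read(): the 32-bit value the GPU writes
 * back as it completes submissions. */
static uint32_t hw_seq;

static void fence_process(struct fence_driver *drv)
{
	uint64_t seq, last_seq, last_emitted;

	/* Lock-free advance of last_seq: re-read everything and retry if
	 * another caller advanced last_seq under us; return if there is
	 * nothing new to signal.  (The kernel code also handles 32-bit
	 * wraparound inside this loop, which the sketch omits.) */
	do {
		last_seq = atomic_load(&drv->last_seq);
		last_emitted = drv->sync_seq;
		seq = hw_seq | (last_seq & 0xffffffff00000000ULL);

		if (seq <= last_seq || seq > last_emitted)
			return;
	} while (!atomic_compare_exchange_strong(&drv->last_seq,
						 &last_seq, seq));

	/* Exactly one thread wins the exchange for a given range, so it
	 * alone pops and signals every slot in (last_seq, seq]. */
	while (last_seq != seq) {
		struct fence **ptr =
			&drv->fences[++last_seq & drv->num_fences_mask];
		struct fence *fence = *ptr;

		*ptr = NULL;		/* the kernel uses RCU for this */
		printf("signaled fence %llu\n",
		       (unsigned long long)fence->seq);
		free(fence);		/* kernel: fence_put() */
	}
}

int main(void)
{
	struct fence_driver drv = { .num_fences_mask = NUM_HW_SUBMISSION - 1 };

	/* Emit two fences, then pretend the hardware completed both. */
	for (uint64_t s = 1; s <= 2; s++) {
		struct fence *f = malloc(sizeof(*f));
		f->seq = s;
		drv.fences[s & drv.num_fences_mask] = f;
		drv.sync_seq = s;
	}
	hw_seq = 2;
	fence_process(&drv);
	return 0;
}

The property the sketch preserves is the one the new kernel loop relies on: the cmpxchg lets exactly one caller own the range (last_seq, seq], so each slot is popped and signaled by a single thread, which is why the patched code can use rcu_dereference_protected(*ptr, 1) and BUG_ON(!fence) there.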