aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch')
-rw-r--r--meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch210
1 files changed, 210 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch
new file mode 100644
index 00000000..f8925c22
--- /dev/null
+++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch
@@ -0,0 +1,210 @@
+From 93323131d66db68802e646204c0562cddc81a651 Mon Sep 17 00:00:00 2001
+From: "monk.liu" <monk.liu@amd.com>
+Date: Wed, 15 Jul 2015 17:21:45 +0800
+Subject: [PATCH 0328/1050] drm/amdgpu: different emit_ib for gfx and compute
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The compute ring does not use the constant engine (CE) so far, so skip
+the CE handling in the compute emit_ib routine.
+
+Signed-off-by: monk.liu <monk.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 46 ++++++++++++++++++++++++++--------
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 47 +++++++++++++++++++++++++++--------
+ 2 files changed, 71 insertions(+), 22 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index 2c188fb..2db6ab0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -2561,7 +2561,7 @@ static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
+ * sheduling on the ring. This function schedules the IB
+ * on the gfx ring for execution by the GPU.
+ */
+-static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
++static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib)
+ {
+ bool need_ctx_switch = ring->current_ctx != ib->ctx;
+@@ -2569,15 +2569,10 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ u32 next_rptr = ring->wptr + 5;
+
+ /* drop the CE preamble IB for the same context */
+- if ((ring->type == AMDGPU_RING_TYPE_GFX) &&
+- (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+- !need_ctx_switch)
++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
+ return;
+
+- if (ring->type == AMDGPU_RING_TYPE_COMPUTE)
+- control |= INDIRECT_BUFFER_VALID;
+-
+- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX)
++ if (need_ctx_switch)
+ next_rptr += 2;
+
+ next_rptr += 4;
+@@ -2588,7 +2583,7 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, next_rptr);
+
+ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
+- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) {
++ if (need_ctx_switch) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+@@ -2611,6 +2606,35 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, control);
+ }
+
++static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
++ struct amdgpu_ib *ib)
++{ /* compute-only IB emit: no CE preamble drop and no SWITCH_BUFFER packet */
++ u32 header, control = 0;
++ u32 next_rptr = ring->wptr + 5; /* +5: the five WRITE_DATA dwords below */
++
++ control |= INDIRECT_BUFFER_VALID; /* formerly set only when ring->type was COMPUTE */
++ next_rptr += 4; /* +4: the four INDIRECT_BUFFER dwords below */
++ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); /* packet: addr lo, addr hi, next_rptr */
++ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
++ amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); /* low dword, low two bits cleared */
++ amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
++ amdgpu_ring_write(ring, next_rptr);
++
++ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); /* always the plain IB packet in this path */
++
++ control |= ib->length_dw | /* ib->length_dw occupies the low bits */
++ (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); /* VM id, if any, from bit 24 up */
++
++ amdgpu_ring_write(ring, header);
++ amdgpu_ring_write(ring, /* IB address, low dword */
++#ifdef __BIG_ENDIAN
++ (2 << 0) | /* NOTE(review): presumably an endian-swap field - confirm */
++#endif
++ (ib->gpu_addr & 0xFFFFFFFC)); /* low two address bits cleared */
++ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); /* keeps only 16 upper-address bits */
++ amdgpu_ring_write(ring, control);
++}
++
+ /**
+ * gfx_v7_0_ring_test_ib - basic ring IB test
+ *
+@@ -5555,7 +5579,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
+ .get_wptr = gfx_v7_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v7_0_ring_set_wptr_gfx,
+ .parse_cs = NULL,
+- .emit_ib = gfx_v7_0_ring_emit_ib,
++ .emit_ib = gfx_v7_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v7_0_ring_emit_fence_gfx,
+ .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+ .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
+@@ -5571,7 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
+ .get_wptr = gfx_v7_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v7_0_ring_set_wptr_compute,
+ .parse_cs = NULL,
+- .emit_ib = gfx_v7_0_ring_emit_ib,
++ .emit_ib = gfx_v7_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v7_0_ring_emit_fence_compute,
+ .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+ .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 1c7c992..9e1d4dd 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -3753,7 +3753,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+ amdgpu_ring_write(ring, 0x20); /* poll interval */
+ }
+
+-static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
++static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib)
+ {
+ bool need_ctx_switch = ring->current_ctx != ib->ctx;
+@@ -3761,15 +3761,10 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
+ u32 next_rptr = ring->wptr + 5;
+
+ /* drop the CE preamble IB for the same context */
+- if ((ring->type == AMDGPU_RING_TYPE_GFX) &&
+- (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+- !need_ctx_switch)
++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
+ return;
+
+- if (ring->type == AMDGPU_RING_TYPE_COMPUTE)
+- control |= INDIRECT_BUFFER_VALID;
+-
+- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX)
++ if (need_ctx_switch)
+ next_rptr += 2;
+
+ next_rptr += 4;
+@@ -3780,7 +3775,7 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, next_rptr);
+
+ /* insert SWITCH_BUFFER packet before first IB in the ring frame */
+- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) {
++ if (need_ctx_switch) {
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+@@ -3803,6 +3798,36 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, control);
+ }
+
++static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
++ struct amdgpu_ib *ib)
++{ /* compute-only IB emit: no CE preamble drop and no SWITCH_BUFFER packet */
++ u32 header, control = 0;
++ u32 next_rptr = ring->wptr + 5; /* +5: the five WRITE_DATA dwords below */
++
++ control |= INDIRECT_BUFFER_VALID; /* formerly set only when ring->type was COMPUTE */
++
++ next_rptr += 4; /* +4: the four INDIRECT_BUFFER dwords below */
++ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); /* packet: addr lo, addr hi, next_rptr */
++ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
++ amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); /* low dword, low two bits cleared */
++ amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
++ amdgpu_ring_write(ring, next_rptr);
++
++ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); /* always the plain IB packet in this path */
++
++ control |= ib->length_dw | /* ib->length_dw occupies the low bits */
++ (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); /* VM id, if any, from bit 24 up */
++
++ amdgpu_ring_write(ring, header);
++ amdgpu_ring_write(ring, /* IB address, low dword */
++#ifdef __BIG_ENDIAN
++ (2 << 0) | /* NOTE(review): presumably an endian-swap field - confirm */
++#endif
++ (ib->gpu_addr & 0xFFFFFFFC)); /* low two address bits cleared */
++ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); /* keeps only 16 upper-address bits */
++ amdgpu_ring_write(ring, control);
++}
++
+ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
+ u64 seq, unsigned flags)
+ {
+@@ -4224,7 +4249,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
+ .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
+ .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
+ .parse_cs = NULL,
+- .emit_ib = gfx_v8_0_ring_emit_ib,
++ .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
+ .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
+ .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+ .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
+@@ -4240,7 +4265,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
+ .get_wptr = gfx_v8_0_ring_get_wptr_compute,
+ .set_wptr = gfx_v8_0_ring_set_wptr_compute,
+ .parse_cs = NULL,
+- .emit_ib = gfx_v8_0_ring_emit_ib,
++ .emit_ib = gfx_v8_0_ring_emit_ib_compute,
+ .emit_fence = gfx_v8_0_ring_emit_fence_compute,
+ .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+ .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
+--
+1.9.1
+