diff options
Diffstat (limited to 'meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch')
-rw-r--r-- | meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch new file mode 100644 index 00000000..f8925c22 --- /dev/null +++ b/meta-amdfalconx86/recipes-kernel/linux/linux-yocto/0328-drm-amdgpu-different-emit_ib-for-gfx-and-compute.patch @@ -0,0 +1,210 @@ +From 93323131d66db68802e646204c0562cddc81a651 Mon Sep 17 00:00:00 2001 +From: "monk.liu" <monk.liu@amd.com> +Date: Wed, 15 Jul 2015 17:21:45 +0800 +Subject: [PATCH 0328/1050] drm/amdgpu: different emit_ib for gfx and compute +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +compute ring didn't use const engine byfar, so ignore CE things in +compute routine + +Signed-off-by: monk.liu <monk.liu@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 46 ++++++++++++++++++++++++++-------- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 47 +++++++++++++++++++++++++++-------- + 2 files changed, 71 insertions(+), 22 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 2c188fb..2db6ab0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -2561,7 +2561,7 @@ static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring, + * sheduling on the ring. This function schedules the IB + * on the gfx ring for execution by the GPU. + */ +-static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, ++static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) + { + bool need_ctx_switch = ring->current_ctx != ib->ctx; +@@ -2569,15 +2569,10 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + u32 next_rptr = ring->wptr + 5; + + /* drop the CE preamble IB for the same context */ +- if ((ring->type == AMDGPU_RING_TYPE_GFX) && +- (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && +- !need_ctx_switch) ++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) + return; + +- if (ring->type == AMDGPU_RING_TYPE_COMPUTE) +- control |= INDIRECT_BUFFER_VALID; +- +- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) ++ if (need_ctx_switch) + next_rptr += 2; + + next_rptr += 4; +@@ -2588,7 +2583,7 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, next_rptr); + + /* insert SWITCH_BUFFER packet before first IB in the ring frame */ +- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { ++ if (need_ctx_switch) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } +@@ -2611,6 +2606,35 @@ static void gfx_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, control); + } + ++static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, ++ struct amdgpu_ib *ib) ++{ ++ u32 header, control = 0; ++ u32 next_rptr = ring->wptr + 5; ++ ++ control |= INDIRECT_BUFFER_VALID; ++ next_rptr += 4; ++ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); ++ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); ++ amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); ++ amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); ++ amdgpu_ring_write(ring, next_rptr); ++ ++ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); ++ ++ control |= ib->length_dw | ++ (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); ++ ++ amdgpu_ring_write(ring, header); ++ amdgpu_ring_write(ring, ++#ifdef __BIG_ENDIAN ++ (2 << 0) | ++#endif ++ (ib->gpu_addr & 0xFFFFFFFC)); ++ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); ++ amdgpu_ring_write(ring, control); ++} ++ + /** + * gfx_v7_0_ring_test_ib - basic ring IB test + * +@@ -5555,7 +5579,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { + .get_wptr = gfx_v7_0_ring_get_wptr_gfx, + .set_wptr = gfx_v7_0_ring_set_wptr_gfx, + .parse_cs = NULL, +- .emit_ib = gfx_v7_0_ring_emit_ib, ++ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, + .emit_fence = gfx_v7_0_ring_emit_fence_gfx, + .emit_semaphore = gfx_v7_0_ring_emit_semaphore, + .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, +@@ -5571,7 +5595,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { + .get_wptr = gfx_v7_0_ring_get_wptr_compute, + .set_wptr = gfx_v7_0_ring_set_wptr_compute, + .parse_cs = NULL, +- .emit_ib = gfx_v7_0_ring_emit_ib, ++ .emit_ib = gfx_v7_0_ring_emit_ib_compute, + .emit_fence = gfx_v7_0_ring_emit_fence_compute, + .emit_semaphore = gfx_v7_0_ring_emit_semaphore, + .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 1c7c992..9e1d4dd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -3753,7 +3753,7 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) + amdgpu_ring_write(ring, 0x20); /* poll interval */ + } + +-static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, ++static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, + struct amdgpu_ib *ib) + { + bool need_ctx_switch = ring->current_ctx != ib->ctx; +@@ -3761,15 +3761,10 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, + u32 next_rptr = ring->wptr + 5; + + /* drop the CE preamble IB for the same context */ +- if ((ring->type == AMDGPU_RING_TYPE_GFX) && +- (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && +- !need_ctx_switch) ++ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) + return; + +- if (ring->type == AMDGPU_RING_TYPE_COMPUTE) +- control |= INDIRECT_BUFFER_VALID; +- +- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) ++ if (need_ctx_switch) + next_rptr += 2; + + next_rptr += 4; +@@ -3780,7 +3775,7 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, next_rptr); + + /* insert SWITCH_BUFFER packet before first IB in the ring frame */ +- if (need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) { ++ if (need_ctx_switch) { + amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); + amdgpu_ring_write(ring, 0); + } +@@ -3803,6 +3798,36 @@ static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, control); + } + ++static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, ++ struct amdgpu_ib *ib) ++{ ++ u32 header, control = 0; ++ u32 next_rptr = ring->wptr + 5; ++ ++ control |= INDIRECT_BUFFER_VALID; ++ ++ next_rptr += 4; ++ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); ++ amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); ++ amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); ++ amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); ++ amdgpu_ring_write(ring, next_rptr); ++ ++ header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); ++ ++ control |= ib->length_dw | ++ (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0); ++ ++ amdgpu_ring_write(ring, header); ++ amdgpu_ring_write(ring, ++#ifdef __BIG_ENDIAN ++ (2 << 0) | ++#endif ++ (ib->gpu_addr & 0xFFFFFFFC)); ++ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); ++ amdgpu_ring_write(ring, control); ++} ++ + static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) + { +@@ -4224,7 +4249,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { + .get_wptr = gfx_v8_0_ring_get_wptr_gfx, + .set_wptr = gfx_v8_0_ring_set_wptr_gfx, + .parse_cs = NULL, +- .emit_ib = gfx_v8_0_ring_emit_ib, ++ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, + .emit_fence = gfx_v8_0_ring_emit_fence_gfx, + .emit_semaphore = gfx_v8_0_ring_emit_semaphore, + .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, +@@ -4240,7 +4265,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { + .get_wptr = gfx_v8_0_ring_get_wptr_compute, + .set_wptr = gfx_v8_0_ring_set_wptr_compute, + .parse_cs = NULL, +- .emit_ib = gfx_v8_0_ring_emit_ib, ++ .emit_ib = gfx_v8_0_ring_emit_ib_compute, + .emit_fence = gfx_v8_0_ring_emit_fence_compute, + .emit_semaphore = gfx_v8_0_ring_emit_semaphore, + .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, +-- +1.9.1 + |