aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch')
-rw-r--r--common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch238
1 files changed, 238 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch b/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch
new file mode 100644
index 00000000..a8b70643
--- /dev/null
+++ b/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch
@@ -0,0 +1,238 @@
+From 5c3422b0b135b46c8dca9c1d909c1ae84f3561bd Mon Sep 17 00:00:00 2001
+From: "monk.liu" <monk.liu@amd.com>
+Date: Wed, 23 Sep 2015 13:49:58 +0800
+Subject: [PATCH 0563/1050] drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We used to rely on wait_reg_mem to make the CE wait until the DE finished
+page updating, but from Tonga onwards the CE does not support the
+wait_reg_mem packet, so this logic no longer works.
+
+So here is another approach that achieves the same thing:
+inserting two SWITCH_BUFFER packets at both the front and the end of vm_flush
+guarantees that the CE does not go on to process IB_const before vm_flush
+is done.
+
+Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy
+method of syncing CE and ME.
+
+v2:
+Insert double SWITCH_BUFFER at front of vm flush as well.
+
+Signed-off-by: monk.liu <monk.liu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 --
+ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 55 +++++++--------------------------
+ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 58 ++++++++---------------------------
+ 3 files changed, 23 insertions(+), 92 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 57b427f..6647fb2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -1202,8 +1202,6 @@ struct amdgpu_gfx {
+ struct amdgpu_irq_src priv_inst_irq;
+ /* gfx status */
+ uint32_t gfx_current_status;
+- /* sync signal for const engine */
+- unsigned ce_sync_offs;
+ /* ce ram size*/
+ unsigned ce_ram_size;
+ };
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+index 392ec10..e992bf2 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
+ return 0;
+ }
+
+-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
+-{
+- struct amdgpu_device *adev = ring->adev;
+- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
+-
+- /* instruct DE to set a magic number */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+- WRITE_DATA_DST_SEL(5)));
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 1);
+-
+- /* let CE wait till condition satisfied */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+- WAIT_REG_MEM_FUNCTION(3) | /* == */
+- WAIT_REG_MEM_ENGINE(2))); /* ce */
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 1);
+- amdgpu_ring_write(ring, 0xffffffff);
+- amdgpu_ring_write(ring, 4); /* poll interval */
+-
+- /* instruct CE to reset wb of ce_sync to zero */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+- WRITE_DATA_DST_SEL(5) |
+- WR_CONFIRM));
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 0);
+-}
+-
+ /*
+ * vm
+ * VMID 0 is the physical GPU addresses as used by the kernel.
+@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+ {
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
++ if (usepfp) {
++ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, 0x0);
+
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+- gfx_v7_0_ce_sync_me(ring);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
+ }
+ }
+
+@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
+ return r;
+ }
+
+- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
+- if (r) {
+- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
+- return r;
+- }
+-
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+ ring->ring_obj = NULL;
+@@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
+-
+ gfx_v7_0_cp_compute_fini(adev);
+ gfx_v7_0_rlc_fini(adev);
+ gfx_v7_0_mec_fini(adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+index 78e5900..cb4f68f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
+ return r;
+ }
+
+- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
+- if (r) {
+- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
+- return r;
+- }
+-
+ /* set up the gfx ring */
+ for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+ ring = &adev->gfx.gfx_ring[i];
+@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
+ for (i = 0; i < adev->gfx.num_compute_rings; i++)
+ amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
+
+- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
+-
+ gfx_v8_0_mec_fini(adev);
+
+ return 0;
+@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
+ return true;
+ }
+
+-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
+-{
+- struct amdgpu_device *adev = ring->adev;
+- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
+-
+- /* instruct DE to set a magic number */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+- WRITE_DATA_DST_SEL(5)));
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 1);
+-
+- /* let CE wait till condition satisfied */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
+- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+- WAIT_REG_MEM_FUNCTION(3) | /* == */
+- WAIT_REG_MEM_ENGINE(2))); /* ce */
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 1);
+- amdgpu_ring_write(ring, 0xffffffff);
+- amdgpu_ring_write(ring, 4); /* poll interval */
+-
+- /* instruct CE to reset wb of ce_sync to zero */
+- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
+- WRITE_DATA_DST_SEL(5) |
+- WR_CONFIRM));
+- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
+- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
+- amdgpu_ring_write(ring, 0);
+-}
+-
+ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+ {
+@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
+
++ if (usepfp) {
++ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ }
++
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+ WRITE_DATA_DST_SEL(0)) |
+@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ /* sync PFP to ME, otherwise we might get invalid PFP reads */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
+ amdgpu_ring_write(ring, 0x0);
+-
+- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+- gfx_v8_0_ce_sync_me(ring);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
++ amdgpu_ring_write(ring, 0);
+ }
+ }
+
+--
+1.9.1
+