diff options
Diffstat (limited to 'common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch')
-rw-r--r-- | common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch b/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch new file mode 100644 index 00000000..a8b70643 --- /dev/null +++ b/common/recipes-kernel/linux/files/0563-drm-amdgpu-sync-ce-and-me-with-SWITCH_BUFFER-2.patch @@ -0,0 +1,238 @@ +From 5c3422b0b135b46c8dca9c1d909c1ae84f3561bd Mon Sep 17 00:00:00 2001 +From: "monk.liu" <monk.liu@amd.com> +Date: Wed, 23 Sep 2015 13:49:58 +0800 +Subject: [PATCH 0563/1050] drm/amdgpu: sync ce and me with SWITCH_BUFFER(2) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +We used to use wait_reg_mem to make the CE wait until the DE finished page +updating, but from Tonga onward the CE doesn't support the wait_reg_mem packet, so +this logic no longer works. + +So here is another approach to do the same thing: +Inserting two SWITCH_BUFFER packets at both the front and the end of vm_flush +guarantees that the CE does not go on to process IB_const before vm_flush is +done. + +Inserting two SWITCH_BUFFER packets also works on CI, so remove the legacy method +of syncing CE and ME. + +v2: +Insert double SWITCH_BUFFER at front of vm flush as well. 
+ +Signed-off-by: monk.liu <monk.liu@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 55 +++++++-------------------------- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 58 ++++++++--------------------------- + 3 files changed, 23 insertions(+), 92 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 57b427f..6647fb2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -1202,8 +1202,6 @@ struct amdgpu_gfx { + struct amdgpu_irq_src priv_inst_irq; + /* gfx status */ + uint32_t gfx_current_status; +- /* sync signal for const engine */ +- unsigned ce_sync_offs; + /* ce ram size*/ + unsigned ce_ram_size; + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +index 392ec10..e992bf2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) + return 0; + } + +-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring) +-{ +- struct amdgpu_device *adev = ring->adev; +- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; +- +- /* instruct DE to set a magic number */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(5))); +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 1); +- +- /* let CE wait till condition satisfied */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); +- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ +- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ +- WAIT_REG_MEM_FUNCTION(3) | /* == */ +- WAIT_REG_MEM_ENGINE(2))); /* ce */ +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- 
amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 1); +- amdgpu_ring_write(ring, 0xffffffff); +- amdgpu_ring_write(ring, 4); /* poll interval */ +- +- /* instruct CE to reset wb of ce_sync to zero */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | +- WRITE_DATA_DST_SEL(5) | +- WR_CONFIRM)); +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 0); +-} +- + /* + * vm + * VMID 0 is the physical GPU addresses as used by the kernel. +@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) + { + int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); ++ if (usepfp) { ++ /* synce CE with ME to prevent CE fetch CEIB before context switch done */ ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ } + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | +@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, 0x0); + + /* synce CE with ME to prevent CE fetch CEIB before context switch done */ +- gfx_v7_0_ce_sync_me(ring); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); + } + } + +@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle) + return r; + } + +- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); +- if (r) { +- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); +- return r; +- } +- + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; + ring->ring_obj = NULL; +@@ -4889,8 
+4858,6 @@ static int gfx_v7_0_sw_fini(void *handle) + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + +- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); +- + gfx_v7_0_cp_compute_fini(adev); + gfx_v7_0_rlc_fini(adev); + gfx_v7_0_mec_fini(adev); +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index 78e5900..cb4f68f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle) + return r; + } + +- r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs); +- if (r) { +- DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r); +- return r; +- } +- + /* set up the gfx ring */ + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + ring = &adev->gfx.gfx_ring[i]; +@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle) + for (i = 0; i < adev->gfx.num_compute_rings; i++) + amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + +- amdgpu_wb_free(adev, adev->gfx.ce_sync_offs); +- + gfx_v8_0_mec_fini(adev); + + return 0; +@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, + return true; + } + +-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring) +-{ +- struct amdgpu_device *adev = ring->adev; +- u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4; +- +- /* instruct DE to set a magic number */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(5))); +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 1); +- +- /* let CE wait till condition satisfied */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); +- amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ +- WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ +- WAIT_REG_MEM_FUNCTION(3) | /* == */ +- 
WAIT_REG_MEM_ENGINE(2))); /* ce */ +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 1); +- amdgpu_ring_write(ring, 0xffffffff); +- amdgpu_ring_write(ring, 4); /* poll interval */ +- +- /* instruct CE to reset wb of ce_sync to zero */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | +- WRITE_DATA_DST_SEL(5) | +- WR_CONFIRM)); +- amdgpu_ring_write(ring, gpu_addr & 0xfffffffc); +- amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff); +- amdgpu_ring_write(ring, 0); +-} +- + static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr) + { +@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ + ++ if (usepfp) { ++ /* synce CE with ME to prevent CE fetch CEIB before context switch done */ ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ } ++ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | + WRITE_DATA_DST_SEL(0)) | +@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); +- +- /* synce CE with ME to prevent CE fetch CEIB before context switch done */ +- gfx_v8_0_ce_sync_me(ring); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); ++ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); ++ amdgpu_ring_write(ring, 0); + } + } + +-- +1.9.1 + |