diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3205-drm-amdgpu-implement-gmc_v9_0_emit_flush_gpu_tlb.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3205-drm-amdgpu-implement-gmc_v9_0_emit_flush_gpu_tlb.patch | 484 |
1 files changed, 484 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3205-drm-amdgpu-implement-gmc_v9_0_emit_flush_gpu_tlb.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3205-drm-amdgpu-implement-gmc_v9_0_emit_flush_gpu_tlb.patch new file mode 100644 index 00000000..9741efb8 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3205-drm-amdgpu-implement-gmc_v9_0_emit_flush_gpu_tlb.patch @@ -0,0 +1,484 @@ +From 8d4eeeda48c27c4f5984b2ed9f7d24f30620780a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 12 Jan 2018 21:57:53 +0100 +Subject: [PATCH 3205/4131] drm/amdgpu: implement gmc_v9_0_emit_flush_gpu_tlb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Unify tlb flushing for gmc v9. + +Signed-off-by: Christian König <christian.koenig@amd.com> +Acked-by: Chunming Zhou <david1.zhou@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 - + drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 29 ++++------------ + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 25 +++++++++++++- + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 23 ++----------- + drivers/gpu/drm/amd/amdgpu/soc15.h | 2 ++ + drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 43 ++++------------------- + drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 22 +++--------- + drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 61 ++++----------------------------- + 8 files changed, 53 insertions(+), 153 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +index b7c1d00..f4b82ef7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +@@ -69,7 +69,6 @@ struct amdgpu_gmc_funcs { + /* get the pde for a given mc addr */ + void (*get_vm_pde)(struct amdgpu_device *adev, int level, + u64 *dst, u64 *flags); +- uint32_t (*get_invalidate_req)(unsigned int vmid); + }; + + struct amdgpu_gmc { +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index e95aac4..dd519e9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -3694,31 +3694,16 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- gfx_v9_0_write_data_to_reg(ring, usepfp, true, +- hub->ctx0_ptb_addr_lo32 + (2 * vmid), +- lower_32_bits(pd_addr)); +- +- gfx_v9_0_write_data_to_reg(ring, usepfp, true, +- hub->ctx0_ptb_addr_hi32 + (2 * vmid), +- upper_32_bits(pd_addr)); +- +- gfx_v9_0_write_data_to_reg(ring, usepfp, true, +- hub->vm_inv_eng0_req + eng, req); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + /* wait for the invalidate to complete */ +- gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + +- eng, 0, 1 << vmid, 1 << vmid, 0x20); ++ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + eng, ++ 0, 1 << vmid, 1 << vmid, 0x20); + + /* compute doesn't have PFP */ +- if (usepfp) { ++ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { + /* sync PFP to ME, otherwise we might get invalid PFP reads */ + amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + amdgpu_ring_write(ring, 0x0); +@@ -4318,7 +4303,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ +- 24 + /* VM_FLUSH */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, +@@ -4367,7 +4352,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ +- 24 + /* gfx_v9_0_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, +@@ -4399,7 +4384,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { + 7 + /* gfx_v9_0_ring_emit_hdp_flush */ + 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ +- 24 + /* gfx_v9_0_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */ + 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ + .emit_ib = gfx_v9_0_ring_emit_ib_compute, +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index 04f703c..3b77afb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -365,6 +365,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, + spin_unlock(&adev->gmc.invalidate_lock); + } + ++static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, ++ unsigned vmid, unsigned pasid, ++ uint64_t pd_addr) ++{ ++ struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; ++ uint32_t req = gmc_v9_0_get_invalidate_req(vmid); ++ uint64_t flags = AMDGPU_PTE_VALID; ++ unsigned eng = ring->vm_inv_eng; ++ ++ amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); ++ pd_addr |= flags; ++ ++ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), ++ lower_32_bits(pd_addr)); ++ ++ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), ++ upper_32_bits(pd_addr)); ++ ++ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); ++ ++ return pd_addr; ++} ++ + /** + * gmc_v9_0_set_pte_pde - update the page tables using MMIO + * +@@ -490,8 +513,8 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, + + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, ++ .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb, + .set_pte_pde = gmc_v9_0_set_pte_pde, +- .get_invalidate_req = gmc_v9_0_get_invalidate_req, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, + .get_vm_pde = gmc_v9_0_get_vm_pde + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +index 5cd8ec4..34a5548 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +@@ -1137,28 +1137,9 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); +- amdgpu_ring_write(ring, lower_32_bits(pd_addr)); +- +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); +- amdgpu_ring_write(ring, upper_32_bits(pd_addr)); +- +- /* flush TLB */ +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); +- amdgpu_ring_write(ring, req); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | +@@ -1604,7 +1585,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ +- 18 + /* sdma_v4_0_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v4_0_ring_emit_vm_flush */ + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h +index 26b3feac..a6b3b00 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.h ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.h +@@ -27,6 +27,8 @@ + #include "nbio_v6_1.h" + #include "nbio_v7_0.h" + ++#define SOC15_FLUSH_GPU_TLB_NUM_WREG 3 ++ + extern const struct amd_ip_funcs soc15_common_ip_funcs; + + struct soc15_reg_golden { +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +index 42c4296..241e730 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +@@ -25,6 +25,7 @@ + #include <drm/drmP.h> + #include "amdgpu.h" + #include "amdgpu_uvd.h" ++#include "soc15.h" + #include "soc15d.h" + #include "soc15_common.h" + #include "mmsch_v1_0.h" +@@ -1295,32 +1296,17 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; +- data1 = upper_32_bits(pd_addr); +- uvd_v7_0_ring_emit_wreg(ring, data0, data1); +- +- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; +- data1 = lower_32_bits(pd_addr); +- uvd_v7_0_ring_emit_wreg(ring, data0, data1); ++ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + ++ /* wait for reg writes */ + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + uvd_v7_0_vm_reg_wait(ring, data0, data1, mask); + +- /* flush TLB */ +- data0 = (hub->vm_inv_eng0_req + eng) << 2; +- data1 = req; +- uvd_v7_0_ring_emit_wreg(ring, data0, data1); +- + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vmid; +@@ -1348,31 +1334,16 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, upper_32_bits(pd_addr)); +- +- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, lower_32_bits(pd_addr)); ++ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + ++ /* wait for reg writes */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + +- /* flush TLB */ +- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); +- amdgpu_ring_write(ring, req); +- + /* wait for flush */ + amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); +@@ -1724,7 +1695,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = { + .emit_frame_size = + 2 + /* uvd_v7_0_ring_emit_hdp_flush */ + 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */ +- 34 + /* uvd_v7_0_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* uvd_v7_0_ring_emit_vm_flush */ + 14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */ + .emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */ + .emit_ib = uvd_v7_0_ring_emit_ib, +@@ -1751,7 +1722,7 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = { + .get_wptr = uvd_v7_0_enc_ring_get_wptr, + .set_wptr = uvd_v7_0_enc_ring_set_wptr, + .emit_frame_size = +- 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* uvd_v7_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */ + 1, /* uvd_v7_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ +diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +index 9a0d6d9..e62a24b 100755 +--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +@@ -28,6 +28,7 @@ + #include <drm/drmP.h> + #include "amdgpu.h" + #include "amdgpu_vce.h" ++#include "soc15.h" + #include "soc15d.h" + #include "soc15_common.h" + #include "mmsch_v1_0.h" +@@ -969,31 +970,16 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, upper_32_bits(pd_addr)); +- +- amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, lower_32_bits(pd_addr)); ++ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + ++ /* wait for reg writes */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + +- /* flush TLB */ +- amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); +- amdgpu_ring_write(ring, req); +- + /* wait for flush */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); +@@ -1078,7 +1064,7 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { + .set_wptr = vce_v4_0_ring_set_wptr, + .parse_cs = amdgpu_vce_ring_parse_cs_vm, + .emit_frame_size = +- 17 + /* vce_v4_0_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vce_v4_0_emit_vm_flush */ + 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ + 1, /* vce_v4_0_ring_insert_end */ + .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ +diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +index 23a318f..bdc0052 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +@@ -25,6 +25,7 @@ + #include <drm/drmP.h> + #include "amdgpu.h" + #include "amdgpu_vcn.h" ++#include "soc15.h" + #include "soc15d.h" + #include "soc15_common.h" + +@@ -874,22 +875,6 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + amdgpu_ring_write(ring, ib->length_dw); + } + +-static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring, +- uint32_t data0, uint32_t data1) +-{ +- struct amdgpu_device *adev = ring->adev; +- +- amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0)); +- amdgpu_ring_write(ring, data0); +- amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0)); +- amdgpu_ring_write(ring, data1); +- amdgpu_ring_write(ring, +- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0)); +- amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1); +-} +- + static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring, + uint32_t data0, uint32_t data1, uint32_t mask) + { +@@ -914,32 +899,17 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + uint32_t data0, data1, mask; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2; +- data1 = upper_32_bits(pd_addr); +- vcn_v1_0_dec_vm_reg_write(ring, data0, data1); +- +- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; +- data1 = lower_32_bits(pd_addr); +- vcn_v1_0_dec_vm_reg_write(ring, data0, data1); ++ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + ++ /* wait for register write */ + data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask); + +- /* flush TLB */ +- data0 = (hub->vm_inv_eng0_req + eng) << 2; +- data1 = req; +- vcn_v1_0_dec_vm_reg_write(ring, data0, data1); +- + /* wait for flush */ + data0 = (hub->vm_inv_eng0_ack + eng) << 2; + data1 = 1 << vmid; +@@ -1048,34 +1018,17 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint64_t pd_addr) + { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; +- uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid); +- uint64_t flags = AMDGPU_PTE_VALID; + unsigned eng = ring->vm_inv_eng; + +- amdgpu_gmc_get_vm_pde(ring->adev, -1, &pd_addr, &flags); +- pd_addr |= flags; +- +- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, +- (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, upper_32_bits(pd_addr)); +- +- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, +- (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); +- amdgpu_ring_write(ring, lower_32_bits(pd_addr)); ++ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + ++ /* wait for reg writes */ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, + (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + +- /* flush TLB */ +- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); +- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); +- amdgpu_ring_write(ring, req); +- + /* wait for flush */ + amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); +@@ -1166,7 +1119,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { + .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .emit_frame_size = + 2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */ +- 34 + /* vcn_v1_0_dec_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* vcn_v1_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */ +@@ -1195,7 +1148,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { + .get_wptr = vcn_v1_0_enc_ring_get_wptr, + .set_wptr = vcn_v1_0_enc_ring_set_wptr, + .emit_frame_size = +- 17 + /* vcn_v1_0_enc_ring_emit_vm_flush */ ++ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vcn_v1_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v1_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */ +-- +2.7.4 + |