diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3204-drm-amdgpu-implement-gmc_v8_0_emit_flush_gpu_tlb.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3204-drm-amdgpu-implement-gmc_v8_0_emit_flush_gpu_tlb.patch | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3204-drm-amdgpu-implement-gmc_v8_0_emit_flush_gpu_tlb.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3204-drm-amdgpu-implement-gmc_v8_0_emit_flush_gpu_tlb.patch new file mode 100644 index 00000000..e2d62b5b --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3204-drm-amdgpu-implement-gmc_v8_0_emit_flush_gpu_tlb.patch @@ -0,0 +1,221 @@ +From 2821f18728fbcf149f1418ef4973785d23e845a5 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> +Date: Fri, 12 Jan 2018 19:14:42 +0100 +Subject: [PATCH 3204/4131] drm/amdgpu: implement gmc_v8_0_emit_flush_gpu_tlb +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Unify tlb flushing for gmc v8. + +v2: handle UVD v6 as well + +Signed-off-by: Christian König <christian.koenig@amd.com> +Acked-by: Chunming Zhou <david1.zhou@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 27 +++------------------------ + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 19 +++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 17 ++--------------- + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 15 +-------------- + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 21 +-------------------- + drivers/gpu/drm/amd/amdgpu/vi.h | 2 ++ + 6 files changed, 28 insertions(+), 73 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +index c22b921..4c6f5fc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +@@ -6336,28 +6336,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + { + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | +- WRITE_DATA_DST_SEL(0)) | +- WR_CONFIRM); +- if (vmid < 8) { +- amdgpu_ring_write(ring, +- (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); +- } else { +- amdgpu_ring_write(ring, +- (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); +- } +- amdgpu_ring_write(ring, 0); +- amdgpu_ring_write(ring, pd_addr >> 12); +- +- /* bits 0-15 are the VM contexts0-15 */ +- /* invalidate the cache */ +- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); +- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | +- WRITE_DATA_DST_SEL(0))); +- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); +- amdgpu_ring_write(ring, 0); +- amdgpu_ring_write(ring, 1 << vmid); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + /* wait for the invalidate to complete */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); +@@ -6889,7 +6868,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { + .emit_frame_size = /* maximum 215dw if count 16 IBs in */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ +- 19 + /* VM_FLUSH */ ++ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, +@@ -6936,7 +6915,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { + 7 + /* gfx_v8_0_ring_emit_hdp_flush */ + 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ +- 17 + /* gfx_v8_0_ring_emit_vm_flush */ ++ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ + 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib = gfx_v8_0_ring_emit_ib_compute, +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +index 67b1dd2..7d1df3a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +@@ -617,6 +617,24 @@ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + } + ++static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, ++ unsigned vmid, unsigned pasid, ++ uint64_t pd_addr) ++{ ++ uint32_t reg; ++ ++ if (vmid < 8) ++ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; ++ else ++ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; ++ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12); ++ ++ /* bits 0-15 are the VM contexts0-15 */ ++ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid); ++ ++ return pd_addr; ++} ++ + /** + * gmc_v8_0_set_pte_pde - update the page tables using MMIO + * +@@ -1676,6 +1694,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { + + static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { + .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb, ++ .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb, + .set_pte_pde = gmc_v8_0_set_pte_pde, + .set_prt = gmc_v8_0_set_prt, + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +index 89f4b99..439cff2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +@@ -862,20 +862,7 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, unsigned pasid, + uint64_t pd_addr) + { +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- if (vmid < 8) { +- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); +- } else { +- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); +- } +- amdgpu_ring_write(ring, pd_addr >> 12); +- +- /* flush TLB */ +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); +- amdgpu_ring_write(ring, 1 << vmid); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | +@@ -1215,7 +1202,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { + 6 + /* sdma_v2_4_ring_emit_hdp_flush */ + 3 + /* sdma_v2_4_ring_emit_hdp_invalidate */ + 6 + /* sdma_v2_4_ring_emit_pipeline_sync */ +- 12 + /* sdma_v2_4_ring_emit_vm_flush */ ++ VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */ + 10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */ + .emit_ib = sdma_v2_4_ring_emit_ib, +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +index 78aca26..2b2677a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +@@ -1128,20 +1128,7 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, unsigned pasid, + uint64_t pd_addr) + { +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- if (vmid < 8) { +- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); +- } else { +- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); +- } +- amdgpu_ring_write(ring, pd_addr >> 12); +- +- /* flush TLB */ +- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | +- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); +- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); +- amdgpu_ring_write(ring, 1 << vmid); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + /* wait for flush */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | +diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +index b3296be..6f03eba 100644 +--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +@@ -1074,26 +1074,7 @@ static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, unsigned pasid, + uint64_t pd_addr) + { +- uint32_t reg; +- +- if (vmid < 8) +- reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid; +- else +- reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8; +- +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); +- amdgpu_ring_write(ring, reg << 2); +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); +- amdgpu_ring_write(ring, pd_addr >> 12); +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); +- amdgpu_ring_write(ring, 0x8); +- +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); +- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0)); +- amdgpu_ring_write(ring, 1 << vmid); +- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0)); +- amdgpu_ring_write(ring, 0x8); ++ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); + + amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0)); + amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); +diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h +index 575d7ae..6cc2bee 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vi.h ++++ b/drivers/gpu/drm/amd/amdgpu/vi.h +@@ -24,6 +24,8 @@ + #ifndef __VI_H__ + #define __VI_H__ + ++#define VI_FLUSH_GPU_TLB_NUM_WREG 2 ++ + void vi_srbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); + int vi_set_ip_blocks(struct amdgpu_device *adev); +-- +2.7.4 + |