diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch new file mode 100644 index 00000000..225b7964 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch @@ -0,0 +1,201 @@ +From 9ec6a9973b1007b2aba9d1b28088629819859837 Mon Sep 17 00:00:00 2001 +From: changzhu <Changfeng.Zhu@amd.com> +Date: Thu, 10 Oct 2019 11:02:33 +0800 +Subject: [PATCH 4364/4736] drm/amdgpu: add dummy read by engines for some GCVM + status registers in gfx10 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The GRBM register interface is now capable of bursting 1 cycle per +register wr->wr, wr->rd much faster than previous multicycle per +transaction done interface. This has caused a problem where +status registers requiring HW to update have a 1 cycle delay, due +to the register update having to go through GRBM. + +For cp ucode, it has realized dummy read in cp firmware. It covers +the use of WAIT_REG_MEM operation 1 case only. So it needs to call +gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to +update firmware in case firmware is too old to have function to realize +dummy read in cp firmware. + +For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is +moved to gfxhub in gfx10. So it needs to add dummy read in driver +between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0. 
+ +Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3 +Signed-off-by: changzhu <Changfeng.Zhu@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + + drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 48 +++++++++++++++++++++++++ + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 ++--- + drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++- + 4 files changed, 64 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +index 459aa9059542..a74ecd449775 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +@@ -267,6 +267,7 @@ struct amdgpu_gfx { + uint32_t mec2_feature_version; + bool mec_fw_write_wait; + bool me_fw_write_wait; ++ bool cp_fw_write_wait; + struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; + unsigned num_gfx_rings; + struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +index 17a5cbfd0024..c7a6f98bf6b8 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) + kfree(adev->gfx.rlc.register_list_format); + } + ++static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) ++{ ++ adev->gfx.cp_fw_write_wait = false; ++ ++ switch (adev->asic_type) { ++ case CHIP_NAVI10: ++ case CHIP_NAVI12: ++ case CHIP_NAVI14: ++ if ((adev->gfx.me_fw_version >= 0x00000046) && ++ (adev->gfx.me_feature_version >= 27) && ++ (adev->gfx.pfp_fw_version >= 0x00000068) && ++ (adev->gfx.pfp_feature_version >= 27) && ++ (adev->gfx.mec_fw_version >= 0x0000005b) && ++ (adev->gfx.mec_feature_version >= 27)) ++ adev->gfx.cp_fw_write_wait = true; ++ break; ++ default: ++ break; ++ } ++ ++ if (adev->gfx.cp_fw_write_wait == false) ++ DRM_WARN_ONCE("Warning: check cp_fw_version and update it 
to realize \ ++ GRBM requires 1-cycle delay in cp firmware\n"); ++} ++ ++ + static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) + { + const struct rlc_firmware_header_v2_1 *rlc_hdr; +@@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) + } + } + ++ gfx_v10_0_check_fw_write_wait(adev); + out: + if (err) { + dev_err(adev->dev, +@@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); + } + ++static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ++ uint32_t reg0, uint32_t reg1, ++ uint32_t ref, uint32_t mask) ++{ ++ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); ++ struct amdgpu_device *adev = ring->adev; ++ bool fw_version_ok = false; ++ ++ fw_version_ok = adev->gfx.cp_fw_write_wait; ++ ++ if (fw_version_ok) ++ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, ++ ref, mask, 0x20); ++ else ++ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, ++ ref, mask); ++} ++ + static void + gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, + uint32_t me, uint32_t pipe, +@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { + .emit_tmz = gfx_v10_0_ring_emit_tmz, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, ++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + }; + + static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { +@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, ++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + }; + + static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { +@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs 
gfx_v10_0_ring_funcs_kiq = { + .emit_rreg = gfx_v10_0_ring_emit_rreg, + .emit_wreg = gfx_v10_0_ring_emit_wreg, + .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, ++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, + }; + + static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +index 3b00bce14cfb..af2615ba52aa 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), + upper_32_bits(pd_addr)); + +- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); +- +- /* wait for the invalidate to complete */ +- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, +- 1 << vmid, 1 << vmid); ++ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, ++ hub->vm_inv_eng0_ack + eng, ++ req, 1 << vmid); + + return pd_addr; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +index 3460c00f3eaa..ec47542e21b0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); + } + ++static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ++ uint32_t reg0, uint32_t reg1, ++ uint32_t ref, uint32_t mask) ++{ ++ amdgpu_ring_emit_wreg(ring, reg0, ref); ++ /* wait for a cycle to reset vm_inv_eng*_ack */ ++ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); ++ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); ++} ++ + static int sdma_v5_0_early_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs 
= { + 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ + /* sdma_v5_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + ++ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ + .emit_ib = sdma_v5_0_ring_emit_ib, +@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { + .pad_ib = sdma_v5_0_ring_pad_ib, + .emit_wreg = sdma_v5_0_ring_emit_wreg, + .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, ++ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, + .init_cond_exec = sdma_v5_0_ring_init_cond_exec, + .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, + .preempt_ib = sdma_v5_0_ring_preempt_ib, +-- +2.17.1 + |