about summary refs log tree commit diff stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch 201
1 files changed, 201 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch
new file mode 100644
index 00000000..225b7964
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4364-drm-amdgpu-add-dummy-read-by-engines-for-some-GCVM-s.patch
@@ -0,0 +1,201 @@
+From 9ec6a9973b1007b2aba9d1b28088629819859837 Mon Sep 17 00:00:00 2001
+From: changzhu <Changfeng.Zhu@amd.com>
+Date: Thu, 10 Oct 2019 11:02:33 +0800
+Subject: [PATCH 4364/4736] drm/amdgpu: add dummy read by engines for some GCVM
+ status registers in gfx10
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The GRBM register interface is now capable of bursting 1 cycle per
+register wr->wr, wr->rd much faster than previous multicycle per
+transaction done interface. This has caused a problem where
+status registers requiring HW to update have a 1 cycle delay, due
+to the register update having to go through GRBM.
+
+For cp ucode, the dummy read is already implemented in cp firmware. It
+covers the WAIT_REG_MEM operation 1 case only, so gfx10 needs to call
+gfx_v10_0_wait_reg_mem. In addition, a warning to update the firmware
+is needed in case the firmware is too old to implement the dummy read
+in cp firmware.
+
+For sdma ucode, the dummy read is not implemented in sdma firmware. sdma
+is moved to gfxhub in gfx10, so the driver needs to add a dummy read
+between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
+
+Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
+Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 +
+ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 48 +++++++++++++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 ++---
+ drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 13 ++++++-
+ 4 files changed, 64 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+index 459aa9059542..a74ecd449775 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+@@ -267,6 +267,7 @@ struct amdgpu_gfx {
+ uint32_t mec2_feature_version;
+ bool mec_fw_write_wait;
+ bool me_fw_write_wait;
++ bool cp_fw_write_wait;
+ struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
+ unsigned num_gfx_rings;
+ struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+index 17a5cbfd0024..c7a6f98bf6b8 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
+ kfree(adev->gfx.rlc.register_list_format);
+ }
+
++static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
++{
++	adev->gfx.cp_fw_write_wait = false;
++	/* Set the flag only on Navi10/12/14 whose ME, PFP and MEC fw are all new enough. */
++	switch (adev->asic_type) {
++	case CHIP_NAVI10:
++	case CHIP_NAVI12:
++	case CHIP_NAVI14:
++		if ((adev->gfx.me_fw_version >= 0x00000046) &&
++		    (adev->gfx.me_feature_version >= 27) &&
++		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
++		    (adev->gfx.pfp_feature_version >= 27) &&
++		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
++		    (adev->gfx.mec_feature_version >= 27))
++			adev->gfx.cp_fw_write_wait = true;
++		break;
++	default:
++		break;
++	}
++	/* Adjacent literals: a backslash-newline would splice the indent into the message. */
++	if (!adev->gfx.cp_fw_write_wait)
++		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
++			      "GRBM requires 1-cycle delay in cp firmware\n");
++}
++
++
+ static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
+ {
+ const struct rlc_firmware_header_v2_1 *rlc_hdr;
+@@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
+ }
+ }
+
++ gfx_v10_0_check_fw_write_wait(adev);
+ out:
+ if (err) {
+ dev_err(adev->dev,
+@@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+ }
+
++static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
++ uint32_t reg0, uint32_t reg1,
++ uint32_t ref, uint32_t mask)
++{
++ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
++ struct amdgpu_device *adev = ring->adev;
++ bool fw_version_ok = false;
++
++ fw_version_ok = adev->gfx.cp_fw_write_wait;
++ /* new enough CP fw: single WAIT_REG_MEM packet; otherwise fall back to the helper */
++ if (fw_version_ok)
++ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
++ ref, mask, 0x20);
++ else
++ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
++ ref, mask);
++}
++
+ static void
+ gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
+ uint32_t me, uint32_t pipe,
+@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
+ .emit_tmz = gfx_v10_0_ring_emit_tmz,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ };
+
+ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
+@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
+ .pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ };
+
+ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
+ .emit_rreg = gfx_v10_0_ring_emit_rreg,
+ .emit_wreg = gfx_v10_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
++ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
+ };
+
+ static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+index 3b00bce14cfb..af2615ba52aa 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
+ upper_32_bits(pd_addr));
+
+- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+-
+- /* wait for the invalidate to complete */
+- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+- 1 << vmid, 1 << vmid);
++ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
++ hub->vm_inv_eng0_ack + eng,
++ req, 1 << vmid);
+
+ return pd_addr;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+index 3460c00f3eaa..ec47542e21b0 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
+ }
+
++static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
++ uint32_t reg0, uint32_t reg1,
++ uint32_t ref, uint32_t mask)
++{
++ amdgpu_ring_emit_wreg(ring, reg0, ref);
++ /* dummy wait on reg0 with value 0/mask 0: gives vm_inv_eng*_ack a cycle to reset */
++ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
++ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
++}
++
+ static int sdma_v5_0_early_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
+ 6 + /* sdma_v5_0_ring_emit_pipeline_sync */
+ /* sdma_v5_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
++ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
+ 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
+ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
+ .emit_ib = sdma_v5_0_ring_emit_ib,
+@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
+ .pad_ib = sdma_v5_0_ring_pad_ib,
+ .emit_wreg = sdma_v5_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
++ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
+ .init_cond_exec = sdma_v5_0_ring_init_cond_exec,
+ .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
+ .preempt_ib = sdma_v5_0_ring_preempt_ib,
+--
+2.17.1
+