diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch new file mode 100644 index 00000000..b1d8a50e --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch @@ -0,0 +1,116 @@ +From cc09164e9f4f8e3a48ccb6d820f953e38c43b372 Mon Sep 17 00:00:00 2001 +From: Xiaojie Yuan <xiaojie.yuan@amd.com> +Date: Fri, 16 Aug 2019 16:13:28 +0800 +Subject: [PATCH 3608/4256] drm/amdgpu: add dummy read for some GCVM status + registers + +The GRBM register interface is now capable of bursting 1 cycle per +register wr->wr, wr->rd much faster than previous multicycle per +transaction done interface. This has caused a problem where status +registers requiring HW to update have a 1 cycle delay, due to the +register update having to go through GRBM. + +SW may operate on an incorrect value if they write a register and +immediately check the corresponding status register. + +Registers requiring HW to clear or set fields may be delayed by 1 cycle. +For example, + +1. write VM_INVALIDATE_ENG0_REQ mask = 5a +2. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a + a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is complete +3. write VM_INVALIDATE_ENG0_REQ mask = 5a +4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a + a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0 + b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because + the remote GRBM h/w register takes one extra cycle to be cleared + c. 
In this case, SW will see a false ACK if they exit on first read + +Affected registers (only GC variant) | Recommended Dummy Read +--------------------------------------+---------------------------- +VM_INVALIDATE_ENG*_ACK | VM_INVALIDATE_ENG*_REQ +VM_L2_STATUS | VM_L2_STATUS +VM_L2_PROTECTION_FAULT_STATUS | VM_L2_PROTECTION_FAULT_STATUS +VM_L2_PROTECTION_FAULT_ADDR_HI/LO32 | VM_L2_PROTECTION_FAULT_ADDR_HI/LO32 +VM_L2_IH_LOG_BUSY | VM_L2_IH_LOG_BUSY +MC_VM_L2_PERFCOUNTER_HI/LO | MC_VM_L2_PERFCOUNTER_HI/LO +ATC_L2_PERFCOUNTER_HI/LO | ATC_L2_PERFCOUNTER_HI/LO +ATC_L2_PERFCOUNTER2_HI/LO | ATC_L2_PERFCOUNTER2_HI/LO + +Signed-off-by: Xiaojie Yuan <xiaojie.yuan@amd.com> +Reviewed-by: Jack Xiao <Jack.Xiao@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 15 +++++++++++++++ + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 16 ++++++++++++++++ + 2 files changed, 31 insertions(+) + +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +index d83d8a6a1fc0..56f76a1f32ee 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +@@ -135,6 +135,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, + addr |= ((u64)entry->src_data[1] & 0xf) << 44; + + if (!amdgpu_sriov_vf(adev)) { ++ /* ++ * Issue a dummy read to wait for the status register to ++ * be updated to avoid reading an incorrect value due to ++ * the new fast GRBM interface. ++ */ ++ if (entry->vmid_src == AMDGPU_GFXHUB_0) ++ RREG32(hub->vm_l2_pro_fault_status); ++ + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + } +@@ -228,6 +236,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, + + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + ++ /* ++ * Issue a dummy read to wait for the ACK register to be cleared ++ * to avoid a false ACK due to the new fast GRBM interface. 
++ */ ++ if (vmhub == AMDGPU_GFXHUB_0) ++ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); ++ + /* Wait for ACK with a delay.*/ + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); +diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +index 57f0498152be..a22fbb8fe1a5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +@@ -352,6 +352,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, + + /* If it's the first fault for this address, process it normally */ + if (!amdgpu_sriov_vf(adev)) { ++ /* ++ * Issue a dummy read to wait for the status register to ++ * be updated to avoid reading an incorrect value due to ++ * the new fast GRBM interface. ++ */ ++ if (entry->vmid_src == AMDGPU_GFXHUB_0) ++ RREG32(hub->vm_l2_pro_fault_status); ++ + status = RREG32(hub->vm_l2_pro_fault_status); + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + } +@@ -480,6 +488,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); ++ ++ /* ++ * Issue a dummy read to wait for the ACK register to be cleared ++ * to avoid a false ACK due to the new fast GRBM interface. ++ */ ++ if (vmhub == AMDGPU_GFXHUB_0) ++ RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); ++ + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + if (tmp & (1 << vmid)) +-- +2.17.1 + |