1 files changed, 116 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch
new file mode 100644
index 00000000..b1d8a50e
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3608-drm-amdgpu-add-dummy-read-for-some-GCVM-status-regis.patch
@@ -0,0 +1,116 @@
+From cc09164e9f4f8e3a48ccb6d820f953e38c43b372 Mon Sep 17 00:00:00 2001
+From: Xiaojie Yuan <xiaojie.yuan@amd.com>
+Date: Fri, 16 Aug 2019 16:13:28 +0800
+Subject: [PATCH 3608/4256] drm/amdgpu: add dummy read for some GCVM status
+ registers
+
+The GRBM register interface is now capable of bursting 1 cycle per
+register wr->wr, wr->rd much faster than previous multicycle per
+transaction done interface.  This has caused a problem where status
+registers requiring HW to update have a 1 cycle delay, due to the
+register update having to go through GRBM.
+
+SW may operate on an incorrect value if it writes a register and
+immediately checks the corresponding status register.
+
+Registers requiring HW to clear or set fields may be delayed by 1 cycle.
+For example,
+
+1. write VM_INVALIDATE_ENG0_REQ mask = 5a
+2. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
+    a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is complete
+3. write VM_INVALIDATE_ENG0_REQ mask = 5a
+4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
+    a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0
+    b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because
+       the remote GRBM h/w register takes one extra cycle to be cleared
+    c. In this case, SW will see a false ACK if it exits on the first read
+
+Affected registers (only GC variant)  |  Recommended Dummy Read
+--------------------------------------+----------------------------
+VM_INVALIDATE_ENG*_ACK                |  VM_INVALIDATE_ENG*_REQ
+VM_L2_STATUS                          |  VM_L2_STATUS
+VM_L2_PROTECTION_FAULT_STATUS         |  VM_L2_PROTECTION_FAULT_STATUS
+VM_L2_PROTECTION_FAULT_ADDR_HI/LO32   |  VM_L2_PROTECTION_FAULT_ADDR_HI/LO32
+VM_L2_IH_LOG_BUSY                     |  VM_L2_IH_LOG_BUSY
+MC_VM_L2_PERFCOUNTER_HI/LO            |  MC_VM_L2_PERFCOUNTER_HI/LO
+ATC_L2_PERFCOUNTER_HI/LO              |  ATC_L2_PERFCOUNTER_HI/LO
+ATC_L2_PERFCOUNTER2_HI/LO             |  ATC_L2_PERFCOUNTER2_HI/LO
+
+Signed-off-by: Xiaojie Yuan <xiaojie.yuan@amd.com>
+Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 15 +++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 16 ++++++++++++++++
+ 2 files changed, 31 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+index d83d8a6a1fc0..56f76a1f32ee 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+@@ -135,6 +135,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
+ 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
+ 
+ 	if (!amdgpu_sriov_vf(adev)) {
++		/*
++		 * Issue a dummy read to wait for the status register to
++		 * be updated to avoid reading an incorrect value due to
++		 * the new fast GRBM interface.
++		 */
++		if (entry->vmid_src == AMDGPU_GFXHUB_0)
++			RREG32(hub->vm_l2_pro_fault_status);
++
+ 		status = RREG32(hub->vm_l2_pro_fault_status);
+ 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ 	}
+@@ -228,6 +236,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
+ 
+ 	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ 
++	/*
++	 * Issue a dummy read to wait for the ACK register to be cleared
++	 * to avoid a false ACK due to the new fast GRBM interface.
++	 */
++	if (vmhub == AMDGPU_GFXHUB_0)
++		RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
++
+ 	/* Wait for ACK with a delay.*/
+ 	for (i = 0; i < adev->usec_timeout; i++) {
+ 		tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+index 57f0498152be..a22fbb8fe1a5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+@@ -352,6 +352,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
+ 
+ 	/* If it's the first fault for this address, process it normally */
+ 	if (!amdgpu_sriov_vf(adev)) {
++		/*
++		 * Issue a dummy read to wait for the status register to
++		 * be updated to avoid reading an incorrect value due to
++		 * the new fast GRBM interface.
++		 */
++		if (entry->vmid_src == AMDGPU_GFXHUB_0)
++			RREG32(hub->vm_l2_pro_fault_status);
++
+ 		status = RREG32(hub->vm_l2_pro_fault_status);
+ 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+ 	}
+@@ -480,6 +488,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ 
+         spin_lock(&adev->gmc.invalidate_lock);
+         WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
++
++        /*
++         * Issue a dummy read to wait for the ACK register to be cleared
++         * to avoid a false ACK due to the new fast GRBM interface.
++         */
++        if (vmhub == AMDGPU_GFXHUB_0)
++                RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
++
+         for (j = 0; j < adev->usec_timeout; j++) {
+                 tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+                 if (tmp & (1 << vmid))
+-- 
+2.17.1
+