1 files changed, 178 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch
new file mode 100644
index 00000000..4a5083f8
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch
@@ -0,0 +1,178 @@
+From e3a0cb8029420303d9a01a4c92942ef91b4aee43 Mon Sep 17 00:00:00 2001
+From: Amber Lin <Amber.Lin@amd.com>
+Date: Tue, 21 Jun 2016 16:49:41 -0400
+Subject: [PATCH 1458/4131] drm/amdgpu: CP dequeue race in SW scheduler
+
+There are multiple writers of CP_HQD_DEQUEUE_REQUEST: HW, the HWS uCode,
+and the uCode running on the pipe the queue is mapped to. An approach for
+resolving the race is implemented in HWS. This patch implements the same
+approach in the SW scheduler.
+
+BUG: SWDEV-93596
+
+Change-Id: Idc2e961f93e887b6d86cb726ca99a99fb06c08cb
+Signed-off-by: Amber Lin <Amber.Lin@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 58 ++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 58 ++++++++++++++++++++++-
+ 2 files changed, 114 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+index b643f1d..67b9e56 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+@@ -565,7 +565,8 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd,
+ 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ 	uint32_t temp;
+ 	enum hqd_dequeue_request_type type;
+-	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
++	unsigned long flags, end_jiffies;
++	int retry;
+ 
+ 	acquire_queue(kgd, pipe_id, queue_id);
+ 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+@@ -582,8 +583,63 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd,
+ 		break;
+ 	}
+ 
++	/* Workaround: If IQ timer is active and the wait time is close to or
++	 * equal to 0, dequeueing is not safe. Wait until either the wait time
++	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
++	 * cleared before continuing. Also, ensure wait times are set to at
++	 * least 0x3.
++	 */
++	local_irq_save(flags);
++	preempt_disable();
++	retry = 5000; /* wait for 500 usecs at maximum */
++	while (true) {
++		temp = RREG32(mmCP_HQD_IQ_TIMER);
++		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
++			pr_debug("HW is processing IQ\n");
++			goto loop;
++		}
++		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
++			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
++					== 3) /* SEM-rearm is safe */
++				break;
++			/* Wait time 3 is safe for CP, but our MMIO read/write
++			 * time is close to 1 microsecond, so check for 10 to
++			 * leave more buffer room
++			 */
++			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
++					>= 10)
++				break;
++			pr_debug("IQ timer is active\n");
++		} else
++			break;
++	loop:
++		if (!retry) {
++			pr_err("kfd: CP HQD IQ timer status time out\n");
++			break;
++		}
++		ndelay(100);
++		--retry;
++	}
++	retry = 1000;
++	while (true) {
++		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
++		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
++			break;
++		pr_debug("Dequeue request is pending\n");
++
++		if (!retry) {
++			pr_err("kfd: CP HQD dequeue request time out\n");
++			break;
++		}
++		ndelay(100);
++		--retry;
++	}
++	local_irq_restore(flags);
++	preempt_enable();
++
+ 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+ 
++	end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ 	while (true) {
+ 		temp = RREG32(mmCP_HQD_ACTIVE);
+ 		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+index 81ff833..e2c3457 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+@@ -563,7 +563,8 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd,
+ 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ 	uint32_t temp;
+ 	enum hqd_dequeue_request_type type;
+-	unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
++	unsigned long flags, end_jiffies;
++	int retry;
+ 
+ 	acquire_queue(kgd, pipe_id, queue_id);
+ 
+@@ -579,8 +580,63 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd,
+ 		break;
+ 	}
+ 
++	/* Workaround: If IQ timer is active and the wait time is close to or
++	 * equal to 0, dequeueing is not safe. Wait until either the wait time
++	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
++	 * cleared before continuing. Also, ensure wait times are set to at
++	 * least 0x3.
++	 */
++	local_irq_save(flags);
++	preempt_disable();
++	retry = 5000; /* wait for 500 usecs at maximum */
++	while (true) {
++		temp = RREG32(mmCP_HQD_IQ_TIMER);
++		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
++			pr_debug("HW is processing IQ\n");
++			goto loop;
++		}
++		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
++			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
++					== 3) /* SEM-rearm is safe */
++				break;
++			/* Wait time 3 is safe for CP, but our MMIO read/write
++			 * time is close to 1 microsecond, so check for 10 to
++			 * leave more buffer room
++			 */
++			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
++					>= 10)
++				break;
++			pr_debug("IQ timer is active\n");
++		} else
++			break;
++	loop:
++		if (!retry) {
++			pr_err("kfd: CP HQD IQ timer status time out\n");
++			break;
++		}
++		ndelay(100);
++		--retry;
++	}
++	retry = 1000;
++	while (true) {
++		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
++		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
++			break;
++		pr_debug("Dequeue request is pending\n");
++
++		if (!retry) {
++			pr_err("kfd: CP HQD dequeue request time out\n");
++			break;
++		}
++		ndelay(100);
++		--retry;
++	}
++	local_irq_restore(flags);
++	preempt_enable();
++
+ 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+ 
++	end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ 	while (true) {
+ 		temp = RREG32(mmCP_HQD_ACTIVE);
+ 		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+-- 
+2.7.4
+