diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch new file mode 100644 index 00000000..4a5083f8 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1458-drm-amdgpu-CP-dequeue-race-in-SW-scheduler.patch @@ -0,0 +1,178 @@ +From e3a0cb8029420303d9a01a4c92942ef91b4aee43 Mon Sep 17 00:00:00 2001 +From: Amber Lin <Amber.Lin@amd.com> +Date: Tue, 21 Jun 2016 16:49:41 -0400 +Subject: [PATCH 1458/4131] drm/amdgpu: CP dequeue race in SW scheduler + +There are multiple writers of CP_HQD_DEQUEUE_REQUEST - HW, HWS uCode and +the uCode running on the pipe the queue is mapped to. An approach for +resolving the race is implemented on HWS. This patch implements the +approach on SW scheduler. + +BUG: SWDEV-93596 + +Change-Id: Idc2e961f93e887b6d86cb726ca99a99fb06c08cb +Signed-off-by: Amber Lin <Amber.Lin@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 58 ++++++++++++++++++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 58 ++++++++++++++++++++++- + 2 files changed, 114 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +index b643f1d..67b9e56 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +@@ -565,7 +565,8 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + enum hqd_dequeue_request_type type; +- unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; ++ unsigned long flags, end_jiffies; ++ int retry; + + acquire_queue(kgd, pipe_id, queue_id); + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0); +@@ -582,8 +583,63 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + break; + } + ++ /* Workaround: If IQ timer is active and the wait time is close to or ++ * equal to 0, dequeueing is not safe. Wait until either the wait time ++ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is ++ * cleared before continuing. Also, ensure wait times are set to at ++ * least 0x3. ++ */ ++ local_irq_save(flags); ++ preempt_disable(); ++ retry = 5000; /* wait for 500 usecs at maximum */ ++ while (true) { ++ temp = RREG32(mmCP_HQD_IQ_TIMER); ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { ++ pr_debug("HW is processing IQ\n"); ++ goto loop; ++ } ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) ++ == 3) /* SEM-rearm is safe */ ++ break; ++ /* Wait time 3 is safe for CP, but our MMIO read/write ++ * time is close to 1 microsecond, so check for 10 to ++ * leave more buffer room ++ */ ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) ++ >= 10) ++ break; ++ pr_debug("IQ timer is active\n"); ++ } else ++ break; ++ loop: ++ if (!retry) { ++ pr_err("kfd: CP HQD IQ timer status time out\n"); ++ break; ++ } ++ ndelay(100); ++ --retry; ++ } ++ retry = 1000; ++ while (true) { ++ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); ++ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) ++ break; ++ pr_debug("Dequeue request is pending\n"); ++ ++ if (!retry) { ++ pr_err("kfd: CP HQD dequeue request time out\n"); ++ break; ++ } ++ ndelay(100); ++ --retry; ++ } ++ local_irq_restore(flags); ++ preempt_enable(); ++ + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + ++ end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +index 81ff833..e2c3457 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +@@ -563,7 +563,8 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t temp; + enum hqd_dequeue_request_type type; +- unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; ++ unsigned long flags, end_jiffies; ++ int retry; + + acquire_queue(kgd, pipe_id, queue_id); + +@@ -579,8 +580,63 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, + break; + } + ++ /* Workaround: If IQ timer is active and the wait time is close to or ++ * equal to 0, dequeueing is not safe. Wait until either the wait time ++ * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is ++ * cleared before continuing. Also, ensure wait times are set to at ++ * least 0x3. ++ */ ++ local_irq_save(flags); ++ preempt_disable(); ++ retry = 5000; /* wait for 500 usecs at maximum */ ++ while (true) { ++ temp = RREG32(mmCP_HQD_IQ_TIMER); ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) { ++ pr_debug("HW is processing IQ\n"); ++ goto loop; ++ } ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) { ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE) ++ == 3) /* SEM-rearm is safe */ ++ break; ++ /* Wait time 3 is safe for CP, but our MMIO read/write ++ * time is close to 1 microsecond, so check for 10 to ++ * leave more buffer room ++ */ ++ if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME) ++ >= 10) ++ break; ++ pr_debug("IQ timer is active\n"); ++ } else ++ break; ++ loop: ++ if (!retry) { ++ pr_err("kfd: CP HQD IQ timer status time out\n"); ++ break; ++ } ++ ndelay(100); ++ --retry; ++ } ++ retry = 1000; ++ while (true) { ++ temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST); ++ if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK)) ++ break; ++ pr_debug("Dequeue request is pending\n"); ++ ++ if (!retry) { ++ pr_err("kfd: CP HQD dequeue request time out\n"); ++ break; ++ } ++ ndelay(100); ++ --retry; ++ } ++ local_irq_restore(flags); ++ preempt_enable(); ++ + WREG32(mmCP_HQD_DEQUEUE_REQUEST, type); + ++ end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(mmCP_HQD_ACTIVE); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) +-- +2.7.4 + |