aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch103
1 files changed, 103 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch
new file mode 100644
index 00000000..e421e4eb
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch
@@ -0,0 +1,103 @@
+From e7b93cd9b3cbdcb0999eb91a89e81846948899fd Mon Sep 17 00:00:00 2001
+From: Jesse Zhang <zhexi.zhang@amd.com>
+Date: Tue, 30 Jul 2019 19:15:42 +0800
+Subject: [PATCH 3914/4256] drm/amd/amdgpu:Fix compute ring unable to detect
+ hang.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When a compute fence does not signal, the compute ring cannot detect a
+hardware hang because its timeout value is infinite by default.
+
+In SR-IOV and passthrough mode, if the user does not declare a custom
+timeout value for the compute ring, use the gfx ring timeout value as the
+default, so that a true hardware hang can be detected on the compute ring.
+
+Change-Id: I794ec0868c6c0aad407749457260ecfee0617c10
+Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 13 +------------
+ 3 files changed, 13 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 6df43bc6dc5e..8f7e8911d4f3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -1024,12 +1024,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
+
+ amdgpu_device_check_block_size(adev);
+
+- ret = amdgpu_device_get_job_timeout_settings(adev);
+- if (ret) {
+- dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
+- return ret;
+- }
+-
+ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
+ amdgpu_direct_gma_size = min(amdgpu_direct_gma_size, 96);
+
+@@ -2737,6 +2731,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+ r = amdgpu_device_ip_early_init(adev);
+ if (r)
+ return r;
++
++ r = amdgpu_device_get_job_timeout_settings(adev);
++ if (r) {
++ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
++ return r;
++ }
+
+ /* doorbell bar mapping and doorbell index init*/
+ amdgpu_device_doorbell_init(adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index 0d8d292eb236..50f962a78a61 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -1376,10 +1376,15 @@ int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
+ /*
+ * By default timeout for non compute jobs is 10000.
+ * And there is no timeout enforced on compute jobs.
++ * In SR-IOV or passthrough mode, timeout for compute
++ * jobs is 10000 by default.
+ */
+ adev->gfx_timeout = msecs_to_jiffies(10000);
+ adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
+- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
++ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
++ adev->compute_timeout = adev->gfx_timeout;
++ else
++ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+
+ if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
+ while ((timeout_setting = strsep(&input, ",")) &&
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+index a947fe4e2368..295b3f38a89d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+@@ -460,18 +460,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ timeout = adev->gfx_timeout;
+ break;
+ case AMDGPU_RING_TYPE_COMPUTE:
+- /*
+- * For non-sriov case, no timeout enforce
+- * on compute ring by default. Unless user
+- * specifies a timeout for compute ring.
+- *
+- * For sriov case, always use the timeout
+- * as gfx ring
+- */
+- if (!amdgpu_sriov_vf(ring->adev))
+- timeout = adev->compute_timeout;
+- else
+- timeout = adev->gfx_timeout;
++ timeout = adev->compute_timeout;
+ break;
+ case AMDGPU_RING_TYPE_SDMA:
+ timeout = adev->sdma_timeout;
+--
+2.17.1
+