diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch new file mode 100644 index 00000000..e421e4eb --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3914-drm-amd-amdgpu-Fix-compute-ring-unable-to-detect-han.patch @@ -0,0 +1,103 @@ +From e7b93cd9b3cbdcb0999eb91a89e81846948899fd Mon Sep 17 00:00:00 2001 +From: Jesse Zhang <zhexi.zhang@amd.com> +Date: Tue, 30 Jul 2019 19:15:42 +0800 +Subject: [PATCH 3914/4256] drm/amd/amdgpu:Fix compute ring unable to detect + hang. +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When compute fence did not signal, compute ring cannot detect hardware hang +because its timeout value is set to be infinite by default. + +In SR-IOV and passthrough mode, if user does not declare custom timeout +value for compute ring, then use gfx ring timeout value as default. So +that when there is a true hardware hang, compute ring can detect it. 
+ +Change-Id: I794ec0868c6c0aad407749457260ecfee0617c10 +Signed-off-by: Jesse Zhang <zhexi.zhang@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++++++------ + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 13 +------------ + 3 files changed, 13 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 6df43bc6dc5e..8f7e8911d4f3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -1024,12 +1024,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) + + amdgpu_device_check_block_size(adev); + +- ret = amdgpu_device_get_job_timeout_settings(adev); +- if (ret) { +- dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); +- return ret; +- } +- + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + amdgpu_direct_gma_size = min(amdgpu_direct_gma_size, 96); + +@@ -2737,6 +2731,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, + r = amdgpu_device_ip_early_init(adev); + if (r) + return r; ++ ++ r = amdgpu_device_get_job_timeout_settings(adev); ++ if (r) { ++ dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); ++ return r; ++ } + + /* doorbell bar mapping and doorbell index init*/ + amdgpu_device_doorbell_init(adev); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 0d8d292eb236..50f962a78a61 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -1376,10 +1376,15 @@ int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) + /* + * By default timeout for non compute jobs is 10000. + * And there is no timeout enforced on compute jobs. ++ * In SR-IOV or passthrough mode, timeout for compute ++ * jobs is 10000 by default. 
+ */ + adev->gfx_timeout = msecs_to_jiffies(10000); + adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; +- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; ++ if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) ++ adev->compute_timeout = adev->gfx_timeout; ++ else ++ adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + + if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) { + while ((timeout_setting = strsep(&input, ",")) && +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +index a947fe4e2368..295b3f38a89d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +@@ -460,18 +460,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: +- /* +- * For non-sriov case, no timeout enforce +- * on compute ring by default. Unless user +- * specifies a timeout for compute ring. +- * +- * For sriov case, always use the timeout +- * as gfx ring +- */ +- if (!amdgpu_sriov_vf(ring->adev)) +- timeout = adev->compute_timeout; +- else +- timeout = adev->gfx_timeout; ++ timeout = adev->compute_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; +-- +2.17.1 + |