diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch new file mode 100644 index 00000000..512f648a --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch @@ -0,0 +1,86 @@ +From f8258870fb3346e5920c15901858da7e88a7d29c Mon Sep 17 00:00:00 2001 +From: Le Ma <le.ma@amd.com> +Date: Wed, 27 Nov 2019 13:17:17 +0800 +Subject: [PATCH 4663/4736] drm/amdgpu: support full gpu reset workflow when + ras err_event_athub occurs + +This athub fatal error can be recovered by baco without system-level reboot, +so add a mode to use baco for the recovery. Not affect the default psp reset +situations for now. + +Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d +Signed-off-by: Le Ma <le.ma@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 2ca9d556c084..e20d324a6d90 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -4026,12 +4026,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; + bool in_ras_intr = amdgpu_ras_intr_triggered(); ++ bool use_baco = ++ (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? ++ true : false; + + /* + * Flush RAM to disk so that after reboot + * the user can read log and see why the system rebooted. + */ +- if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { ++ if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { + + DRM_WARN("Emergency reboot."); + +@@ -4042,7 +4045,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + need_full_reset = job_signaled = false; + INIT_LIST_HEAD(&device_list); + +- dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); ++ dev_info(adev->dev, "GPU %s begin!\n", ++ (in_ras_intr && !use_baco) ? "jobs stop":"reset"); + + cancel_delayed_work_sync(&adev->delayed_init_work); + +@@ -4109,7 +4113,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + amdgpu_unregister_gpu_instance(tmp_adev); + + /* disable ras on ALL IPs */ +- if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) ++ if (!(in_ras_intr && !use_baco) && ++ amdgpu_device_ip_need_full_reset(tmp_adev)) + amdgpu_ras_suspend(tmp_adev); + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { +@@ -4120,13 +4125,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + + drm_sched_stop(&ring->sched, job ? &job->base : NULL); + +- if (in_ras_intr) ++ if (in_ras_intr && !use_baco) + amdgpu_job_stop_all_jobs_on_sched(&ring->sched); + } + } + + +- if (in_ras_intr) ++ if (in_ras_intr && !use_baco) + goto skip_sched_resume; + + /* +@@ -4220,7 +4225,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + skip_sched_resume: + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /*unlock kfd: SRIOV would do it separately */ +- if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) ++ if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) + amdgpu_amdkfd_post_reset(tmp_adev); + amdgpu_device_unlock_adev(tmp_adev); + } +-- +2.17.1 + |