1 files changed, 86 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch
new file mode 100644
index 00000000..512f648a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch
@@ -0,0 +1,86 @@
+From f8258870fb3346e5920c15901858da7e88a7d29c Mon Sep 17 00:00:00 2001
+From: Le Ma <le.ma@amd.com>
+Date: Wed, 27 Nov 2019 13:17:17 +0800
+Subject: [PATCH 4663/4736] drm/amdgpu: support full gpu reset workflow when
+ ras err_event_athub occurs
+
+This athub fatal error can be recovered by BACO without a system-level reboot,
+so add a mode that uses BACO for the recovery. The default PSP reset path is
+not affected for now.
+
+Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d
+Signed-off-by: Le Ma <le.ma@amd.com>
+Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 2ca9d556c084..e20d324a6d90 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -4026,12 +4026,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ 	struct amdgpu_device *tmp_adev = NULL;
+ 	int i, r = 0;
+ 	bool in_ras_intr = amdgpu_ras_intr_triggered();
++	bool use_baco =
++		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
++		true : false;
+ 
+ 	/*
+ 	 * Flush RAM to disk so that after reboot
+ 	 * the user can read log and see why the system rebooted.
+ 	 */
+-	if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
++	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
+ 
+ 		DRM_WARN("Emergency reboot.");
+ 
+@@ -4042,7 +4045,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ 	need_full_reset = job_signaled = false;
+ 	INIT_LIST_HEAD(&device_list);
+ 
+-	dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
++	dev_info(adev->dev, "GPU %s begin!\n",
++		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
+ 
+ 	cancel_delayed_work_sync(&adev->delayed_init_work);
+ 
+@@ -4109,7 +4113,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ 		amdgpu_unregister_gpu_instance(tmp_adev);
+ 
+ 		/* disable ras on ALL IPs */
+-		if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
++		if (!(in_ras_intr && !use_baco) &&
++		      amdgpu_device_ip_need_full_reset(tmp_adev))
+ 			amdgpu_ras_suspend(tmp_adev);
+ 
+ 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+@@ -4120,13 +4125,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ 
+ 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
+ 
+-			if (in_ras_intr)
++			if (in_ras_intr && !use_baco)
+ 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
+ 		}
+ 	}
+ 
+ 
+-	if (in_ras_intr)
++	if (in_ras_intr && !use_baco)
+ 		goto skip_sched_resume;
+ 
+ 	/*
+@@ -4220,7 +4225,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ skip_sched_resume:
+ 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+ 		/*unlock kfd: SRIOV would do it separately */
+-		if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
++		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
+ 	                amdgpu_amdkfd_post_reset(tmp_adev);
+ 		amdgpu_device_unlock_adev(tmp_adev);
+ 	}
+-- 
+2.17.1
+