aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4663-drm-amdgpu-support-full-gpu-reset-workflow-when-ras-.patch
blob: 512f648a33b4b239a369ae3cd3bcc23abcc4b344 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
From f8258870fb3346e5920c15901858da7e88a7d29c Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Wed, 27 Nov 2019 13:17:17 +0800
Subject: [PATCH 4663/4736] drm/amdgpu: support full gpu reset workflow when
 ras err_event_athub occurs

This athub fatal error can be recovered by baco without a system-level reboot,
so add a mode that uses baco for the recovery. This does not affect the default
psp reset cases for now.

Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d
Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2ca9d556c084..e20d324a6d90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4026,12 +4026,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
 	bool in_ras_intr = amdgpu_ras_intr_triggered();
+	bool use_baco =
+		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+		true : false;
 
 	/*
 	 * Flush RAM to disk so that after reboot
 	 * the user can read log and see why the system rebooted.
 	 */
-	if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
+	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
 
 		DRM_WARN("Emergency reboot.");
 
@@ -4042,7 +4045,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
-	dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
+	dev_info(adev->dev, "GPU %s begin!\n",
+		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
 
 	cancel_delayed_work_sync(&adev->delayed_init_work);
 
@@ -4109,7 +4113,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_unregister_gpu_instance(tmp_adev);
 
 		/* disable ras on ALL IPs */
-		if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
+		if (!(in_ras_intr && !use_baco) &&
+		      amdgpu_device_ip_need_full_reset(tmp_adev))
 			amdgpu_ras_suspend(tmp_adev);
 
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -4120,13 +4125,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
 
-			if (in_ras_intr)
+			if (in_ras_intr && !use_baco)
 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
 	}
 
 
-	if (in_ras_intr)
+	if (in_ras_intr && !use_baco)
 		goto skip_sched_resume;
 
 	/*
@@ -4220,7 +4225,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 skip_sched_resume:
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		/*unlock kfd: SRIOV would do it separately */
-		if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
+		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
 	                amdgpu_amdkfd_post_reset(tmp_adev);
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
-- 
2.17.1
="k">$(readlink -f $(pwd)/$SDK_ROOTFS_DIR) fi TAR_OPTS="" if [[ "$ROOTFS_TARBALL" =~ tar\.bz2$ ]]; then TAR_OPTS="--numeric-owner -xjf" fi if [[ "$ROOTFS_TARBALL" =~ tar\.gz$ ]]; then TAR_OPTS="--numeric-owner -xzf" fi if [[ "$ROOTFS_TARBALL" =~ \.tar$ ]]; then TAR_OPTS="--numeric-owner -xf" fi if [ -z "$TAR_OPTS" ]; then echo "Error: Unable to determine sdk tarball format" echo "Accepted types: .tar / .tar.gz / .tar.bz2" exit 1 fi if [ ! -d "$SDK_ROOTFS_DIR" ]; then echo "Creating directory $SDK_ROOTFS_DIR" mkdir -p "$SDK_ROOTFS_DIR" fi pseudo_state_dir="$SDK_ROOTFS_DIR/../$(basename "$SDK_ROOTFS_DIR").pseudo_state" pseudo_state_dir="$(readlink -f $pseudo_state_dir)" if [ -e "$pseudo_state_dir" ]; then echo "Error: $pseudo_state_dir already exists!" echo "Please delete the rootfs tree and pseudo directory manually" echo "if this is really what you want." exit 1 fi mkdir -p "$pseudo_state_dir" touch "$pseudo_state_dir/pseudo.pid" PSEUDO_LOCALSTATEDIR="$pseudo_state_dir" export PSEUDO_LOCALSTATEDIR echo "Extracting rootfs tarball using pseudo..." echo "$PSEUDO $PSEUDO_OPTS tar -C \"$SDK_ROOTFS_DIR\" $TAR_OPTS \"$ROOTFS_TARBALL\"" $PSEUDO $PSEUDO_OPTS tar -C "$SDK_ROOTFS_DIR" $TAR_OPTS "$ROOTFS_TARBALL" DIRCHECK=`ls -l "$SDK_ROOTFS_DIR" | wc -l` if [ "$DIRCHECK" -lt 5 ]; then echo "Warning: I don't see many files in $SDK_ROOTFS_DIR" echo "Please double-check the extraction worked as intended" exit 0 fi echo "SDK image successfully extracted to $SDK_ROOTFS_DIR" exit 0