1 files changed, 85 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4272-drm-amdgpu-refine-reboot-debugfs-operation-in-ras-ca.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4272-drm-amdgpu-refine-reboot-debugfs-operation-in-ras-ca.patch
new file mode 100644
index 00000000..e39a8cd2
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4272-drm-amdgpu-refine-reboot-debugfs-operation-in-ras-ca.patch
@@ -0,0 +1,85 @@
+From 9b2167f3c47600d84667fb2ee5676035826d288a Mon Sep 17 00:00:00 2001
+From: Guchun Chen <guchun.chen@amd.com>
+Date: Mon, 21 Oct 2019 16:56:00 +0800
+Subject: [PATCH 4272/4736] drm/amdgpu: refine reboot debugfs operation in ras
+ case (v3)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The RAS reboot debugfs node gives the user an easy, per-card way to
+avoid the GPU recovery hang problem by rebooting the system directly
+after a RAS uncorrectable error happens. However, it is a common
+setting, so it should not go through the ras_ctrl node or require the
+user to name an IP block. So add a new auto_reboot node in the RAS
+debugfs dir to achieve this.
+
+v2: in commit message, add justification why ras reboot debugfs
+node is needed.
+v3: use debugfs_create_bool to create debugfs file for boolean value
+
+Signed-off-by: Guchun Chen <guchun.chen@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 1ca613014126..5b532cd254cc 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -151,8 +151,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
+ 		op = 1;
+ 	else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
+ 		op = 2;
+-	else if (sscanf(str, "reboot %32s", block_name) == 1)
+-		op = 3;
+ 	else if (str[0] && str[1] && str[2] && str[3])
+ 		/* ascii string, but commands are not matched. */
+ 		return -EINVAL;
+@@ -216,12 +214,11 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
+  * value to the address.
+  *
+  * Second member: struct ras_debug_if::op.
+- * It has four kinds of operations.
++ * It has three kinds of operations.
+  *
+  * - 0: disable RAS on the block. Take ::head as its data.
+  * - 1: enable RAS on the block. Take ::head as its data.
+  * - 2: inject errors on the block. Take ::inject as its data.
+- * - 3: reboot on unrecoverable error
+  *
+  * How to use the interface?
+  * programs:
+@@ -303,9 +300,6 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
+ 		/* data.inject.address is offset instead of absolute gpu address */
+ 		ret = amdgpu_ras_error_inject(adev, &data.inject);
+ 		break;
+-	case 3:
+-		amdgpu_ras_get_context(adev)->reboot = true;
+-		break;
+ 	default:
+ 		ret = -EINVAL;
+ 		break;
+@@ -1035,6 +1029,17 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
+ 				adev, &amdgpu_ras_debugfs_ctrl_ops);
+ 	debugfs_create_file("ras_eeprom_reset", S_IWUGO | S_IRUGO, con->dir,
+ 				adev, &amdgpu_ras_debugfs_eeprom_ops);
++
++	/*
++	 * After one uncorrectable error happens, usually GPU recovery will
++	 * be scheduled. But due to the known problem in GPU recovery failing
++	 * to bring GPU back, below interface provides one direct way to
++	 * user to reboot system automatically in such case within
++	 * ERREVENT_ATHUB_INTERRUPT generated. Normal GPU recovery routine
++	 * will never be called.
++	 */
++	debugfs_create_bool("auto_reboot", S_IWUGO | S_IRUGO, con->dir,
++				&con->reboot);
+ }
+ 
+ void amdgpu_ras_debugfs_create(struct amdgpu_device *adev,
+-- 
+2.17.1
+