aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch167
1 files changed, 167 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch
new file mode 100644
index 00000000..6ad7b6ae
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch
@@ -0,0 +1,167 @@
+From 013f63d334471e85b469aad0bba8ed3c2d256cd0 Mon Sep 17 00:00:00 2001
+From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Date: Thu, 22 Aug 2019 15:01:37 -0400
+Subject: [PATCH 3738/4256] dmr/amdgpu: Add system auto reboot to RAS.
+
+In case of a RAS error, allow the user to configure automatic
+system reboot through ras_ctrl.
+This is also part of the temporary workaround for the RAS
+hang problem.
+
+v4: Use latest kernel API for disk sync.
+
+Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
+Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ++++++++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +-
+ include/linux/suspend.h | 3 +++
+ include/linux/syscalls.h | 2 +-
+ kernel/power/main.c | 11 ++++++++++-
+ 6 files changed, 37 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index e30f7ba53aab..b29b26098b8f 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -64,6 +64,8 @@
+ #include "amdgpu_ras.h"
+ #include "amdgpu_pmu.h"
+
++#include <linux/suspend.h>
++
+ MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+ MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
+ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
+@@ -3758,6 +3760,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
+ int i, r = 0;
+ bool in_ras_intr = amdgpu_ras_intr_triggered();
+
++ /*
++ * Flush RAM to disk so that after reboot
++ * the user can read log and see why the system rebooted.
++ */
++ if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
++
++ DRM_WARN("Emergency reboot.");
++
++ ksys_sync_helper();
++ emergency_restart();
++ }
++
+ need_full_reset = job_signaled = false;
+ INIT_LIST_HEAD(&device_list);
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 01a66559f04e..5c2276bb8325 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -154,6 +154,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
+ op = 1;
+ else if (sscanf(str, "inject %32s %8s", block_name, err) == 2)
+ op = 2;
++ else if (sscanf(str, "reboot %32s", block_name) == 1)
++ op = 3;
+ else if (str[0] && str[1] && str[2] && str[3])
+ /* ascii string, but commands are not matched. */
+ return -EINVAL;
+@@ -287,6 +289,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
+ /* data.inject.address is offset instead of absolute gpu address */
+ ret = amdgpu_ras_error_inject(adev, &data.inject);
+ break;
++ case 3:
++ amdgpu_ras_get_context(adev)->reboot = true;
++ break;
+ default:
+ ret = -EINVAL;
+ break;
+@@ -1744,6 +1749,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
+ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
+ {
+ if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
+- DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected! Stopping all GPU jobs.\n");
++ DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n");
++
++ amdgpu_ras_reset_gpu(adev, false);
+ }
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+index 6fda96b29f1f..f487038ba331 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+@@ -334,7 +334,7 @@ struct amdgpu_ras {
+ struct mutex recovery_lock;
+
+ uint32_t flags;
+-
++ bool reboot;
+ struct amdgpu_ras_eeprom_control eeprom_control;
+ };
+
+diff --git a/include/linux/suspend.h b/include/linux/suspend.h
+index 3f529ad9a9d2..6b3ea9ea6a9e 100644
+--- a/include/linux/suspend.h
++++ b/include/linux/suspend.h
+@@ -425,6 +425,7 @@ void restore_processor_state(void);
+ /* kernel/power/main.c */
+ extern int register_pm_notifier(struct notifier_block *nb);
+ extern int unregister_pm_notifier(struct notifier_block *nb);
++extern void ksys_sync_helper(void);
+
+ #define pm_notifier(fn, pri) { \
+ static struct notifier_block fn##_nb = \
+@@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
+ return 0;
+ }
+
++static inline void ksys_sync_helper(void) {}
++
+ #define pm_notifier(fn, pri) do { (void)(fn); } while (0)
+
+ static inline bool pm_wakeup_pending(void) { return false; }
+diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
+index 2ff814c92f7f..9dc129a92b31 100644
+--- a/include/linux/syscalls.h
++++ b/include/linux/syscalls.h
+@@ -906,7 +906,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
+ unsigned mask, struct statx __user *buffer);
+ asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
+ int flags, uint32_t sig);
+-
++void ksys_sync(void);
+ /*
+ * Architecture-specific system calls
+ */
+diff --git a/kernel/power/main.c b/kernel/power/main.c
+index 35b50823d83b..1f5b1b262ff7 100644
+--- a/kernel/power/main.c
++++ b/kernel/power/main.c
+@@ -16,7 +16,7 @@
+ #include <linux/debugfs.h>
+ #include <linux/seq_file.h>
+ #include <linux/suspend.h>
+-
++#include <linux/syscalls.h>
+ #include "power.h"
+
+ #ifdef CONFIG_PM_SLEEP
+@@ -49,6 +49,15 @@ void unlock_system_sleep(void)
+ current->flags &= ~PF_FREEZER_SKIP;
+ mutex_unlock(&system_transition_mutex);
+ }
++
++void ksys_sync_helper(void)
++{
++ pr_info("Syncing filesystems ... ");
++ ksys_sync();
++ pr_cont("done.\n");
++}
++EXPORT_SYMBOL_GPL(ksys_sync_helper);
++
+ EXPORT_SYMBOL_GPL(unlock_system_sleep);
+
+ /* Routines for PM-transition notifications */
+--
+2.17.1
+