diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch | 167 |
1 files changed, 167 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch new file mode 100644 index 00000000..6ad7b6ae --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3738-dmr-amdgpu-Add-system-auto-reboot-to-RAS.patch @@ -0,0 +1,167 @@ +From 013f63d334471e85b469aad0bba8ed3c2d256cd0 Mon Sep 17 00:00:00 2001 +From: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Date: Thu, 22 Aug 2019 15:01:37 -0400 +Subject: [PATCH 3738/4256] dmr/amdgpu: Add system auto reboot to RAS. + +In case of RAS error allow the user to configure auto system +reboot through ras_ctrl. +This is also part of the temporary workaround for the RAS +hang problem. + +v4: Use latest kernel API for disk sync. + +Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 ++++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 ++++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- + include/linux/suspend.h | 3 +++ + include/linux/syscalls.h | 2 +- + kernel/power/main.c | 11 ++++++++++- + 6 files changed, 37 insertions(+), 4 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index e30f7ba53aab..b29b26098b8f 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -64,6 +64,8 @@ + #include "amdgpu_ras.h" + #include "amdgpu_pmu.h" + ++#include <linux/suspend.h> ++ + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); + MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); + MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); +@@ -3758,6 +3760,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + int i, r = 0; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + ++ /* ++ * Flush RAM to disk so that after reboot ++ 
* the user can read log and see why the system rebooted. ++ */ ++ if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { ++ ++ DRM_WARN("Emergency reboot."); ++ ++ ksys_sync_helper(); ++ emergency_restart(); ++ } ++ + need_full_reset = job_signaled = false; + INIT_LIST_HEAD(&device_list); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index 01a66559f04e..5c2276bb8325 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -154,6 +154,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, + op = 1; + else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) + op = 2; ++ else if (sscanf(str, "reboot %32s", block_name) == 1) ++ op = 3; + else if (str[0] && str[1] && str[2] && str[3]) + /* ascii string, but commands are not matched. */ + return -EINVAL; +@@ -287,6 +289,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * + /* data.inject.address is offset instead of absolute gpu address */ + ret = amdgpu_ras_error_inject(adev, &data.inject); + break; ++ case 3: ++ amdgpu_ras_get_context(adev)->reboot = true; ++ break; + default: + ret = -EINVAL; + break; +@@ -1744,6 +1749,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) + { + if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { +- DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected! 
Stopping all GPU jobs.\n"); ++ DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); ++ ++ amdgpu_ras_reset_gpu(adev, false); + } + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +index 6fda96b29f1f..f487038ba331 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +@@ -334,7 +334,7 @@ struct amdgpu_ras { + struct mutex recovery_lock; + + uint32_t flags; +- ++ bool reboot; + struct amdgpu_ras_eeprom_control eeprom_control; + }; + +diff --git a/include/linux/suspend.h b/include/linux/suspend.h +index 3f529ad9a9d2..6b3ea9ea6a9e 100644 +--- a/include/linux/suspend.h ++++ b/include/linux/suspend.h +@@ -425,6 +425,7 @@ void restore_processor_state(void); + /* kernel/power/main.c */ + extern int register_pm_notifier(struct notifier_block *nb); + extern int unregister_pm_notifier(struct notifier_block *nb); ++extern void ksys_sync_helper(void); + + #define pm_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ +@@ -462,6 +463,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) + return 0; + } + ++static inline void ksys_sync_helper(void) {} ++ + #define pm_notifier(fn, pri) do { (void)(fn); } while (0) + + static inline bool pm_wakeup_pending(void) { return false; } +diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h +index 2ff814c92f7f..9dc129a92b31 100644 +--- a/include/linux/syscalls.h ++++ b/include/linux/syscalls.h +@@ -906,7 +906,7 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); + asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, + int flags, uint32_t sig); +- ++void ksys_sync(void); + /* + * Architecture-specific system calls + */ +diff --git a/kernel/power/main.c b/kernel/power/main.c +index 35b50823d83b..1f5b1b262ff7 100644 +--- a/kernel/power/main.c ++++ b/kernel/power/main.c +@@ -16,7 +16,7 @@ + #include 
<linux/debugfs.h> + #include <linux/seq_file.h> + #include <linux/suspend.h> +- ++#include <linux/syscalls.h> + #include "power.h" + + #ifdef CONFIG_PM_SLEEP +@@ -49,6 +49,15 @@ void unlock_system_sleep(void) + current->flags &= ~PF_FREEZER_SKIP; + mutex_unlock(&system_transition_mutex); + } ++ ++void ksys_sync_helper(void) ++{ ++ pr_info("Syncing filesystems ... "); ++ ksys_sync(); ++ pr_cont("done.\n"); ++} ++EXPORT_SYMBOL_GPL(ksys_sync_helper); ++ + EXPORT_SYMBOL_GPL(unlock_system_sleep); + + /* Routines for PM-transition notifications */ +-- +2.17.1 + |