aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch 87
1 files changed, 87 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch
new file mode 100644
index 00000000..604ab73f
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch
@@ -0,0 +1,87 @@
+From 881a616e2e79841ce68a3cd7426a40f41c495a74 Mon Sep 17 00:00:00 2001
+From: Tao Zhou <tao.zhou1@amd.com>
+Date: Mon, 30 Sep 2019 14:48:19 +0800
+Subject: [PATCH 4112/4736] drm/amdgpu: avoid ras error injection for retired
+ page
+
+Check whether a page is a bad page before umc error injection; a bad
+page should not be accessed again.
+
+Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
+Reviewed-by: Guchun Chen <guchun.chen@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 44 +++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 18af80f1cffd..f3f3a98f93b3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -69,6 +69,9 @@ const char *ras_block_string[] = {
+
+ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+
++static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
++ uint64_t addr);
++
+ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
+ {
+@@ -289,6 +292,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
+ break;
+ }
+
++ /* umc ce/ue error injection for a bad page is not allowed */
++ if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
++ amdgpu_ras_check_bad_page(adev, data.inject.address)) {
++ DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
++ data.inject.address);
++ break;
++ }
++
+ /* data.inject.address is offset instead of absolute gpu address */
+ ret = amdgpu_ras_error_inject(adev, &data.inject);
+ break;
+@@ -1429,6 +1440,39 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+ return ret;
+ }
+
++/*
++ * Return true if @addr, shifted right by AMDGPU_GPU_PAGE_SHIFT, equals the
++ * retired_page of any entry in con->eh_data->bps[]; false if none matches or
++ * no eh_data exists. Scans under con->recovery_lock. Note: umc block only.
++ */
++static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
++ uint64_t addr)
++{
++ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
++ struct ras_err_handler_data *data;
++ int i;
++ bool ret = false;
++
++ if (!con || !con->eh_data)
++ return ret;
++
++ mutex_lock(&con->recovery_lock);
++ data = con->eh_data; /* re-read and re-check now that recovery_lock is held */
++ if (!data)
++ goto out;
++
++ addr >>= AMDGPU_GPU_PAGE_SHIFT; /* shifted value is what retired_page is compared to */
++ for (i = 0; i < data->count; i++)
++ if (addr == data->bps[i].retired_page) {
++ ret = true;
++ goto out;
++ }
++
++out:
++ mutex_unlock(&con->recovery_lock);
++ return ret;
++}
++
+ /* called in gpu recovery/init */
+ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
+ {
+--
+2.17.1
+