diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch
new file mode 100644
index 00000000..604ab73f
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4112-drm-amdgpu-avoid-ras-error-injection-for-retired-pag.patch
@@ -0,0 +1,87 @@
+From 881a616e2e79841ce68a3cd7426a40f41c495a74 Mon Sep 17 00:00:00 2001
+From: Tao Zhou <tao.zhou1@amd.com>
+Date: Mon, 30 Sep 2019 14:48:19 +0800
+Subject: [PATCH 4112/4736] drm/amdgpu: avoid ras error injection for retired
+ page
+
+check whether a page is bad page before umc error injection, bad page
+should not be accessed again
+
+Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
+Reviewed-by: Guchun Chen <guchun.chen@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 44 +++++++++++++++++++++++++
+ 1 file changed, 44 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+index 18af80f1cffd..f3f3a98f93b3 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+@@ -69,6 +69,9 @@ const char *ras_block_string[] = {
+ 
+ atomic_t amdgpu_ras_in_intr = ATOMIC_INIT(0);
+ 
++static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
++				uint64_t addr);
++
+ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
+ 					size_t size, loff_t *pos)
+ {
+@@ -289,6 +292,14 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
+ 			break;
+ 		}
+ 
++		/* umc ce/ue error injection for a bad page is not allowed */
++		if ((data.head.block == AMDGPU_RAS_BLOCK__UMC) &&
++		    amdgpu_ras_check_bad_page(adev, data.inject.address)) {
++			DRM_WARN("RAS WARN: 0x%llx has been marked as bad before error injection!\n",
++					data.inject.address);
++			break;
++		}
++
+ 		/* data.inject.address is offset instead of absolute gpu address */
+ 		ret = amdgpu_ras_error_inject(adev, &data.inject);
+ 		break;
+@@ -1429,6 +1440,39 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev)
+ 	return ret;
+ }
+ 
++/*
++ * check if an address belongs to bad page
++ *
++ * Note: this check is only for umc block
++ */
++static bool amdgpu_ras_check_bad_page(struct amdgpu_device *adev,
++				uint64_t addr)
++{
++	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
++	struct ras_err_handler_data *data;
++	int i;
++	bool ret = false;
++
++	if (!con || !con->eh_data)
++		return ret;
++
++	mutex_lock(&con->recovery_lock);
++	data = con->eh_data;
++	if (!data)
++		goto out;
++
++	addr >>= AMDGPU_GPU_PAGE_SHIFT;
++	for (i = 0; i < data->count; i++)
++		if (addr == data->bps[i].retired_page) {
++			ret = true;
++			goto out;
++		}
++
++out:
++	mutex_unlock(&con->recovery_lock);
++	return ret;
++}
++
+ /* called in gpu recovery/init */
+ int amdgpu_ras_reserve_bad_pages(struct amdgpu_device *adev)
+ {
+-- 
+2.17.1
+