aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch | 280
1 files changed, 280 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch
new file mode 100644
index 00000000..80f889c3
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch
@@ -0,0 +1,280 @@
+From d7a0687e3002be95a026a2cac6cead346164425f Mon Sep 17 00:00:00 2001
+From: Feifei Xu <Feifei.Xu@amd.com>
+Date: Fri, 7 Dec 2018 17:52:20 +0800
+Subject: [PATCH 1517/2940] drm/amdgpu: enable ras on gfx9
+
+Register the ECC interrupts and the ECC interrupt handler on gfx9.
+Add RAS support on gfx9.
+
+Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
+Signed-off-by: xinhui pan <xinhui.pan@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 3 +
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 173 ++++++++++++++++++++++++
+ 2 files changed, 176 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+index f790e15bcd08..09fc53af3d35 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+@@ -258,6 +258,9 @@ struct amdgpu_gfx {
+ /* pipe reservation */
+ struct mutex pipe_reserve_mutex;
+ DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
++
++ /*ras */
++ struct ras_common_if *ras_if;
+ };
+
+ #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+index 1478e784cff0..3a86d9d515b5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -40,6 +40,8 @@
+
+ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+
++#include "amdgpu_ras.h"
++
+ #define GFX9_NUM_GFX_RINGS 1
+ #define GFX9_MEC_HPD_SIZE 4096
+ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+@@ -1638,6 +1640,18 @@ static int gfx_v9_0_sw_init(void *handle)
+ if (r)
+ return r;
+
++ /* ECC error */
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
++ &adev->gfx.cp_ecc_error_irq);
++ if (r)
++ return r;
++
++ /* FUE error */
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
++ &adev->gfx.cp_ecc_error_irq);
++ if (r)
++ return r;
++
+ adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+
+ gfx_v9_0_scratch_init(adev);
+@@ -1730,6 +1744,20 @@ static int gfx_v9_0_sw_fini(void *handle)
+ int i;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
++ adev->gfx.ras_if) {
++ struct ras_common_if *ras_if = adev->gfx.ras_if;
++ struct ras_ih_if ih_info = {
++ .head = *ras_if,
++ };
++
++ amdgpu_ras_debugfs_remove(adev, ras_if);
++ amdgpu_ras_sysfs_remove(adev, ras_if);
++ amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++ amdgpu_ras_feature_enable(adev, ras_if, 0);
++ kfree(ras_if);
++ }
++
+ amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
+ amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+@@ -3308,6 +3336,7 @@ static int gfx_v9_0_hw_fini(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
++ amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+
+@@ -3497,6 +3526,77 @@ static int gfx_v9_0_early_init(void *handle)
+ return 0;
+ }
+
++static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
++					struct amdgpu_iv_entry *entry);
++
++/*
++ * Enable GFX RAS: allocate adev->gfx.ras_if, enable the feature, register the
++ * interrupt callback and debugfs/sysfs nodes, then take cp_ecc_error_irq.
++ * Returns 0 (also when GFX RAS is unsupported) or the failing step's error.
++ */
++static int gfx_v9_0_ecc_late_init(void *handle)
++{
++	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	struct ras_common_if **ras_if = &adev->gfx.ras_if;
++	struct ras_ih_if ih_info = { .cb = gfx_v9_0_process_ras_data_cb };
++	struct ras_fs_if fs_info = {
++		.sysfs_name = "gfx_err_count",
++		.debugfs_name = "gfx_err_inject",
++	};
++	struct ras_common_if ras_block = {
++		.block = AMDGPU_RAS_BLOCK__GFX,
++		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
++		.sub_block_index = 0,
++		.name = "gfx",
++	};
++	int r;
++
++	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
++		amdgpu_ras_feature_enable(adev, &ras_block, 0);
++		return 0;
++	}
++
++	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
++	if (!*ras_if)
++		return -ENOMEM;
++	**ras_if = ras_block;
++
++	r = amdgpu_ras_feature_enable(adev, *ras_if, 1);
++	if (r)
++		goto feature;
++	ih_info.head = **ras_if;
++	fs_info.head = **ras_if;
++
++	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
++	if (r)
++		goto interrupt;
++
++	r = amdgpu_ras_debugfs_create(adev, &fs_info);
++	if (r)
++		goto debugfs;
++
++	r = amdgpu_ras_sysfs_create(adev, &fs_info);
++	if (r)
++		goto sysfs;
++
++	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
++	if (r)
++		goto irq;
++	return 0;
++irq:
++	amdgpu_ras_sysfs_remove(adev, *ras_if);
++sysfs:
++	amdgpu_ras_debugfs_remove(adev, *ras_if);
++debugfs:
++	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++interrupt:
++	amdgpu_ras_feature_enable(adev, *ras_if, 0);
++feature:
++	kfree(*ras_if);
++	*ras_if = NULL;
++	return r;	/* propagate the real error instead of a blanket -EINVAL */
++}
++
+ static int gfx_v9_0_late_init(void *handle)
+ {
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+@@ -3510,6 +3610,10 @@ static int gfx_v9_0_late_init(void *handle)
+ if (r)
+ return r;
+
++ r = gfx_v9_0_ecc_late_init(handle);
++ if (r)
++ return r;
++
+ return 0;
+ }
+
+@@ -4546,6 +4650,45 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ return 0;
+ }
+
++/* Set (1) / clear (0) CP_ECC_ERROR_INT_ENABLE in CP_ME##me##_PIPE##pipe##_INT_CNTL. */
++#define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
++	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
++			CP_ECC_ERROR_INT_ENABLE, 1)
++
++#define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
++	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
++			CP_ECC_ERROR_INT_ENABLE, 0)
++
++/*
++ * .set hook of the CP ECC error irq source: program CP_ECC_ERROR_INT_ENABLE
++ * in CP_INT_CNTL_RING0 and in the four ME1 pipe registers to match @state.
++ * Any other state is ignored; @source and @type are unused. Returns 0.
++ */
++static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
++					      struct amdgpu_irq_src *source,
++					      unsigned type,
++					      enum amdgpu_interrupt_state state)
++{
++	if (state == AMDGPU_IRQ_STATE_ENABLE) {
++		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
++			       CP_ECC_ERROR_INT_ENABLE, 1);
++		ENABLE_ECC_ON_ME_PIPE(1, 0);
++		ENABLE_ECC_ON_ME_PIPE(1, 1);
++		ENABLE_ECC_ON_ME_PIPE(1, 2);
++		ENABLE_ECC_ON_ME_PIPE(1, 3);
++	} else if (state == AMDGPU_IRQ_STATE_DISABLE) {
++		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
++			       CP_ECC_ERROR_INT_ENABLE, 0);
++		DISABLE_ECC_ON_ME_PIPE(1, 0);
++		DISABLE_ECC_ON_ME_PIPE(1, 1);
++		DISABLE_ECC_ON_ME_PIPE(1, 2);
++		DISABLE_ECC_ON_ME_PIPE(1, 3);
++	}
++
++	return 0;
++}
++
++
+ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *src,
+ unsigned type,
+@@ -4662,6 +4805,27 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
+ return 0;
+ }
+
++static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
++ struct amdgpu_iv_entry *entry)
++{
++ /* RAS interrupt callback (ih_info.cb). TODO: ue will trigger an interrupt. */
++ amdgpu_ras_reset_gpu(adev, 0); /* called for every event; 'entry' is not inspected */
++ return AMDGPU_RAS_UE; /* always reported as AMDGPU_RAS_UE */
++}
++
++/* CP ECC/FUE irq handler: log the event and hand it to the RAS dispatcher. */
++static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
++		struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry)
++{
++	struct ras_dispatch_if ih_data = { .entry = entry };
++	DRM_ERROR("CP ECC ERROR IRQ\n");
++	if (!adev->gfx.ras_if)	/* RAS unsupported or late init failed */
++		return 0;
++	ih_data.head = *adev->gfx.ras_if;
++	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
++	return 0;
++}
++
+ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
+ .name = "gfx_v9_0",
+ .early_init = gfx_v9_0_early_init,
+@@ -4823,6 +4987,12 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
+ .process = gfx_v9_0_priv_inst_irq,
+ };
+
++static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
++ .set = gfx_v9_0_set_cp_ecc_error_state, /* toggles CP_ECC_ERROR_INT_ENABLE on ring0 and the ME1 pipes */
++ .process = gfx_v9_0_cp_ecc_error_irq, /* logs the error and dispatches it to the RAS handler */
++};
++
++
+ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+ {
+ adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+@@ -4833,6 +5003,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+
+ adev->gfx.priv_inst_irq.num_types = 1;
+ adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
++
++ adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
++ adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
+ }
+
+ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+--
+2.17.1
+