diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch
new file mode 100644
index 00000000..80f889c3
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1517-drm-amdgpu-enable-ras-on-gfx9.patch
@@ -0,0 +1,314 @@
+From d7a0687e3002be95a026a2cac6cead346164425f Mon Sep 17 00:00:00 2001
+From: Feifei Xu <Feifei.Xu@amd.com>
+Date: Fri, 7 Dec 2018 17:52:20 +0800
+Subject: [PATCH 1517/2940] drm/amdgpu: enable ras on gfx9
+
+Register ecc interrupts and ecc interrupt handler on gfx9.
+Add ras support on gfx9
+
+[backport review: guard against a NULL ras_if in the CP ECC IRQ handler,
+ return the real error code from gfx_v9_0_ecc_late_init(), clear ras_if
+ after freeing it in sw_fini and add comments to the new functions]
+
+Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
+Signed-off-by: xinhui pan <xinhui.pan@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |   3 +
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 203 ++++++++++++++++++++++++
+ 2 files changed, 206 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+index f790e15bcd08..09fc53af3d35 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+@@ -258,6 +258,9 @@ struct amdgpu_gfx {
+ 	/* pipe reservation */
+ 	struct mutex			pipe_reserve_mutex;
+ 	DECLARE_BITMAP			(pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
++
++	/*ras */
++	struct ras_common_if		*ras_if;
+ };
+ 
+ #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+index 1478e784cff0..3a86d9d515b5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -40,6 +40,8 @@
+ 
+ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
+ 
++#include "amdgpu_ras.h"
++
+ #define GFX9_NUM_GFX_RINGS     1
+ #define GFX9_MEC_HPD_SIZE 4096
+ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
+@@ -1638,6 +1640,18 @@ static int gfx_v9_0_sw_init(void *handle)
+ 	if (r)
+ 		return r;
+ 
++	/* ECC error */
++	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
++			      &adev->gfx.cp_ecc_error_irq);
++	if (r)
++		return r;
++
++	/* FUE error */
++	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
++			      &adev->gfx.cp_ecc_error_irq);
++	if (r)
++		return r;
++
+ 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
+ 
+ 	gfx_v9_0_scratch_init(adev);
+@@ -1730,6 +1744,21 @@ static int gfx_v9_0_sw_fini(void *handle)
+ 	int i;
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ 
++	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
++			adev->gfx.ras_if) {
++		struct ras_common_if *ras_if = adev->gfx.ras_if;
++		struct ras_ih_if ih_info = {
++			.head = *ras_if,
++		};
++
++		amdgpu_ras_debugfs_remove(adev, ras_if);
++		amdgpu_ras_sysfs_remove(adev, ras_if);
++		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++		amdgpu_ras_feature_enable(adev, ras_if, 0);
++		kfree(ras_if);
++		adev->gfx.ras_if = NULL;
++	}
++
+ 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+ 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
+ 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+@@ -3308,6 +3337,7 @@ static int gfx_v9_0_hw_fini(void *handle)
+ {
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ 
++	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
+ 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
+ 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
+ 
+@@ -3497,6 +3527,84 @@ static int gfx_v9_0_early_init(void *handle)
+ 	return 0;
+ }
+ 
++static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
++		struct amdgpu_iv_entry *entry);
++
++/*
++ * Set up RAS for the GFX block: enable the feature, register the RAS
++ * interrupt handler, create the debugfs and sysfs nodes and call
++ * amdgpu_irq_get() on the CP ECC error interrupt.  If RAS is not supported
++ * for GFX, the feature is disabled and 0 is returned without allocating
++ * adev->gfx.ras_if.  Each failure path undoes the steps already completed.
++ */
++static int gfx_v9_0_ecc_late_init(void *handle)
++{
++	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	struct ras_common_if **ras_if = &adev->gfx.ras_if;
++	struct ras_ih_if ih_info = {
++		.cb = gfx_v9_0_process_ras_data_cb,
++	};
++	struct ras_fs_if fs_info = {
++		.sysfs_name = "gfx_err_count",
++		.debugfs_name = "gfx_err_inject",
++	};
++	struct ras_common_if ras_block = {
++		.block = AMDGPU_RAS_BLOCK__GFX,
++		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
++		.sub_block_index = 0,
++		.name = "gfx",
++	};
++	int r;
++
++	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
++		amdgpu_ras_feature_enable(adev, &ras_block, 0);
++		return 0;
++	}
++
++	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
++	if (!*ras_if)
++		return -ENOMEM;
++
++	**ras_if = ras_block;
++
++	r = amdgpu_ras_feature_enable(adev, *ras_if, 1);
++	if (r)
++		goto feature;
++
++	ih_info.head = **ras_if;
++	fs_info.head = **ras_if;
++
++	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
++	if (r)
++		goto interrupt;
++
++	r = amdgpu_ras_debugfs_create(adev, &fs_info);
++	if (r)
++		goto debugfs;
++
++	r = amdgpu_ras_sysfs_create(adev, &fs_info);
++	if (r)
++		goto sysfs;
++
++	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
++	if (r)
++		goto irq;
++
++	return 0;
++irq:
++	amdgpu_ras_sysfs_remove(adev, *ras_if);
++sysfs:
++	amdgpu_ras_debugfs_remove(adev, *ras_if);
++debugfs:
++	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++interrupt:
++	amdgpu_ras_feature_enable(adev, *ras_if, 0);
++feature:
++	kfree(*ras_if);
++	*ras_if = NULL;
++	return r;
++}
++
+ static int gfx_v9_0_late_init(void *handle)
+ {
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+@@ -3510,6 +3618,10 @@ static int gfx_v9_0_late_init(void *handle)
+ 	if (r)
+ 		return r;
+ 
++	r = gfx_v9_0_ecc_late_init(handle);
++	if (r)
++		return r;
++
+ 	return 0;
+ }
+ 
+@@ -4546,6 +4658,49 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
+ 	return 0;
+ }
+ 
++#define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
++	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
++			CP_ECC_ERROR_INT_ENABLE, 1)
++
++#define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
++	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
++			CP_ECC_ERROR_INT_ENABLE, 0)
++
++/*
++ * Write CP_ECC_ERROR_INT_ENABLE in CP_INT_CNTL_RING0 and in the ME1 pipe 0-3
++ * interrupt control registers: 0 to disable, 1 to enable.
++ */
++static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
++					   struct amdgpu_irq_src *source,
++					   unsigned type,
++					   enum amdgpu_interrupt_state state)
++{
++	switch (state) {
++	case AMDGPU_IRQ_STATE_DISABLE:
++		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
++				CP_ECC_ERROR_INT_ENABLE, 0);
++		DISABLE_ECC_ON_ME_PIPE(1, 0);
++		DISABLE_ECC_ON_ME_PIPE(1, 1);
++		DISABLE_ECC_ON_ME_PIPE(1, 2);
++		DISABLE_ECC_ON_ME_PIPE(1, 3);
++		break;
++
++	case AMDGPU_IRQ_STATE_ENABLE:
++		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
++				CP_ECC_ERROR_INT_ENABLE, 1);
++		ENABLE_ECC_ON_ME_PIPE(1, 0);
++		ENABLE_ECC_ON_ME_PIPE(1, 1);
++		ENABLE_ECC_ON_ME_PIPE(1, 2);
++		ENABLE_ECC_ON_ME_PIPE(1, 3);
++		break;
++	default:
++		break;
++	}
++
++	return 0;
++}
++
++
+ static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
+ 					    struct amdgpu_irq_src *src,
+ 					    unsigned type,
+@@ -4662,6 +4817,45 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
+ 	return 0;
+ }
+ 
++/*
++ * RAS interrupt callback installed by gfx_v9_0_ecc_late_init(): calls
++ * amdgpu_ras_reset_gpu() and returns AMDGPU_RAS_UE.
++ */
++static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
++		struct amdgpu_iv_entry *entry)
++{
++	/* TODO ue will trigger an interrupt. */
++	amdgpu_ras_reset_gpu(adev, 0);
++	return AMDGPU_RAS_UE;
++}
++
++/*
++ * CP ECC/FUE error interrupt handler: logs the event and passes the IV
++ * entry to amdgpu_ras_interrupt_dispatch().
++ */
++static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
++				     struct amdgpu_irq_src *source,
++				     struct amdgpu_iv_entry *entry)
++{
++	struct ras_common_if *ras_if = adev->gfx.ras_if;
++	struct ras_dispatch_if ih_data = {
++		.entry = entry,
++	};
++
++	DRM_ERROR("CP ECC ERROR IRQ\n");
++
++	/*
++	 * ras_if is only allocated by gfx_v9_0_ecc_late_init() when RAS is
++	 * supported and its setup succeeded; do not dereference it otherwise.
++	 */
++	if (!ras_if)
++		return 0;
++
++	ih_data.head = *ras_if;
++	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
++	return 0;
++}
++
+ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
+ 	.name = "gfx_v9_0",
+ 	.early_init = gfx_v9_0_early_init,
+@@ -4823,6 +5017,12 @@ static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
+ 	.process = gfx_v9_0_priv_inst_irq,
+ };
+ 
++static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
++	.set = gfx_v9_0_set_cp_ecc_error_state,
++	.process = gfx_v9_0_cp_ecc_error_irq,
++};
++
++
+ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+ {
+ 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
+@@ -4833,6 +5033,9 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
+ 
+ 	adev->gfx.priv_inst_irq.num_types = 1;
+ 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
++
++	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
++	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
+ }
+ 
+ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
+-- 
+2.17.1
+