diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch | 327 |
1 files changed, 327 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch
new file mode 100644
index 00000000..d1c7e62e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch
@@ -0,0 +1,327 @@
+From dd12fccc938fba6ae89772a122b8fe8988243d5d Mon Sep 17 00:00:00 2001
+From: xinhui pan <xinhui.pan@amd.com>
+Date: Wed, 28 Nov 2018 21:14:56 +0800
+Subject: [PATCH 1516/2940] drm/amdgpu: enable ras on sdma4
+
+register IH, enable ras features on sdma.
+create sysfs debugfs file for sdma.
+
+[review: return the real error code from late_init, skip the ECC irq
+ dispatch/put when RAS was not enabled, clear ras_if after kfree, drop the
+ unused locals in the RAS callback; blob ids/context whitespace not redone.]
+
+Signed-off-by: xinhui pan <xinhui.pan@amd.com>
+Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
+Signed-off-by: Eric Huang <JinhuiEric.Huang@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |   4 +
+ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 206 ++++++++++++++++++++++-
+ 2 files changed, 209 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+index 79e89e3bebc4..e988c3f09ecc 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+@@ -30,6 +30,8 @@
+ enum amdgpu_sdma_irq {
+ 	AMDGPU_SDMA_IRQ_TRAP0 = 0,
+ 	AMDGPU_SDMA_IRQ_TRAP1,
++	AMDGPU_SDMA_IRQ_ECC0,
++	AMDGPU_SDMA_IRQ_ECC1,
+ 
+ 	AMDGPU_SDMA_IRQ_LAST
+ };
+@@ -49,9 +51,11 @@ struct amdgpu_sdma {
+ 	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ 	struct amdgpu_irq_src trap_irq;
+ 	struct amdgpu_irq_src illegal_inst_irq;
++	struct amdgpu_irq_src ecc_irq;
+ 	int num_instances;
+ 	uint32_t srbm_soft_reset;
+ 	bool has_page_queue;
++	struct ras_common_if *ras_if;
+ };
+ 
+ /*
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+index d6547a093e67..c0e568b68ef6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+@@ -41,6 +41,8 @@
+ #include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+ #include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+ 
++#include "amdgpu_ras.h"
++
+ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
+ MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+ MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+@@ -1495,6 +1497,89 @@ static int sdma_v4_0_early_init(void *handle)
+ 	return 0;
+ }
+ 
++static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
++		struct amdgpu_iv_entry *entry);
++
++/*
++ * Late init: when RAS is supported on SDMA, enable the feature, register
++ * the RAS interrupt handler, create the debugfs/sysfs nodes and take both
++ * ECC interrupt references.  Returns 0 on success or when RAS is not
++ * supported; on failure every completed step is undone and r is returned.
++ */
++static int sdma_v4_0_late_init(void *handle)
++{
++	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	struct ras_common_if **ras_if = &adev->sdma.ras_if;
++	struct ras_ih_if ih_info = {
++		.cb = sdma_v4_0_process_ras_data_cb,
++	};
++	struct ras_fs_if fs_info = {
++		.sysfs_name = "sdma_err_count",
++		.debugfs_name = "sdma_err_inject",
++	};
++	struct ras_common_if ras_block = {
++		.block = AMDGPU_RAS_BLOCK__SDMA,
++		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
++		.sub_block_index = 0,
++		.name = "sdma",
++	};
++	int r;
++
++	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
++		amdgpu_ras_feature_enable(adev, &ras_block, 0);
++		return 0;
++	}
++
++	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
++	if (!*ras_if)
++		return -ENOMEM;
++
++	**ras_if = ras_block;
++
++	r = amdgpu_ras_feature_enable(adev, *ras_if, 1);
++	if (r)
++		goto feature;
++
++	ih_info.head = **ras_if;
++	fs_info.head = **ras_if;
++
++	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
++	if (r)
++		goto interrupt;
++
++	r = amdgpu_ras_debugfs_create(adev, &fs_info);
++	if (r)
++		goto debugfs;
++
++	r = amdgpu_ras_sysfs_create(adev, &fs_info);
++	if (r)
++		goto sysfs;
++
++	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++	if (r)
++		goto irq;
++
++	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
++	if (r) {
++		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++		goto irq;
++	}
++
++	return 0;
++irq:
++	amdgpu_ras_sysfs_remove(adev, *ras_if);
++sysfs:
++	amdgpu_ras_debugfs_remove(adev, *ras_if);
++debugfs:
++	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++interrupt:
++	amdgpu_ras_feature_enable(adev, *ras_if, 0);
++feature:
++	kfree(*ras_if);
++	*ras_if = NULL;
++	return r;
++}
++
+ static int sdma_v4_0_sw_init(void *handle)
+ {
+ 	struct amdgpu_ring *ring;
+@@ -1513,6 +1598,18 @@ static int sdma_v4_0_sw_init(void *handle)
+ 	if (r)
+ 		return r;
+ 
++	/* SDMA SRAM ECC event */
++	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
++			&adev->sdma.ecc_irq);
++	if (r)
++		return r;
++
++	/* SDMA SRAM ECC event */
++	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC,
++			&adev->sdma.ecc_irq);
++	if (r)
++		return r;
++
+ 	for (i = 0; i < adev->sdma.num_instances; i++) {
+ 		ring = &adev->sdma.instance[i].ring;
+ 		ring->ring_obj = NULL;
+@@ -1563,6 +1660,24 @@ static int sdma_v4_0_sw_fini(void *handle)
+ 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ 	int i;
+ 
++	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
++			adev->sdma.ras_if) {
++		struct ras_common_if *ras_if = adev->sdma.ras_if;
++		struct ras_ih_if ih_info = {
++			.head = *ras_if,
++		};
++
++		/*remove fs first*/
++		amdgpu_ras_debugfs_remove(adev, ras_if);
++		amdgpu_ras_sysfs_remove(adev, ras_if);
++		/*remove the IH*/
++		amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++		amdgpu_ras_feature_enable(adev, ras_if, 0);
++		kfree(ras_if);
++		/* the ECC irq handler checks this pointer; do not leave it dangling */
++		adev->sdma.ras_if = NULL;
++	}
++
+ 	for (i = 0; i < adev->sdma.num_instances; i++) {
+ 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ 		if (adev->sdma.has_page_queue)
+@@ -1600,6 +1715,12 @@ static int sdma_v4_0_hw_fini(void *handle)
+ 	if (amdgpu_sriov_vf(adev))
+ 		return 0;
+ 
++	/* the ECC irq references exist only if late_init enabled RAS */
++	if (adev->sdma.ras_if) {
++		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
++	}
++
+ 	sdma_v4_0_ctx_switch_enable(adev, false);
+ 	sdma_v4_0_enable(adev, false);
+ 
+@@ -1716,6 +1837,57 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
+ 	return 0;
+ }
+ 
++/*
++ * RAS callback for SDMA ECC interrupts.  Entries whose client id is not
++ * SDMA0/SDMA1 or whose source id is not an SDMA ECC id are ignored (return
++ * 0); otherwise amdgpu_ras_reset_gpu() is called and AMDGPU_RAS_UE returned.
++ */
++static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
++		struct amdgpu_iv_entry *entry)
++{
++	switch (entry->client_id) {
++	case SOC15_IH_CLIENTID_SDMA0:
++	case SOC15_IH_CLIENTID_SDMA1:
++		break;
++	default:
++		return 0;
++	}
++
++	switch (entry->src_id) {
++	case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
++	case SDMA0_4_0__SRCID__SDMA_ECC:
++		break;
++	default:
++		return 0;
++	}
++
++	amdgpu_ras_reset_gpu(adev, 0);
++
++	return AMDGPU_RAS_UE;
++}
++
++/*
++ * ECC interrupt entry point: forward the IV entry to
++ * amdgpu_ras_interrupt_dispatch().  Nothing is dispatched while
++ * adev->sdma.ras_if is NULL, i.e. when late_init did not enable RAS.
++ */
++static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
++		struct amdgpu_irq_src *source,
++		struct amdgpu_iv_entry *entry)
++{
++	struct ras_common_if *ras_if = adev->sdma.ras_if;
++	struct ras_dispatch_if ih_data = {
++		.entry = entry,
++	};
++
++	if (!ras_if)
++		return 0;
++
++	ih_data.head = *ras_if;
++	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
++	return 0;
++}
++
+ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ 		struct amdgpu_irq_src *source,
+ 		struct amdgpu_iv_entry *entry)
+@@ -1743,6 +1915,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ 	return 0;
+ }
+ 
++/*
++ * Set or clear ECC_INT_ENABLE in the SDMA0_EDC_CONFIG register of
++ * instance 0 (type AMDGPU_SDMA_IRQ_ECC0) or instance 1 (any other type).
++ */
++static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
++		struct amdgpu_irq_src *source,
++		unsigned type,
++		enum amdgpu_interrupt_state state)
++{
++	u32 sdma_edc_config;
++
++	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_ECC0) ?
++		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) :
++		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG);
++
++	sdma_edc_config = RREG32(reg_offset);
++	sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
++			state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
++	WREG32(reg_offset, sdma_edc_config);
++
++	return 0;
++}
++
+ static void sdma_v4_0_update_medium_grain_clock_gating(
+ 		struct amdgpu_device *adev,
+ 		bool enable)
+@@ -1908,7 +2103,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
+ 	.name = "sdma_v4_0",
+ 	.early_init = sdma_v4_0_early_init,
+-	.late_init = NULL,
++	.late_init = sdma_v4_0_late_init,
+ 	.sw_init = sdma_v4_0_sw_init,
+ 	.sw_fini = sdma_v4_0_sw_fini,
+ 	.hw_init = sdma_v4_0_hw_init,
+@@ -2010,11 +2205,20 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
+ 	.process = sdma_v4_0_process_illegal_inst_irq,
+ };
+ 
++static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
++	.set = sdma_v4_0_set_ecc_irq_state,
++	.process = sdma_v4_0_process_ecc_irq,
++};
++
++
++
+ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+ {
+ 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ 	adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
+ 	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
++	adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
++	adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs;
+ }
+ 
+ /**
+-- 
+2.17.1
+