aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch301
1 files changed, 301 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch
new file mode 100644
index 00000000..d1c7e62e
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1516-drm-amdgpu-enable-ras-on-sdma4.patch
@@ -0,0 +1,301 @@
+From dd12fccc938fba6ae89772a122b8fe8988243d5d Mon Sep 17 00:00:00 2001
+From: xinhui pan <xinhui.pan@amd.com>
+Date: Wed, 28 Nov 2018 21:14:56 +0800
+Subject: [PATCH 1516/2940] drm/amdgpu: enable ras on sdma4
+
+Register the ECC interrupt handler (IH) and enable RAS features on SDMA.
+Create the sysfs and debugfs files for SDMA.
+
+Signed-off-by: xinhui pan <xinhui.pan@amd.com>
+Signed-off-by: Feifei Xu <Feifei.Xu@amd.com>
+Signed-off-by: Eric Huang <JinhuiEric.Huang@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 4 +
+ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 184 ++++++++++++++++++++++-
+ 2 files changed, 187 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+index 79e89e3bebc4..e988c3f09ecc 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+@@ -30,6 +30,8 @@
+ enum amdgpu_sdma_irq {
+ AMDGPU_SDMA_IRQ_TRAP0 = 0,
+ AMDGPU_SDMA_IRQ_TRAP1,
++ AMDGPU_SDMA_IRQ_ECC0, /* ECC interrupt type; sdma_v4_0_set_ecc_irq_state() maps it to SDMA instance 0 */
++ AMDGPU_SDMA_IRQ_ECC1, /* ECC interrupt type; sdma_v4_0_set_ecc_irq_state() maps it to SDMA instance 1 */
+
+ AMDGPU_SDMA_IRQ_LAST
+ };
+@@ -49,9 +51,11 @@ struct amdgpu_sdma {
+ struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+ struct amdgpu_irq_src trap_irq;
+ struct amdgpu_irq_src illegal_inst_irq;
++ struct amdgpu_irq_src ecc_irq;
+ int num_instances;
+ uint32_t srbm_soft_reset;
+ bool has_page_queue;
++ struct ras_common_if *ras_if;
+ };
+
+ /*
+diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+index d6547a093e67..c0e568b68ef6 100644
+--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+@@ -41,6 +41,8 @@
+ #include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
+ #include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"
+
++#include "amdgpu_ras.h"
++
+ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
+ MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+ MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+@@ -1495,6 +1497,83 @@ static int sdma_v4_0_early_init(void *handle)
+ return 0;
+ }
+
++static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
++ struct amdgpu_iv_entry *entry);
++
++/*
++ * sdma_v4_0_late_init - late-init hook that sets up RAS for the SDMA block
++ *
++ * @handle: amdgpu_device pointer
++ *
++ * When amdgpu_ras_is_supported() reports no RAS support for SDMA, the
++ * feature is explicitly disabled and 0 is returned.  Otherwise a
++ * ras_common_if is allocated into adev->sdma.ras_if, the feature is
++ * enabled, the RAS interrupt callback and the debugfs/sysfs entries are
++ * registered, and amdgpu_irq_get() is called for both ECC interrupt types.
++ *
++ * On failure every step that already succeeded is undone in reverse order,
++ * the allocation is freed and adev->sdma.ras_if is reset to NULL.
++ *
++ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
++ * error code reported by the step that failed.
++ */
++static int sdma_v4_0_late_init(void *handle)
++{
++	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
++	struct ras_common_if **ras_if = &adev->sdma.ras_if;
++	struct ras_ih_if ih_info = {
++		.cb = sdma_v4_0_process_ras_data_cb,
++	};
++	struct ras_fs_if fs_info = {
++		.sysfs_name = "sdma_err_count",
++		.debugfs_name = "sdma_err_inject",
++	};
++	struct ras_common_if ras_block = {
++		.block = AMDGPU_RAS_BLOCK__SDMA,
++		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
++		.sub_block_index = 0,
++		.name = "sdma",
++	};
++	int r;
++
++	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
++		/* No RAS on this block: turn the feature off and stop here. */
++		amdgpu_ras_feature_enable(adev, &ras_block, 0);
++		return 0;
++	}
++
++	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
++	if (!*ras_if)
++		return -ENOMEM;
++
++	/* The struct copy initialises every field of the fresh allocation. */
++	**ras_if = ras_block;
++
++	r = amdgpu_ras_feature_enable(adev, *ras_if, 1);
++	if (r)
++		goto feature;
++
++	ih_info.head = **ras_if;
++	fs_info.head = **ras_if;
++
++	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
++	if (r)
++		goto interrupt;
++
++	r = amdgpu_ras_debugfs_create(adev, &fs_info);
++	if (r)
++		goto debugfs;
++
++	r = amdgpu_ras_sysfs_create(adev, &fs_info);
++	if (r)
++		goto sysfs;
++
++	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++	if (r)
++		goto irq;
++
++	r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
++	if (r) {
++		/* ECC0 was already taken above; drop it before unwinding. */
++		amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++		goto irq;
++	}
++
++	return 0;
++
++	/*
++	 * Each label is named after the step that failed; the code below it
++	 * undoes the steps that had completed before that failure.
++	 */
++irq:
++	amdgpu_ras_sysfs_remove(adev, *ras_if);
++sysfs:
++	amdgpu_ras_debugfs_remove(adev, *ras_if);
++debugfs:
++	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++interrupt:
++	amdgpu_ras_feature_enable(adev, *ras_if, 0);
++feature:
++	kfree(*ras_if);
++	*ras_if = NULL;
++	/* Propagate the real failure code instead of a blanket -EINVAL. */
++	return r;
++}
++
+ static int sdma_v4_0_sw_init(void *handle)
+ {
+ struct amdgpu_ring *ring;
+@@ -1513,6 +1592,18 @@ static int sdma_v4_0_sw_init(void *handle)
+ if (r)
+ return r;
+
++ /* SDMA0 SRAM ECC event */
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
++ &adev->sdma.ecc_irq);
++ if (r)
++ return r;
++
++ /* SDMA1 SRAM ECC event */
++ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC,
++ &adev->sdma.ecc_irq);
++ if (r)
++ return r;
++
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
+ ring->ring_obj = NULL;
+@@ -1563,6 +1654,22 @@ static int sdma_v4_0_sw_fini(void *handle)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i;
+
++ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) &&
++ adev->sdma.ras_if) {
++ struct ras_common_if *ras_if = adev->sdma.ras_if;
++ struct ras_ih_if ih_info = {
++ .head = *ras_if,
++ };
++
++ /*remove fs first*/
++ amdgpu_ras_debugfs_remove(adev, ras_if);
++ amdgpu_ras_sysfs_remove(adev, ras_if);
++ /*remove the IH*/
++ amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
++ amdgpu_ras_feature_enable(adev, ras_if, 0);
++ kfree(ras_if);
++ }
++
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ if (adev->sdma.has_page_queue)
+@@ -1600,6 +1707,9 @@ static int sdma_v4_0_hw_fini(void *handle)
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
++ amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC0);
++ amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_ECC1);
++
+ sdma_v4_0_ctx_switch_enable(adev, false);
+ sdma_v4_0_enable(adev, false);
+
+@@ -1716,6 +1826,50 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
+ return 0;
+ }
+
++/*
++ * sdma_v4_0_process_ras_data_cb - RAS callback for SDMA ECC interrupts
++ *
++ * @adev: amdgpu_device pointer
++ * @entry: interrupt vector entry being processed
++ *
++ * Entries whose client_id is not SDMA0/SDMA1, or whose src_id is not one
++ * of the two ECC source ids checked below, are ignored and 0 is returned.
++ * For every other entry amdgpu_ras_reset_gpu() is called and
++ * AMDGPU_RAS_UE is returned.
++ */
++static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
++		struct amdgpu_iv_entry *entry)
++{
++	/* Validate the interrupt client; the instance itself is not needed. */
++	switch (entry->client_id) {
++	case SOC15_IH_CLIENTID_SDMA0:
++	case SOC15_IH_CLIENTID_SDMA1:
++		break;
++	default:
++		return 0;
++	}
++
++	/* Validate the interrupt source; both ECC sources are handled alike. */
++	switch (entry->src_id) {
++	case SDMA0_4_0__SRCID__SDMA_SRAM_ECC:
++	case SDMA0_4_0__SRCID__SDMA_ECC:
++		break;
++	default:
++		return 0;
++	}
++
++	amdgpu_ras_reset_gpu(adev, 0);
++
++	return AMDGPU_RAS_UE;
++}
++
++/*
++ * sdma_v4_0_process_ecc_irq - .process callback of the SDMA ECC irq source
++ *
++ * @adev: amdgpu_device pointer
++ * @source: interrupt source (unused)
++ * @entry: interrupt vector entry to forward
++ *
++ * Wraps @entry together with a copy of the SDMA RAS descriptor and passes
++ * it to amdgpu_ras_interrupt_dispatch().  Always returns 0.
++ */
++static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev,
++				      struct amdgpu_irq_src *source,
++				      struct amdgpu_iv_entry *entry)
++{
++	struct ras_common_if *ras_if = adev->sdma.ras_if;
++	struct ras_dispatch_if ih_data = {
++		.entry = entry,
++	};
++
++	/*
++	 * sdma_v4_0_late_init() does not allocate adev->sdma.ras_if when RAS
++	 * is unsupported and resets it to NULL when setup fails; copying
++	 * through it in that state would be a NULL dereference.
++	 */
++	if (!ras_if)
++		return 0;
++
++	ih_data.head = *ras_if;
++	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
++	return 0;
++}
++
+ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ struct amdgpu_irq_src *source,
+ struct amdgpu_iv_entry *entry)
+@@ -1743,6 +1897,25 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev,
+ return 0;
+ }
+
++/*
++ * sdma_v4_0_set_ecc_irq_state - .set callback of the SDMA ECC irq source
++ *
++ * @adev: amdgpu_device pointer
++ * @source: interrupt source (unused)
++ * @type: AMDGPU_SDMA_IRQ_ECC0 selects SDMA instance 0, any other value
++ *        selects instance 1
++ * @state: ECC_INT_ENABLE is set to 1 for AMDGPU_IRQ_STATE_ENABLE and to 0
++ *         for every other state
++ *
++ * Read-modify-writes the ECC_INT_ENABLE field of the selected instance's
++ * EDC_CONFIG register.  Always returns 0.
++ */
++static int sdma_v4_0_set_ecc_irq_state(struct amdgpu_device *adev,
++			struct amdgpu_irq_src *source,
++			unsigned type,
++			enum amdgpu_interrupt_state state)
++{
++	bool enable = (state == AMDGPU_IRQ_STATE_ENABLE);
++	u32 edc_reg;
++	u32 val;
++
++	if (type == AMDGPU_SDMA_IRQ_ECC0)
++		edc_reg = sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG);
++	else
++		edc_reg = sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG);
++
++	val = RREG32(edc_reg);
++	val = REG_SET_FIELD(val, SDMA0_EDC_CONFIG, ECC_INT_ENABLE,
++			    enable ? 1 : 0);
++	WREG32(edc_reg, val);
++
++	return 0;
++}
++
+ static void sdma_v4_0_update_medium_grain_clock_gating(
+ struct amdgpu_device *adev,
+ bool enable)
+@@ -1908,7 +2081,7 @@ static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+ const struct amd_ip_funcs sdma_v4_0_ip_funcs = {
+ .name = "sdma_v4_0",
+ .early_init = sdma_v4_0_early_init,
+- .late_init = NULL,
++ .late_init = sdma_v4_0_late_init,
+ .sw_init = sdma_v4_0_sw_init,
+ .sw_fini = sdma_v4_0_sw_fini,
+ .hw_init = sdma_v4_0_hw_init,
+@@ -2010,11 +2183,20 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_illegal_inst_irq_funcs = {
+ .process = sdma_v4_0_process_illegal_inst_irq,
+ };
+
++/* Callback table installed on adev->sdma.ecc_irq by sdma_v4_0_set_irq_funcs(). */
++static const struct amdgpu_irq_src_funcs sdma_v4_0_ecc_irq_funcs = {
++	.process = sdma_v4_0_process_ecc_irq,
++	.set = sdma_v4_0_set_ecc_irq_state,
++};
++
++
++
+ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
+ {
+ adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
+ adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs;
++ adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; /* same type count as trap_irq; late_init takes the ECC0/ECC1 types */
++ adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; /* .set = sdma_v4_0_set_ecc_irq_state, .process = sdma_v4_0_process_ecc_irq */
+ }
+
+ /**
+--
+2.17.1
+