aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch205
1 files changed, 205 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch
new file mode 100644
index 00000000..ca6b0fa0
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4129-drm-amdgpu-add-RAS-support-for-VML2-and-ATCL2.patch
@@ -0,0 +1,205 @@
+From 3524f56effff32b75337729b56e3209600be45a0 Mon Sep 17 00:00:00 2001
+From: Dennis Li <Dennis.Li@amd.com>
+Date: Sun, 29 Sep 2019 16:04:10 +0800
+Subject: [PATCH 4129/4736] drm/amdgpu: add RAS support for VML2 and ATCL2
+
+v1: Add code to query the EDC count of VML2 & ATCL2
+v2: Rename VML2/ATCL2 registers and drop their mask define
+v3: Add back the ECC mask for VML2 registers
+
+Change-Id: If2c251481ba0a1a34ce3405a85f86d65eecee461
+Signed-off-by: Dennis Li <Dennis.Li@amd.com>
+Reviewed-by: Hawking Zhang <hawking.zhang@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 167 ++++++++++++++++++++++++++
+ 1 file changed, 167 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+index 2d7140e57113..24802e4d25e5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+@@ -5944,6 +5944,171 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
+ return ret;
+ }
+
++static const char * const vml2_mems[] = {	/* names by VM_L2_MEM_ECC_INDEX */
++	"UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
++	"UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
++	"UTC_VML2_BANK_CACHE_0_4K_MEM0",
++	"UTC_VML2_BANK_CACHE_0_4K_MEM1",
++	"UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
++	"UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
++	"UTC_VML2_BANK_CACHE_1_4K_MEM0",
++	"UTC_VML2_BANK_CACHE_1_4K_MEM1",
++	"UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
++	"UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
++	"UTC_VML2_BANK_CACHE_2_4K_MEM0",
++	"UTC_VML2_BANK_CACHE_2_4K_MEM1",
++	"UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
++	"UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
++	"UTC_VML2_BANK_CACHE_3_4K_MEM0",
++	"UTC_VML2_BANK_CACHE_3_4K_MEM1",
++};
++
++static const char * const vml2_walker_mems[] = {	/* names by VM_L2_WALKER_MEM_ECC_INDEX */
++	"UTC_VML2_CACHE_PDE0_MEM0",
++	"UTC_VML2_CACHE_PDE0_MEM1",
++	"UTC_VML2_CACHE_PDE1_MEM0",
++	"UTC_VML2_CACHE_PDE1_MEM1",
++	"UTC_VML2_CACHE_PDE2_MEM0",
++	"UTC_VML2_CACHE_PDE2_MEM1",
++	"UTC_VML2_RDIF_LOG_FIFO",
++};
++
++static const char * const atc_l2_cache_2m_mems[] = {	/* names by ATC_L2_CACHE_2M_EDC_INDEX */
++	"UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
++	"UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
++	"UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
++	"UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
++};
++
++static const char * const atc_l2_cache_4k_mems[] = {	/* names by ATC_L2_CACHE_4K_EDC_INDEX */
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
++	"UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
++	"UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
++};
++
++static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
++					 struct ras_err_data *err_data)
++{
++	uint32_t i, data, sec_count, ded_count;
++
++	/* NOTE(review): INDEX=255 + CNT=0 presumably resets the counters; confirm */
++	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
++	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
++	/* Per memory: log non-zero counts; SEC -> ce_count, DED -> ue_count */
++	for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
++		WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
++		data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
++
++		sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
++		if (sec_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, SEC %u\n", i,
++				 vml2_mems[i], sec_count);
++			err_data->ce_count += sec_count;
++		}
++
++		ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
++		if (ded_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, DED %u\n", i,
++				 vml2_mems[i], ded_count);
++			err_data->ue_count += ded_count;
++		}
++	}
++
++	for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
++		WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
++		data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
++
++		sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
++					  SEC_COUNT);
++		if (sec_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, SEC %u\n", i,
++				 vml2_walker_mems[i], sec_count);
++			err_data->ce_count += sec_count;
++		}
++
++		ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
++					  DED_COUNT);
++		if (ded_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, DED %u\n", i,
++				 vml2_walker_mems[i], ded_count);
++			err_data->ue_count += ded_count;
++		}
++	}
++
++	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
++		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
++		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
++
++		sec_count = (data & 0x00006000L) >> 0xd;	/* bits 14:13 */
++		if (sec_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, SEC %u\n", i,
++				 atc_l2_cache_2m_mems[i], sec_count);
++			err_data->ce_count += sec_count;
++		}
++	}
++
++	for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
++		WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
++		data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
++
++		sec_count = (data & 0x00006000L) >> 0xd;	/* bits 14:13 */
++		if (sec_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, SEC %u\n", i,
++				 atc_l2_cache_4k_mems[i], sec_count);
++			err_data->ce_count += sec_count;
++		}
++
++		ded_count = (data & 0x00018000L) >> 0xf;	/* bits 16:15 */
++		if (ded_count) {
++			DRM_INFO("Instance[%u]: SubBlock %s, DED %u\n", i,
++				 atc_l2_cache_4k_mems[i], ded_count);
++			err_data->ue_count += ded_count;
++		}
++	}
++	/* Leave all four index registers at 255 on exit. */
++	WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
++	WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
++
++	return 0;
++}
++
+ static int __get_ras_error_count(const struct soc15_reg_entry *reg,
+ uint32_t se_id, uint32_t inst_id, uint32_t value,
+ uint32_t *sec_count, uint32_t *ded_count)
+@@ -6019,6 +6184,8 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
+ gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
++ gfx_v9_0_query_utc_edc_status(adev, err_data);
++
+ return 0;
+ }
+
+--
+2.17.1
+