aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4726-drm-amdgpu-Added-RAS-UMC-error-query-support-for-Arc.patch
blob: c91ce0bb54821333ef173393bc172ba3c5da9249 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
From b314adff2767fe2e6fdbfba1149f29d6aed845de Mon Sep 17 00:00:00 2001
From: John Clements <john.clements@amd.com>
Date: Wed, 11 Dec 2019 10:18:55 +0800
Subject: [PATCH 4726/4736] drm/amdgpu: Added RAS UMC error query support for
 Arcturus

Updated UMC 6.1 function set to support UMC 6.1.1 and 6.1.2 devices

Change-Id: I7b106328d24aba5ab93a8f4cddb1635392eecd0f
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: John Clements <john.clements@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c |  9 +++-
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 78 ++++++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/umc_v6_1.h |  3 +-
 3 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index b051ede2fb83..a2d1016ce81a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -692,11 +692,18 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
 		adev->umc.funcs = &umc_v6_0_funcs;
 		break;
 	case CHIP_VEGA20:
+		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
+		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
+		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
+		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
+		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
+		adev->umc.funcs = &umc_v6_1_funcs;
+		break;
 	case CHIP_ARCTURUS:
 		adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
 		adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
 		adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
-		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET;
+		adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
 		adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
 		adev->umc.funcs = &umc_v6_1_funcs;
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
index 47c4b96b14d1..515eb50cd0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
@@ -31,6 +31,14 @@
 
 #define smnMCA_UMC0_MCUMC_ADDRT0	0x50f10
 
+/* UMC 6_1_2 register offsets */
+#define mmUMCCH0_0_EccErrCntSel_ARCT                 0x0360
+#define mmUMCCH0_0_EccErrCntSel_ARCT_BASE_IDX        1
+#define mmUMCCH0_0_EccErrCnt_ARCT                    0x0361
+#define mmUMCCH0_0_EccErrCnt_ARCT_BASE_IDX           1
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT           0x03c2
+#define mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT_BASE_IDX  1
+
 /*
  * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
  * is the index of 8KB block
@@ -95,12 +103,25 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
 	uint64_t mc_umc_status;
 	uint32_t mc_umc_status_addr;
 
-	ecc_err_cnt_sel_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-	ecc_err_cnt_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
-	mc_umc_status_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	}
 
 	/* select the lower chip and check the error count */
 	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
@@ -141,8 +162,17 @@ static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev
 	uint64_t mc_umc_status;
 	uint32_t mc_umc_status_addr;
 
-	mc_umc_status_addr =
-                SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	}
 
 	/* check the MCUMC_STATUS */
 	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
@@ -179,8 +209,17 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
 	uint64_t mc_umc_status, err_addr, retired_page;
 	struct eeprom_table_record *err_rec;
 
-	mc_umc_status_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+
+		mc_umc_status_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+	}
 
 	/* skip error address process if -ENOMEM */
 	if (!err_data->err_addr) {
@@ -241,10 +280,21 @@ static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
 	uint32_t ecc_err_cnt_addr;
 
-	ecc_err_cnt_sel_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
-	ecc_err_cnt_addr =
-		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		/* UMC 6_1_2 registers */
+
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
+	} else {
+		/* UMC 6_1_1 registers */
+
+		ecc_err_cnt_sel_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+		ecc_err_cnt_addr =
+			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
+	}
 
 	/* select the lower chip and check the error count */
 	ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
index dab9cbd292c5..0ce1d323cfdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h
@@ -35,7 +35,8 @@
 /* total channel instances in one umc block */
 #define UMC_V6_1_TOTAL_CHANNEL_NUM	(UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
 /* UMC regiser per channel offset */
-#define UMC_V6_1_PER_CHANNEL_OFFSET		0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_VG20	0x800
+#define UMC_V6_1_PER_CHANNEL_OFFSET_ARCT	0x400
 
 /* EccErrCnt max value */
 #define UMC_V6_1_CE_CNT_MAX		0xffff
-- 
2.17.1