1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
From f7af664bf62fc3f66ac1435165116700552b4c60 Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Fri, 22 Nov 2019 18:39:11 +0800
Subject: [PATCH 4660/4736] drm/amdgpu: clear uncorrectable parity error status
bit
This should be cleared during every nbif uncorrectable error cleanup work.
Change-Id: If5de1fa2779d012ad8b20de03e19251d0d590fa2
Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 9a3a65a0691c..bb701dbfd472 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -482,10 +482,12 @@ static int nbio_v7_4_init_ras_err_event_athub_interrupt (struct amdgpu_device *a
return 0;
}
+#define smnPARITY_ERROR_STATUS_UNCORR_GRP2 0x13a20030
+
static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
- uint32_t global_sts, central_sts, int_eoi;
+ uint32_t global_sts, central_sts, int_eoi, parity_sts;
uint32_t corr, fatal, non_fatal;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -494,6 +496,7 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO, ParityErrFatal);
non_fatal = REG_GET_FIELD(global_sts, RAS_GLOBAL_STATUS_LO,
ParityErrNonFatal);
+ parity_sts = RREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2);
if (corr)
err_data->ce_count++;
@@ -505,6 +508,11 @@ static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
/* clear error status register */
WREG32_PCIE(smnRAS_GLOBAL_STATUS_LO, global_sts);
+ if (fatal)
+ /* clear parity fatal error indication field */
+ WREG32_PCIE(smnPARITY_ERROR_STATUS_UNCORR_GRP2,
+ parity_sts);
+
if (REG_GET_FIELD(central_sts, BIFL_RAS_CENTRAL_STATUS,
BIFL_RasContller_Intr_Recv)) {
/* clear interrupt status register */
--
2.17.1
|