aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/3770-drm-amdgpu-initialize-ras-structures-for-xgmi-block-.patch
blob: 81cd462532e72d7c087989746d47025c8b58ea6e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
From e3a7a6666e4bc09e679b68a0dd43113e595c03c6 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Tue, 10 Sep 2019 11:13:39 +0800
Subject: [PATCH 3770/4256] drm/amdgpu: initialize ras structures for xgmi
 block (v2)

init ras common interface and fs node for xgmi block

v2: remove unnecessary physical node number check before
invoking amdgpu_xgmi_ras_late_init

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h  |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 36 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    |  4 ++-
 4 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index e03351f0ba5a..df012766d907 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -120,6 +120,7 @@ struct amdgpu_xgmi {
 	/* gpu list in the same hive */
 	struct list_head head;
 	bool supported;
+	struct ras_common_if *ras_if;
 };
 
 struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 83be108f57bd..0fa1d2416b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -25,6 +25,7 @@
 #include "amdgpu.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_smu.h"
+#include "amdgpu_ras.h"
 #include "df/df_3_6_offset.h"
 
 static DEFINE_MUTEX(xgmi_mutex);
@@ -436,3 +437,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 		mutex_unlock(&hive->hive_lock);
 	}
 }
+
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+{
+	int r;
+	struct ras_ih_if ih_info = {
+		.cb = NULL,
+	};
+	struct ras_fs_if fs_info = {
+		.sysfs_name = "xgmi_wafl_err_count",
+		.debugfs_name = "xgmi_wafl_err_inject",
+	};
+
+	if (!adev->gmc.xgmi.supported ||
+	    adev->gmc.xgmi.num_physical_nodes == 0)
+		return 0;
+
+	if (!adev->gmc.xgmi.ras_if) {
+		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+		if (!adev->gmc.xgmi.ras_if)
+			return -ENOMEM;
+		adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
+		adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+		adev->gmc.xgmi.ras_if->sub_block_index = 0;
+		strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
+	}
+	ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
+	r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
+				 &fs_info, &ih_info);
+	if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
+		kfree(adev->gmc.xgmi.ras_if);
+		adev->gmc.xgmi.ras_if = NULL;
+	}
+
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index fbcee31788c4..9023789397c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
 int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
 		struct amdgpu_device *peer_adev);
+int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
 
 static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
 		struct amdgpu_device *bo_adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 64cc3068873a..3dde208fa0c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -51,6 +51,7 @@
 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
 
 #include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
 
 /* add these here since we already include dce12 headers and these are for DCN */
 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
@@ -804,7 +805,8 @@ static int gmc_v9_0_ecc_late_init(void *handle)
 		if (r)
 			return r;
 	}
-	return 0;
+
+	return amdgpu_xgmi_ras_late_init(adev);
 }
 
 static int gmc_v9_0_late_init(void *handle)
-- 
2.17.1