1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
From 10df2f98d08221484c41535cd10aeb05845c58ab Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Mon, 11 Mar 2019 14:12:40 +0800
Subject: [PATCH 1605/2940] drm/amdgpu: add new member hw_supported
Currently, it is not clear how RAS support is determined: both software
and hardware can set the supported mask, which is confusing.
Fix it by adding a new member, hw_supported.
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 42 ++++++++++++++++++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 ++
2 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 750087535f00..74a65a61fd23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -469,7 +469,9 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
struct ras_common_if *head)
{
- return amdgpu_ras_enable && (amdgpu_ras_mask & BIT(head->block));
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return con->hw_supported & BIT(head->block);
}
static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -490,6 +492,12 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+ /* If the hardware does not support ras, do not create the obj.
+ * If the hardware supports ras, we can create the obj.
+ * The ras framework checks con->hw_supported to see whether it needs
+ * to do the corresponding initialization.
+ * An IP checks con->supported to see whether it needs to disable ras.
+ */
if (!amdgpu_ras_is_feature_allowed(adev, head))
return 0;
if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head)))
@@ -1334,27 +1342,36 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
}
/* recovery end */
-static uint32_t amdgpu_ras_check_supported(struct amdgpu_device *adev)
+/*
+ * Check the hardware's ras ability, which is saved in hw_supported.
+ * If the hardware does not support ras, we can skip some ras initialization
+ * and forbid some ras operations from the IP.
+ * If software itself (e.g. a boot parameter) limits the ras ability, we
+ * still need to allow the IP to do some limited operations, like disable.
+ * In that case we have to initialize ras as normal, but need to check
+ * whether the operation is allowed in each function.
+ */
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
+ uint32_t *hw_supported, uint32_t *supported)
{
- uint32_t supported = 0;
+ *hw_supported = 0;
+ *supported = 0;
- if (amdgpu_ras_enable == 0 ||
- amdgpu_sriov_vf(adev) ||
+ if (amdgpu_sriov_vf(adev) ||
adev->asic_type != CHIP_VEGA20)
- return 0;
+ return;
if (amdgpu_atomfirmware_mem_ecc_supported(adev) ||
- amdgpu_atomfirmware_sram_ecc_supported(adev)) {
- supported = AMDGPU_RAS_BLOCK_MASK;
- }
+ amdgpu_atomfirmware_sram_ecc_supported(adev))
+ *hw_supported = AMDGPU_RAS_BLOCK_MASK;
- return supported & amdgpu_ras_mask;
+ *supported = amdgpu_ras_enable == 0 ?
+ 0 : *hw_supported & amdgpu_ras_mask;
}
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- uint32_t supported = amdgpu_ras_check_supported(adev);
if (con)
return 0;
@@ -1369,7 +1386,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, con);
- con->supported = supported;
+ amdgpu_ras_check_supported(adev, &con->hw_supported,
+ &con->supported);
con->features = 0;
INIT_LIST_HEAD(&con->head);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 02cb9a13ddc5..2b6077762b91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -81,6 +81,9 @@ typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
struct amdgpu_ras {
/* ras infrastructure */
+ /* for ras itself. */
+ uint32_t hw_supported;
+ /* for IP to check its ras ability. */
uint32_t supported;
uint32_t features;
struct list_head head;
--
2.17.1
|