1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
From 4f712c43a25f97247ad8b2aaa32c255b09704004 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Thu, 31 Oct 2019 14:15:29 +0800
Subject: [PATCH 4362/4736] drm/amdgpu: fix possible pstate switch race
condition
Added lock protection so that the p-state switch will
be guarded to be sequential. Also update the hive
pstate only all device from the hive are in the same
state.
Change-Id: I165a6f44e8aec1e6da56eefa0fc49d36670e56fe
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 34 ++++++++++++++++++++++--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2eb3a6bcbd6c..715739799383 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1025,6 +1025,9 @@ struct amdgpu_device {
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
+
+ /* device pstate */
+ int pstate;
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 44a0ee91b42d..e58bad7f64c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -274,12 +274,18 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
{
int ret = 0;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+ struct amdgpu_device *tmp_adev;
+ bool update_hive_pstate = true;
if (!hive)
return 0;
- if (hive->pstate == pstate)
+ mutex_lock(&hive->hive_lock);
+
+ if (hive->pstate == pstate) {
+ mutex_unlock(&hive->hive_lock);
return 0;
+ }
dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate);
@@ -290,11 +296,32 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle,
pstate);
- if (ret)
+ if (ret) {
dev_err(adev->dev,
"XGMI: Set pstate failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
+ goto out;
+ }
+
+ /* Update device pstate */
+ adev->pstate = pstate;
+
+ /*
+ * Update the hive pstate only all devices of the hive
+ * are in the same pstate
+ */
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+ if (tmp_adev->pstate != adev->pstate) {
+ update_hive_pstate = false;
+ break;
+ }
+ }
+ if (update_hive_pstate)
+ hive->pstate = pstate;
+
+out:
+ mutex_unlock(&hive->hive_lock);
return ret;
}
@@ -369,6 +396,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
+ /* Set default device pstate */
+ adev->pstate = -1;
+
top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
--
2.17.1
|