1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
From 49b0461e86d2ae3044f3ae6296831d170e9ad126 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 3 Jan 2018 15:44:36 -0500
Subject: [PATCH 3052/4131] drm/amdkfd: Consolidate duplicate memory banks info
in topology
If there are several memory banks that has the same properties in CRAT,
we aggregate them into one memory bank.
Change-Id: I37978351fcf32b598164200f2b33ebdf82ac39d7
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 57 ++++++++++++++++++++++++++++-------
1 file changed, 46 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 1664ba3..8c254328 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -173,6 +173,21 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu,
return 0;
}
+static struct kfd_mem_properties *
+find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width,
+ struct kfd_topology_device *dev)
+{
+ struct kfd_mem_properties *props;
+
+ list_for_each_entry(props, &dev->mem_props, list) {
+ if (props->heap_type == heap_type
+ && props->flags == flags
+ && props->width == width)
+ return props;
+ }
+
+ return NULL;
+}
/* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct
* topology device present in the device_list
*/
@@ -181,36 +196,56 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
{
struct kfd_mem_properties *props;
struct kfd_topology_device *dev;
+ uint32_t heap_type;
+ uint64_t size_in_bytes;
+ uint32_t flags = 0;
+ uint32_t width;
pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
mem->proximity_domain);
list_for_each_entry(dev, device_list, list) {
if (mem->proximity_domain == dev->proximity_domain) {
- props = kfd_alloc_struct(props);
- if (!props)
- return -ENOMEM;
-
/* We're on GPU node */
if (dev->node_props.cpu_cores_count == 0) {
/* APU */
if (mem->visibility_type == 0)
- props->heap_type =
+ heap_type =
HSA_MEM_HEAP_TYPE_FB_PRIVATE;
/* dGPU */
else
- props->heap_type = mem->visibility_type;
+ heap_type = mem->visibility_type;
} else
- props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
+ heap_type = HSA_MEM_HEAP_TYPE_SYSTEM;
if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE)
- props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
+ flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE;
if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
- props->flags |= HSA_MEM_FLAGS_NON_VOLATILE;
+ flags |= HSA_MEM_FLAGS_NON_VOLATILE;
- props->size_in_bytes =
+ size_in_bytes =
((uint64_t)mem->length_high << 32) +
mem->length_low;
- props->width = mem->width;
+ width = mem->width;
+
+ /* Multiple banks of the same type are aggregated into
+ * one. User mode doesn't care about multiple physical
+ * memory segments. It's managed as a single virtual
+ * heap for user mode.
+ */
+ props = find_subtype_mem(heap_type, flags, width, dev);
+ if (props) {
+ props->size_in_bytes += size_in_bytes;
+ break;
+ }
+
+ props = kfd_alloc_struct(props);
+ if (!props)
+ return -ENOMEM;
+
+ props->heap_type = heap_type;
+ props->flags = flags;
+ props->size_in_bytes = size_in_bytes;
+ props->width = width;
dev->node_props.mem_banks_count++;
list_add_tail(&props->list, &dev->mem_props);
--
2.7.4
|