aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/0769-drm-amdgpu-implement-direct-gma-domains.patch
blob: d74d976be90287b883c4fe63a0f656e5d74cb003 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
From fe09190b5611da2a46d4d3705951d5f5783279cf Mon Sep 17 00:00:00 2001
From: Flora Cui <Flora.Cui@amd.com>
Date: Thu, 1 Sep 2016 19:27:42 +0800
Subject: [PATCH 0769/4131] drm/amdgpu: implement direct gma domains

Change-Id: Iadfb62f3ec78fed2adc365d857e5c47b31fbfb4a
Signed-off-by: Flora Cui <Flora.Cui@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>

 Conflicts:
        drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 23 ++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 81 +++++++++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  4 ++
 include/uapi/drm/amdgpu_drm.h              | 31 ++++++++++++
 5 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4a9e85d..4a57461 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -73,6 +73,22 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 {
 	u32 c = 0;
 
+	if ((domain & AMDGPU_GEM_DOMAIN_DGMA) && amdgpu_direct_gma_size) {
+		places[c].fpfn = 0;
+		places[c].lpfn = 0;
+		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			AMDGPU_PL_FLAG_DGMA | TTM_PL_FLAG_NO_EVICT;
+		c++;
+	}
+
+	if ((domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) && amdgpu_direct_gma_size) {
+		places[c].fpfn = 0;
+		places[c].lpfn = 0;
+		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			AMDGPU_PL_FLAG_DGMA_IMPORT | TTM_PL_FLAG_NO_EVICT;
+		c++;
+	}
+
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
 
@@ -351,7 +367,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 					 AMDGPU_GEM_DOMAIN_CPU |
 					 AMDGPU_GEM_DOMAIN_GDS |
 					 AMDGPU_GEM_DOMAIN_GWS |
-					 AMDGPU_GEM_DOMAIN_OA);
+					 AMDGPU_GEM_DOMAIN_OA |
+					 AMDGPU_GEM_DOMAIN_DGMA |
+					 AMDGPU_GEM_DOMAIN_DGMA_IMPORT);
 	bo->allowed_domains = bo->preferred_domains;
 	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
@@ -431,7 +449,8 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	if (type == ttm_bo_type_device)
 		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
-	if ((flags & AMDGPU_GEM_CREATE_NO_EVICT) && amdgpu_no_evict) {
+	if (((flags & AMDGPU_GEM_CREATE_NO_EVICT) && amdgpu_no_evict) ||
+	    domain & (AMDGPU_GEM_DOMAIN_DGMA | AMDGPU_GEM_DOMAIN_DGMA_IMPORT)) {
 		r = amdgpu_bo_reserve(bo, false);
 		if (unlikely(r != 0))
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a288fa6..78eacc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -109,6 +109,10 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type)
 		return AMDGPU_GEM_DOMAIN_GWS;
 	case AMDGPU_PL_OA:
 		return AMDGPU_GEM_DOMAIN_OA;
+	case AMDGPU_PL_DGMA:
+		return AMDGPU_GEM_DOMAIN_DGMA;
+	case AMDGPU_PL_DGMA_IMPORT:
+		return AMDGPU_GEM_DOMAIN_DGMA_IMPORT;
 	default:
 		break;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8196fff..7edea7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -184,6 +184,23 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		man->available_caching = TTM_PL_FLAG_UNCACHED;
 		man->default_caching = TTM_PL_FLAG_UNCACHED;
 		break;
+	case AMDGPU_PL_DGMA:
+		/* reserved visible VRAM for direct GMA */
+		man->func = &ttm_bo_manager_func;
+		man->gpu_offset = amdgpu_bo_gpu_offset(adev->direct_gma.dgma_bo);
+		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		break;
+	case AMDGPU_PL_DGMA_IMPORT:
+		/* reserved GTT space for direct GMA */
+		man->func = &ttm_bo_manager_func;
+		man->gpu_offset = bdev->man[TTM_PL_TT].gpu_offset +
+				  (adev->direct_gma.gart_mem.start << PAGE_SHIFT);
+		man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
+		man->default_caching = TTM_PL_FLAG_WC;
+		break;
 	default:
 		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
 		return -EINVAL;
@@ -212,6 +229,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 	abo = container_of(bo, struct amdgpu_bo, tbo);
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
+	case AMDGPU_PL_DGMA:
 		if (adev->mman.buffer_funcs &&
 		    adev->mman.buffer_funcs_ring &&
 		    adev->mman.buffer_funcs_ring->ready == false) {
@@ -249,6 +267,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 		}
 		break;
 	case TTM_PL_TT:
+	case AMDGPU_PL_DGMA_IMPORT:
 	default:
 		amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
 	}
@@ -488,6 +507,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 	if (WARN_ON_ONCE(abo->pin_count > 0))
 		return -EINVAL;
 
+	if (old_mem->mem_type == AMDGPU_GEM_DOMAIN_DGMA ||
+	    old_mem->mem_type == AMDGPU_GEM_DOMAIN_DGMA_IMPORT)
+		return -EINVAL;
+
 	adev = amdgpu_ttm_adev(bo->bdev);
 
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
@@ -547,7 +570,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+	struct ttm_mem_reg backup;
 
+	backup = *mem;
 	mem->bus.addr = NULL;
 	mem->bus.offset = 0;
 	mem->bus.size = mem->num_pages << PAGE_SHIFT;
@@ -562,11 +587,20 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_TT:
 		break;
 	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		/* check if it's visible */
-		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
-			return -EINVAL;
-		mem->bus.base = adev->mc.aper_base;
+	case AMDGPU_PL_DGMA:
+	case AMDGPU_PL_DGMA_IMPORT:
+		if (mem->mem_type != AMDGPU_PL_DGMA_IMPORT) {
+			mem->bus.offset = (mem->start << PAGE_SHIFT) + man->gpu_offset -
+					adev->mc.vram_start;
+			/* check if it's visible */
+			if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
+				return -EINVAL;
+			mem->bus.base = adev->mc.aper_base;
+		} else {
+			mem->bus.offset = backup.bus.offset;
+			mem->bus.base = backup.bus.base;
+		}
+
 		mem->bus.is_iomem = true;
 		break;
 	default:
@@ -1238,9 +1272,25 @@ static int amdgpu_direct_gma_init(struct amdgpu_device *adev)
 		goto error_free;
 
 	adev->gart_pin_size += size;
+
+	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_DGMA, size >> PAGE_SHIFT);
+	if (unlikely(r))
+		goto error_put_node;
+
+	r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_DGMA_IMPORT, size >> PAGE_SHIFT);
+	if (unlikely(r))
+		goto error_release_mm;
+
 	DRM_INFO("%dMB VRAM/GTT reserved for Direct GMA\n", amdgpu_direct_gma_size);
 	return 0;
 
+error_release_mm:
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_DGMA);
+
+error_put_node:
+	(*man->func->put_node)(man, &adev->direct_gma.gart_mem);
+	adev->gart_pin_size -= size;
+
 error_free:
 	amdgpu_bo_unref(&abo);
 
@@ -1259,6 +1309,9 @@ static void amdgpu_direct_gma_fini(struct amdgpu_device *adev)
 	if (amdgpu_direct_gma_size == 0)
 		return;
 
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_DGMA);
+	ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_DGMA_IMPORT);
+
 	r = amdgpu_bo_reserve(adev->direct_gma.dgma_bo, false);
 	if (r == 0) {
 		amdgpu_bo_unpin(adev->direct_gma.dgma_bo);
@@ -1691,6 +1744,8 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data)
 
 static int ttm_pl_vram = TTM_PL_VRAM;
 static int ttm_pl_tt = TTM_PL_TT;
+static int ttm_pl_dgma = AMDGPU_PL_DGMA;
+static int ttm_pl_dgma_import = AMDGPU_PL_DGMA_IMPORT;
 
 static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
 	{"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram},
@@ -1701,6 +1756,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = {
 #endif
 };
 
+static const struct drm_info_list amdgpu_ttm_dgma_debugfs_list[] = {
+	{"amdgpu_dgma_mm", amdgpu_mm_dump_table, 0, &ttm_pl_dgma},
+	{"amdgpu_dgma_import_mm", amdgpu_mm_dump_table, 0, &ttm_pl_dgma_import}
+};
+
 static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 				    size_t size, loff_t *pos)
 {
@@ -1801,6 +1861,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
 	unsigned count;
+	int r;
 
 	struct drm_minor *minor = adev->ddev->primary;
 	struct dentry *ent, *root = minor->debugfs_root;
@@ -1828,7 +1889,15 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 		--count;
 #endif
 
-	return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
+	r = amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count);
+	if (unlikely(r))
+		return r;
+
+	if (amdgpu_direct_gma_size)
+		r = amdgpu_debugfs_add_files(adev, amdgpu_ttm_dgma_debugfs_list,
+					ARRAY_SIZE(amdgpu_ttm_dgma_debugfs_list));
+
+	return r;
 #else
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 557829a..be4c98a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -29,10 +29,14 @@
 #define AMDGPU_PL_GDS		(TTM_PL_PRIV + 0)
 #define AMDGPU_PL_GWS		(TTM_PL_PRIV + 1)
 #define AMDGPU_PL_OA		(TTM_PL_PRIV + 2)
+#define AMDGPU_PL_DGMA		(TTM_PL_PRIV + 3)
+#define AMDGPU_PL_DGMA_IMPORT	(TTM_PL_PRIV + 4)
 
 #define AMDGPU_PL_FLAG_GDS		(TTM_PL_FLAG_PRIV << 0)
 #define AMDGPU_PL_FLAG_GWS		(TTM_PL_FLAG_PRIV << 1)
 #define AMDGPU_PL_FLAG_OA		(TTM_PL_FLAG_PRIV << 2)
+#define AMDGPU_PL_FLAG_DGMA		(TTM_PL_FLAG_PRIV << 3)
+#define AMDGPU_PL_FLAG_DGMA_IMPORT	(TTM_PL_FLAG_PRIV << 4)
 
 #define AMDGPU_GTT_MAX_TRANSFER_SIZE	512
 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS	2
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 30a0693..16474c6 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -80,6 +80,8 @@ extern "C" {
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_DGMA          0x40
+#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT   0x80
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
@@ -94,6 +96,12 @@ extern "C" {
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
 
+/* hybrid specific */
+/* Flag that the memory allocation should be from top of domain */
+#define AMDGPU_GEM_CREATE_TOP_DOWN              (1 << 30)
+/* Flag that the memory allocation should be pinned */
+#define AMDGPU_GEM_CREATE_NO_EVICT              (1 << 31)
+
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
 	__u64 bo_size;
@@ -618,6 +626,14 @@ struct drm_amdgpu_cs_chunk_data {
 /* Number of VRAM page faults on CPU access. */
 #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
 
+/* Hybrid Stack Specific Defs*/
+/* gpu capability */
+#define AMDGPU_INFO_CAPABILITY                  0x50
+/* virtual range */
+#define AMDGPU_INFO_VIRTUAL_RANGE               0x51
+/* query pin memory capability */
+#define AMDGPU_CAPABILITY_PIN_MEM_FLAG  (1 << 0)
+
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
 #define AMDGPU_INFO_MMR_SH_INDEX_SHIFT	8
@@ -674,6 +690,11 @@ struct drm_amdgpu_info {
 			__u32 flags;
 		} read_mmr_reg;
 
+                struct {
+                        uint32_t aperture;
+                        uint32_t _pad;
+                } virtual_range;
+
 		struct drm_amdgpu_query_fw query_fw;
 
 		struct {
@@ -881,6 +902,16 @@ struct drm_amdgpu_info_vce_clock_table {
 #define AMDGPU_FAMILY_AI			141 /* Vega10 */
 #define AMDGPU_FAMILY_RV			142 /* Raven */
 
+/**
+ *  Definition of System Unified Address (SUA) apertures
+ */
+#define AMDGPU_SUA_APERTURE_PRIVATE    1
+#define AMDGPU_SUA_APERTURE_SHARED     2
+struct drm_amdgpu_virtual_range {
+        uint64_t start;
+        uint64_t end;
+};
+
 /*
  * Definition of free sync enter and exit signals
  * We may have more options in the future
-- 
2.7.4