aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/0869-drm-amdgpu-add-IOCTL-interface-for-per-VM-BOs-v3.patch
blob: 245bb803104b185f84466e3c96fe0b72f53ee84c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
From 9c187d95a7b1a1dcabe325a4fdffa02655d5f555 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 25 Aug 2017 09:14:43 +0200
Subject: [PATCH 0869/4131] drm/amdgpu: add IOCTL interface for per VM BOs v3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the IOCTL interface so that applications can allocate per VM BOs.

Still WIP since not all corner cases are tested yet, but this reduces average
CS overhead for 10K BOs from 21ms down to 48us.

v2: add some extra checks, remove the WIP tag
v3: rename new flag to AMDGPU_GEM_CREATE_VM_ALWAYS_VALID

Change-Id: I1746d681781b20d6240a85a7f681df9afb245d11
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

 Conflicts:
        drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   | 61 +++++++++++++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  3 +-
 include/uapi/drm/amdgpu_drm.h             |  2 +
 5 files changed, 54 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ecd9d37..fa17a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -468,9 +468,10 @@ struct amdgpu_sa_bo {
  */
 void amdgpu_gem_force_release(struct amdgpu_device *adev);
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj);
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj);
 
 int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 950af13..4f3bf7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -144,7 +144,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
 				       AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 				       AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 				       AMDGPU_GEM_CREATE_VRAM_CLEARED,
-				       true, &gobj);
+				       true, NULL, &gobj);
 	if (ret) {
 		pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 768de03..b6ce2b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -51,9 +51,10 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 }
 
 int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
-				int alignment, u32 initial_domain,
-				u64 flags, bool kernel,
-				struct drm_gem_object **obj)
+			     int alignment, u32 initial_domain,
+			     u64 flags, bool kernel,
+			     struct reservation_object *resv,
+			     struct drm_gem_object **obj)
 {
 	struct amdgpu_bo *robj;
 	struct amdgpu_gem_object *gobj;
@@ -85,7 +86,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 
 retry:
 	r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
-			     flags, NULL, NULL, 0, &robj);
+			     flags, NULL, resv, 0, &robj);
 	if (r) {
 		if (r != -ERESTARTSYS) {
 			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
@@ -162,6 +163,10 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	if (mm && mm != current->mm)
 		return -EPERM;
 
+	if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
+	    abo->tbo.resv != vm->root.base.bo->tbo.resv)
+		return -EPERM;
+
 	r = amdgpu_bo_reserve(abo, false);
 	if (r)
 		return r;
@@ -185,13 +190,14 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	struct amdgpu_vm *vm = &fpriv->vm;
 
 	struct amdgpu_bo_list_entry vm_pd;
-	struct list_head list;
+	struct list_head list, duplicates;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
 	struct amdgpu_bo_va *bo_va;
 	int r;
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
 	tv.shared = true;
@@ -199,7 +205,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
 	if (r) {
 		dev_err(adev->dev, "leaking bo va because "
 			"we fail to reserve bo (%d)\n", r);
@@ -234,9 +240,12 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_gem_create *args = data;
 	uint64_t flags = args->in.domain_flags;
 	uint64_t size = args->in.bo_size;
+	struct reservation_object *resv = NULL;
 	struct drm_gem_object *gobj;
 	uint32_t handle;
 	int r;
@@ -246,7 +255,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 		      AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 		      AMDGPU_GEM_CREATE_CPU_GTT_USWC |
 		      AMDGPU_GEM_CREATE_VRAM_CLEARED |
-		      AMDGPU_GEM_CREATE_NO_EVICT))
+		      AMDGPU_GEM_CREATE_NO_EVICT |
+		      AMDGPU_GEM_CREATE_VM_ALWAYS_VALID))
 		return -EINVAL;
 
 	/* reject invalid gem domains */
@@ -274,9 +284,25 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
 	}
 	size = roundup(size, PAGE_SIZE);
 
+	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
+		if (r)
+			return r;
+
+		resv = vm->root.base.bo->tbo.resv;
+	}
+
 	r = amdgpu_gem_object_create(adev, size, args->in.alignment,
 				     (u32)(0xffffffff & args->in.domains),
-				     flags, false, &gobj);
+				     flags, false, resv, &gobj);
+	if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
+		if (!r) {
+			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
+
+			abo->parent = amdgpu_bo_ref(vm->root.base.bo);
+		}
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 	if (r)
 		return r;
 
@@ -388,9 +414,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	}
 
 	/* create a gem object to contain this object in */
-	r = amdgpu_gem_object_create(adev, args->size, 0,
-				     AMDGPU_GEM_DOMAIN_CPU, 0,
-				     0, &gobj);
+	r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
+				     0, 0, NULL, &gobj);
 	if (r)
 		return r;
 
@@ -465,7 +490,7 @@ int amdgpu_gem_dgma_ioctl(struct drm_device *dev, void *data,
 		/* create a gem object to contain this object in */
 		r = amdgpu_gem_object_create(adev, args->size, 0,
 					     AMDGPU_GEM_DOMAIN_DGMA_IMPORT, 0,
-					     0, &gobj);
+					     0, NULL, &gobj);
 		if (r)
 			return r;
 
@@ -699,7 +724,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
-	struct list_head list;
+	struct list_head list, duplicates;
 	uint64_t va_flags;
 	int r = 0;
 
@@ -735,6 +760,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	}
 
 	INIT_LIST_HEAD(&list);
+	INIT_LIST_HEAD(&duplicates);
 	if ((args->operation != AMDGPU_VA_OP_CLEAR) &&
 	    !(args->flags & AMDGPU_VM_PAGE_PRT)) {
 		gobj = drm_gem_object_lookup(filp, args->handle);
@@ -751,7 +777,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
 	if (r)
 		goto error_unref;
 
@@ -817,6 +843,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *filp)
 {
+	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_gem_op *args = data;
 	struct drm_gem_object *gobj;
 	struct amdgpu_bo *robj;
@@ -864,6 +891,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
 			robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
 
+		if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+			amdgpu_vm_bo_invalidate(adev, robj, true);
+
 		amdgpu_bo_unreserve(robj);
 		break;
 	default:
@@ -893,8 +923,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
 	r = amdgpu_gem_object_create(adev, args->size, 0,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
-				     ttm_bo_type_device,
-				     &gobj);
+				     false, NULL, &gobj);
 	if (r)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 9f460de..d719434 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -154,7 +154,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
 
-	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
 		return ERR_PTR(-EPERM);
 
 	return drm_gem_prime_export(dev, gobj, flags);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 49eb2e1..36a4c1a 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -101,6 +101,8 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_SHADOW		(1 << 4)
 /* Flag that allocating the BO should use linear VRAM */
 #define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS	(1 << 5)
+/* Flag that BO is always valid in this VM */
+#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID       (1 << 6)
 
 /* hybrid specific */
 /* Flag that the memory allocation should be from top of domain */
-- 
2.7.4