aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/2849-drm-amdgpu-update-one-PDE-at-a-time-v2.patch
blob: 6671ef134320f38862479063e62b7d3803167c76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
From a2e6bc0038af78e5a7afbe39fcd4c9cecdcd7470 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 30 Nov 2017 15:19:50 +0100
Subject: [PATCH 2849/4131] drm/amdgpu: update one PDE at a time v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Horribly inefficient, but avoids problems when the root PD size becomes
too big.

v2: remove incr as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <davdi1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 80 +++++++++++++++-------------------
 1 file changed, 34 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cdf3fbb..3997b08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1078,18 +1078,20 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Makes sure all entries in @parent are up to date.
  * Returns 0 for success, error for failure.
  */
-static int amdgpu_vm_update_level(struct amdgpu_device *adev,
-				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent)
+static int amdgpu_vm_update_pde(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				struct amdgpu_vm_pt *parent,
+				struct amdgpu_vm_pt *entry)
 {
+	struct amdgpu_pte_update_params params;
+	struct amdgpu_bo *bo = entry->base.bo;
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	unsigned pt_idx, ndw = 0;
 	struct amdgpu_job *job;
-	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
-	uint32_t incr;
+	unsigned ndw = 0;
+	uint64_t pde, pt;
 
 	int r;
 
@@ -1111,20 +1113,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		ring = container_of(vm->entity.sched, struct amdgpu_ring,
 				    sched);
 
-		/* padding, etc. */
+		/* should be sufficient for two commands plus padding, etc. */
 		ndw = 64;
 
-		/* assume the worst case */
-		ndw += parent->last_entry_used * 6;
-
 		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
-
-		if (shadow) {
+		if (shadow)
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
-			ndw *= 2;
-		} else {
+		else
 			shadow_addr = 0;
-		}
 
 		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
 		if (r)
@@ -1134,40 +1130,30 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		params.func = amdgpu_vm_do_set_ptes;
 	}
 
+	spin_lock(&vm->status_lock);
+	list_del_init(&entry->base.vm_status);
+	spin_unlock(&vm->status_lock);
 
-	/* walk over the address space and update the directory */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-		struct amdgpu_bo *bo = entry->base.bo;
-		uint64_t pde, pt;
-
-		if (bo == NULL)
-			continue;
-
-		spin_lock(&vm->status_lock);
-		list_del_init(&entry->base.vm_status);
-		spin_unlock(&vm->status_lock);
-
-		pt = amdgpu_bo_gpu_offset(bo);
-		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		/* Don't update huge pages here */
-		if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) ||
-		    parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID))
-			continue;
-
-		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
+	pt = amdgpu_bo_gpu_offset(bo);
+	pt = amdgpu_gart_get_vm_pde(adev, pt);
+	/* Don't update huge pages here */
+	if (entry->addr & AMDGPU_PDE_PTE ||
+	    entry->addr == (pt | AMDGPU_PTE_VALID)) {
+		if (!vm->use_cpu_for_update)
+			amdgpu_job_free(job);
+		return 0;
+	}
 
-		incr = amdgpu_bo_size(bo);
-		if (shadow) {
-			pde = shadow_addr + pt_idx * 8;
-			params.func(&params, pde, pt, 1, incr,
-				    AMDGPU_PTE_VALID);
-		}
+	entry->addr = pt | AMDGPU_PTE_VALID;
 
-		pde = pd_addr + pt_idx * 8;
-		params.func(&params, pde, pt, 1, incr, AMDGPU_PTE_VALID);
+	if (shadow) {
+		pde = shadow_addr + (entry - parent->entries) * 8;
+		params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
 	}
 
+	pde = pd_addr + (entry - parent->entries) * 8;
+	params.func(&params, pde, pt, 1, 0, AMDGPU_PTE_VALID);
+
 	if (!vm->use_cpu_for_update) {
 		if (params.ib->length_dw == 0) {
 			amdgpu_job_free(job);
@@ -1258,14 +1244,16 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 		bo = bo_base->bo->parent;
 		if (bo) {
 			struct amdgpu_vm_bo_base *parent;
-			struct amdgpu_vm_pt *pt;
+			struct amdgpu_vm_pt *pt, *entry;
 
 			parent = list_first_entry(&bo->va,
 						  struct amdgpu_vm_bo_base,
 						  bo_list);
 			pt = container_of(parent, struct amdgpu_vm_pt, base);
+			entry = container_of(bo_base, struct amdgpu_vm_pt,
+					     base);
 
-			r = amdgpu_vm_update_level(adev, vm, pt);
+			r = amdgpu_vm_update_pde(adev, vm, pt, entry);
 			if (r) {
 				amdgpu_vm_invalidate_level(vm, &vm->root);
 				return r;
-- 
2.7.4