about | summary | refs | log | tree | commit | diff | stats
path: root/common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch
blob: c674a1e2e7c831230f8d9f79fde90736a7da22ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
From 804871103c28d763b94a3488262169696452c0c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 26 Jan 2016 12:37:49 +0100
Subject: [PATCH 0255/1110] drm/amdgpu: optimize amdgpu_vm_update_ptes a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't calculate the end address multiple times.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 ++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 21d918a..83bc8b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -577,6 +577,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 
 	unsigned count;
 
+        /* Abort early if there isn't anything to do */
+        if (pe_start == pe_end)
+                return;
+
 	/* system pages are non continuously */
 	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
@@ -634,6 +638,9 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
 	unsigned count = 0;
+        const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
+ 
+        uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
 	uint64_t addr;
 
 	/* walk over the address space and update the page tables */
@@ -641,40 +648,36 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
                 struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
 		unsigned nptes;
-		uint64_t pte;
+                uint64_t pe_start;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pte = amdgpu_bo_gpu_offset(pt);
-		pte += (addr & mask) * 8;
+                pe_start = amdgpu_bo_gpu_offset(pt);
+                pe_start += (addr & mask) * 8;
 
-		if ((last_pte + 8 * count) != pte) {
+                if (last_pe_end != pe_start) {
 
-			if (count) {
-				amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-						    last_pte, last_pte + 8 * count,
-						    last_dst, flags);
-			}
-
-			count = nptes;
-			last_pte = pte;
+                        amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+                                            last_pe_start, last_pe_end,
+                                            last_dst, flags);
+ 
+                        last_pe_start = pe_start;
+                        last_pe_end = pe_start + 8 * nptes;
 			last_dst = dst;
 		} else {
-			count += nptes;
+                        last_pe_end += 8 * nptes;
 		}
 
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	if (count) {
-		amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-				    last_pte, last_pte + 8 * count,
-				    last_dst, flags);
-	}
+        amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+                            last_pe_start, last_pe_end,
+                            last_dst, flags);
 }
 
 /**
-- 
2.7.4