aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/files/0991-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch
blob: 09eb70e9cbf456de2d79c28adcd6e36b2e5b1cc0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
From 3a22b61d8de6db7c5f14d00135390f473fd344e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 26 Jan 2016 12:37:49 +0100
Subject: [PATCH 0991/1565] drm/amdgpu: optimize amdgpu_vm_update_ptes a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't calculate the end address multiple times.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0aa43c5..eb38aa2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -568,6 +568,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 
 	unsigned count;
 
+	/* Abort early if there isn't anything to do */
+	if (pe_start == pe_end)
+		return;
+
 	/* system pages are non continuously */
 	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
@@ -622,9 +626,9 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint32_t flags)
 {
-	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
-	uint64_t last_pte = ~0, last_dst = ~0;
-	unsigned count = 0;
+	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
+
+	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
 	uint64_t addr;
 
 	/* walk over the address space and update the page tables */
@@ -632,40 +636,36 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
 		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
 		unsigned nptes;
-		uint64_t pte;
+		uint64_t pe_start;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pte = amdgpu_bo_gpu_offset(pt);
-		pte += (addr & mask) * 8;
+		pe_start = amdgpu_bo_gpu_offset(pt);
+		pe_start += (addr & mask) * 8;
 
-		if ((last_pte + 8 * count) != pte) {
+		if (last_pe_end != pe_start) {
 
-			if (count) {
-				amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-						    last_pte, last_pte + 8 * count,
-						    last_dst, flags);
-			}
+			amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+					    last_pe_start, last_pe_end,
+					    last_dst, flags);
 
-			count = nptes;
-			last_pte = pte;
+			last_pe_start = pe_start;
+			last_pe_end = pe_start + 8 * nptes;
 			last_dst = dst;
 		} else {
-			count += nptes;
+			last_pe_end += 8 * nptes;
 		}
 
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	if (count) {
-		amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-				    last_pte, last_pte + 8 * count,
-				    last_dst, flags);
-	}
+	amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+			    last_pe_start, last_pe_end,
+			    last_dst, flags);
 }
 
 /**
-- 
1.9.1