1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
From 804871103c28d763b94a3488262169696452c0c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 26 Jan 2016 12:37:49 +0100
Subject: [PATCH 0255/1110] drm/amdgpu: optimize amdgpu_vm_update_ptes a bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
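Don't calculate the end address multiple times. Track the end of the
pending PTE range directly instead of recomputing it from the start
address and the entry count on every iteration, and let
amdgpu_vm_frag_ptes() return early for an empty range so the callers
no longer have to check for it.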
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 ++++++++++++++++++----------------
1 file changed, 21 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 21d918a..83bc8b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -577,6 +577,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
unsigned count;
+ /* Abort early if there isn't anything to do */
+ if (pe_start == pe_end)
+ return;
+
/* system pages are non continuously */
if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
@@ -634,6 +638,9 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
uint64_t last_pte = ~0, last_dst = ~0;
unsigned count = 0;
+ const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
+
+ uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
uint64_t addr;
/* walk over the address space and update the page tables */
@@ -641,40 +648,36 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
uint64_t pt_idx = addr >> amdgpu_vm_block_size;
struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
unsigned nptes;
- uint64_t pte;
+ uint64_t pe_start;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
- pte = amdgpu_bo_gpu_offset(pt);
- pte += (addr & mask) * 8;
+ pe_start = amdgpu_bo_gpu_offset(pt);
+ pe_start += (addr & mask) * 8;
- if ((last_pte + 8 * count) != pte) {
+ if (last_pe_end != pe_start) {
- if (count) {
- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
- last_pte, last_pte + 8 * count,
- last_dst, flags);
- }
-
- count = nptes;
- last_pte = pte;
+ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+ last_pe_start, last_pe_end,
+ last_dst, flags);
+
+ last_pe_start = pe_start;
+ last_pe_end = pe_start + 8 * nptes;
last_dst = dst;
} else {
- count += nptes;
+ last_pe_end += 8 * nptes;
}
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
}
- if (count) {
- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
- last_pte, last_pte + 8 * count,
- last_dst, flags);
- }
+ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+ last_pe_start, last_pe_end,
+ last_dst, flags);
}
/**
--
2.7.4
|