about | summary | refs | log | tree | commit | diff | stats
path: root/common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch')
-rw-r--r-- common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch | 101
1 files changed, 101 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch b/common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch
new file mode 100644
index 00000000..c674a1e2
--- /dev/null
+++ b/common/recipes-kernel/linux/files/0255-drm-amdgpu-optimize-amdgpu_vm_update_ptes-a-bit.patch
@@ -0,0 +1,101 @@
+From 804871103c28d763b94a3488262169696452c0c6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Tue, 26 Jan 2016 12:37:49 +0100
+Subject: [PATCH 0255/1110] drm/amdgpu: optimize amdgpu_vm_update_ptes a bit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Don't calculate the end address multiple times.
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 39 ++++++++++++++++++----------------
+ 1 file changed, 21 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+index 21d918a..83bc8b1 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+@@ -577,6 +577,10 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+
+ unsigned count;
+
++ /* Abort early if there isn't anything to do */
++ if (pe_start == pe_end)
++ return;
++
+ /* system pages are non continuously */
+ if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
+
+@@ -634,6 +638,9 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+ uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
+ uint64_t last_pte = ~0, last_dst = ~0;
+ unsigned count = 0;
++ const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
++
++ uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
+ uint64_t addr;
+
+ /* walk over the address space and update the page tables */
+@@ -641,40 +648,36 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+ uint64_t pt_idx = addr >> amdgpu_vm_block_size;
+ struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
+ unsigned nptes;
+- uint64_t pte;
++ uint64_t pe_start;
+
+ if ((addr & ~mask) == (end & ~mask))
+ nptes = end - addr;
+ else
+ nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+
+- pte = amdgpu_bo_gpu_offset(pt);
+- pte += (addr & mask) * 8;
++ pe_start = amdgpu_bo_gpu_offset(pt);
++ pe_start += (addr & mask) * 8;
+
+- if ((last_pte + 8 * count) != pte) {
++ if (last_pe_end != pe_start) {
+
+- if (count) {
+- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+- last_pte, last_pte + 8 * count,
+- last_dst, flags);
+- }
+-
+- count = nptes;
+- last_pte = pte;
++ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
++ last_pe_start, last_pe_end,
++ last_dst, flags);
++
++ last_pe_start = pe_start;
++ last_pe_end = pe_start + 8 * nptes;
+ last_dst = dst;
+ } else {
+- count += nptes;
++ last_pe_end += 8 * nptes;
+ }
+
+ addr += nptes;
+ dst += nptes * AMDGPU_GPU_PAGE_SIZE;
+ }
+
+- if (count) {
+- amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+- last_pte, last_pte + 8 * count,
+- last_dst, flags);
+- }
++ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
++ last_pe_start, last_pe_end,
++ last_dst, flags);
+ }
+
+ /**
+--
+2.7.4
+