about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch | 53
1 file changed, 53 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch
new file mode 100644
index 00000000..5f502cc7
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch
@@ -0,0 +1,53 @@
+From 2bd2c52721418a622b717d892211569b53db0120 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Mon, 15 Jul 2019 16:18:03 -0400
+Subject: [PATCH 4600/4736] drm/amdgpu: Optimize KFD page table reservation
+
+Be less pessimistic about estimated page table use for KFD. Most
+allocations use 2MB pages and therefore need less VRAM for page
+tables. This allows more VRAM to be used for applications especially
+on large systems with many GPUs and hundreds of GB of system memory.
+
+Example: 8 GPUs with 32GB VRAM each + 256GB system memory = 512GB
+Old page table reservation per GPU: 1GB
+New page table reservation per GPU: 32MB
+
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Reviewed-by: xinhui pan <xinhui.pan@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index a0d138849b61..3d7d6b5f423e 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ }
+
++/* Estimate page table size needed to represent a given memory size
++ *
++ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory
++ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB
++ * of memory (factor 256K, >> 18). ROCm user mode tries to optimize
++ * for 2MB pages for TLB efficiency. However, small allocations and
++ * fragmented system memory still need some 4KB pages. We choose a
++ * compromise that should work in most cases without reserving too
++ * much memory for page tables unnecessarily (factor 16K, >> 14).
++ */
++#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
++
+ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain, bool sg)
+ {
++ uint64_t reserved_for_pt =
++ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
+ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
+ int ret = 0;
+
+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+--
+2.17.1
+