diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch | 53 |
1 file changed, 53 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch new file mode 100644 index 00000000..5f502cc7 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/4600-drm-amdgpu-Optimize-KFD-page-table-reservation.patch @@ -0,0 +1,53 @@ +From 2bd2c52721418a622b717d892211569b53db0120 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Mon, 15 Jul 2019 16:18:03 -0400 +Subject: [PATCH 4600/4736] drm/amdgpu: Optimize KFD page table reservation + +Be less pessimistic about estimated page table use for KFD. Most +allocations use 2MB pages and therefore need less VRAM for page +tables. This allows more VRAM to be used for applications especially +on large systems with many GPUs and hundreds of GB of system memory. + +Example: 8 GPUs with 32GB VRAM each + 256GB system memory = 512GB +Old page table reservation per GPU: 1GB +New page table reservation per GPU: 32MB + +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Reviewed-by: xinhui pan <xinhui.pan@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index a0d138849b61..3d7d6b5f423e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) + (kfd_mem_limit.max_ttm_mem_limit >> 20)); + } + ++/* Estimate page table size needed to represent a given memory size ++ * ++ * With 4KB pages, we need one 8 byte PTE for each 4KB of memory ++ * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB ++ * of memory (factor 256K, >> 18). 
ROCm user mode tries to optimize ++ * for 2MB pages for TLB efficiency. However, small allocations and ++ * fragmented system memory still need some 4KB pages. We choose a ++ * compromise that should work in most cases without reserving too ++ * much memory for page tables unnecessarily (factor 16K, >> 14). ++ */ ++#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) ++ + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) + { ++ uint64_t reserved_for_pt = ++ ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; +- uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, +-- +2.17.1 + |