diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch | 360 |
1 file changed, 360 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch new file mode 100644 index 00000000..3e7298b5 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch @@ -0,0 +1,360 @@ +From 275b387f2746831946aec9217b59fae318f12104 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Mon, 19 Nov 2018 20:05:54 -0500 +Subject: [PATCH 2724/2940] drm/amdgpu: Add KFD VRAM limit checking + +We don't want KFD processes evicting each other over VRAM usage. +Therefore prevent overcommitting VRAM among KFD applications with +a per-GPU limit. Also leave enough room for page tables on top +of the application memory usage. + +Acked-by: Alex Deucher <alexander.deucher@amd.com> +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Reviewed-by: Eric Huang <JinHuiEric.Huang@amd.com> +Signed-off-by: Alex Deucher <alexander.deucher@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 50 ++++++++++------ + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 8 ++- + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 60 +++++++++---------- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- + 5 files changed, 74 insertions(+), 53 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 14a8871b9917..11287a5ffc88 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -83,6 +83,7 @@ + #include "amdgpu_bo_list.h" + #include "amdgpu_gem.h" + #include "amdgpu_doorbell.h" ++#include "amdgpu_amdkfd.h" + #include "amdgpu_smu.h" + #include "amdgpu_discovery.h" + #include "amdgpu_mes.h" +@@ -950,6 +951,9 @@ struct amdgpu_device { + /* GDS */ + struct amdgpu_gds gds; + ++ /* KFD */ ++ struct amdgpu_kfd_dev kfd; ++ 
+ /* display related functionality */ + struct amdgpu_display_manager dm; + +@@ -970,9 +974,6 @@ struct amdgpu_device { + atomic64_t visible_pin_size; + atomic64_t gart_pin_size; + +- /* amdkfd interface */ +- struct kfd_dev *kfd; +- + /* soc15 register offset based on ip, instance and segment */ + uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +index 77bccdb27c7a..06bdbf06a0a5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +@@ -32,10 +32,20 @@ bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); + + static const unsigned int compute_vmid_bitmap = 0xFF00; + ++/* Total memory size in system memory and all GPU VRAM. Used to ++ * estimate worst case amount of memory to reserve for page tables ++ */ ++uint64_t amdgpu_amdkfd_total_mem_size; ++ + int amdgpu_amdkfd_init(void) + { ++ struct sysinfo si; + int ret; + ++ si_meminfo(&si); ++ amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; ++ amdgpu_amdkfd_total_mem_size *= si.mem_unit; ++ + #ifdef CONFIG_HSA_AMD + ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); + if (ret) +@@ -90,8 +100,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) + return; + } + +- adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, +- adev->pdev, kfd2kgd); ++ adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, ++ adev->pdev, kfd2kgd); ++ ++ if (adev->kfd.dev) ++ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; + } + + /** +@@ -131,7 +144,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) + { + int i; + int last_valid_bit; +- if (adev->kfd) { ++ ++ if (adev->kfd.dev) { + struct kgd2kfd_shared_resources gpu_resources = { + .compute_vmid_bitmap = compute_vmid_bitmap, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, +@@ -191,37 +205,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) + 
gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + +- kgd2kfd->device_init(adev->kfd, &gpu_resources); ++ kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); + } + } + + void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) + { +- if (adev->kfd) { +- kgd2kfd->device_exit(adev->kfd); +- adev->kfd = NULL; ++ if (adev->kfd.dev) { ++ kgd2kfd->device_exit(adev->kfd.dev); ++ adev->kfd.dev = NULL; + } + } + + void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, + const void *ih_ring_entry) + { +- if (adev->kfd) +- kgd2kfd->interrupt(adev->kfd, ih_ring_entry); ++ if (adev->kfd.dev) ++ kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); + } + + void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) + { +- if (adev->kfd) +- kgd2kfd->suspend(adev->kfd); ++ if (adev->kfd.dev) ++ kgd2kfd->suspend(adev->kfd.dev); + } + + int amdgpu_amdkfd_resume(struct amdgpu_device *adev) + { + int r = 0; + +- if (adev->kfd) +- r = kgd2kfd->resume(adev->kfd); ++ if (adev->kfd.dev) ++ r = kgd2kfd->resume(adev->kfd.dev); + + return r; + } +@@ -230,8 +244,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) + { + int r = 0; + +- if (adev->kfd) +- r = kgd2kfd->pre_reset(adev->kfd); ++ if (adev->kfd.dev) ++ r = kgd2kfd->pre_reset(adev->kfd.dev); + + return r; + } +@@ -240,8 +254,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) + { + int r = 0; + +- if (adev->kfd) +- r = kgd2kfd->post_reset(adev->kfd); ++ if (adev->kfd.dev) ++ r = kgd2kfd->post_reset(adev->kfd.dev); + + return r; + } +@@ -489,7 +503,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) + + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) + { +- if (adev->kfd) { ++ if (adev->kfd.dev) { + if ((1 << vmid) & compute_vmid_bitmap) + return true; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +index 573df02e74b1..63895bcbc5c5 100644 +--- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +@@ -34,6 +34,7 @@ + #include "amdgpu_vm.h" + + extern const struct kgd2kfd_calls *kgd2kfd; ++extern uint64_t amdgpu_amdkfd_total_mem_size; + + struct amdgpu_device; + +@@ -76,6 +77,11 @@ struct amdgpu_amdkfd_fence { + char timeline_name[TASK_COMM_LEN]; + }; + ++struct amdgpu_kfd_dev { ++ struct kfd_dev *dev; ++ uint64_t vram_used; ++}; ++ + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); + bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +@@ -194,6 +200,6 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); + + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); ++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); + + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 3162408e4e73..ed12c61e3737 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -110,17 +110,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) + (kfd_mem_limit.max_ttm_mem_limit >> 20)); + } + +-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, ++static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) + { +- size_t acc_size, system_mem_needed, ttm_mem_needed; ++ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; ++ uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + +- spin_lock(&kfd_mem_limit.mem_limit_lock); +- ++ vram_needed = 0; + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + /* TTM GTT memory */ + system_mem_needed = acc_size + 
size; +@@ -133,23 +133,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + /* VRAM and SG */ + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; ++ if (domain == AMDGPU_GEM_DOMAIN_VRAM) ++ vram_needed = size; + } + ++ spin_lock(&kfd_mem_limit.mem_limit_lock); ++ + if ((kfd_mem_limit.system_mem_used + system_mem_needed > +- kfd_mem_limit.max_system_mem_limit) || +- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > +- kfd_mem_limit.max_ttm_mem_limit)) ++ kfd_mem_limit.max_system_mem_limit) || ++ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > ++ kfd_mem_limit.max_ttm_mem_limit) || ++ (adev->kfd.vram_used + vram_needed > ++ adev->gmc.real_vram_size - reserved_for_pt)) { + ret = -ENOMEM; +- else { ++ } else { + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; ++ adev->kfd.vram_used += vram_needed; + } + + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; + } + +-static void unreserve_system_mem_limit(struct amdgpu_device *adev, ++static void unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) + { + size_t acc_size; +@@ -167,6 +174,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, + } else { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.ttm_mem_used -= acc_size; ++ if (domain == AMDGPU_GEM_DOMAIN_VRAM) { ++ adev->kfd.vram_used -= size; ++ WARN_ONCE(adev->kfd.vram_used < 0, ++ "kfd VRAM memory accounting unbalanced"); ++ } + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); +@@ -176,29 +188,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, + spin_unlock(&kfd_mem_limit.mem_limit_lock); + } + +-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) ++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) + { +- spin_lock(&kfd_mem_limit.mem_limit_lock); ++ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); 
++ u32 domain = bo->preferred_domains; ++ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); + + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { +- kfd_mem_limit.system_mem_used -= +- (bo->tbo.acc_size + amdgpu_bo_size(bo)); +- kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; +- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { +- kfd_mem_limit.system_mem_used -= +- (bo->tbo.acc_size + amdgpu_bo_size(bo)); +- kfd_mem_limit.ttm_mem_used -= +- (bo->tbo.acc_size + amdgpu_bo_size(bo)); +- } else { +- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; +- kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; ++ domain = AMDGPU_GEM_DOMAIN_CPU; ++ sg = false; + } +- WARN_ONCE(kfd_mem_limit.system_mem_used < 0, +- "kfd system memory accounting unbalanced"); +- WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, +- "kfd TTM memory accounting unbalanced"); + +- spin_unlock(&kfd_mem_limit.mem_limit_lock); ++ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); + } + + +@@ -1226,8 +1227,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + + amdgpu_sync_create(&(*mem)->sync); + +- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, +- alloc_domain, false); ++ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_reserve_limit; +@@ -1280,7 +1280,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + /* Don't unreserve system mem limit twice */ + goto err_reserve_limit; + err_bo_create: +- unreserve_system_mem_limit(adev, size, alloc_domain, false); ++ unreserve_mem_limit(adev, size, alloc_domain, false); + err_reserve_limit: + mutex_destroy(&(*mem)->lock); + kfree(*mem); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +index bc028b41b3e6..2401d61cb741 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +@@ -83,7 +83,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) + 
amdgpu_bo_subtract_pin_size(bo); + + if (bo->kfd_bo) +- amdgpu_amdkfd_unreserve_system_memory_limit(bo); ++ amdgpu_amdkfd_unreserve_memory_limit(bo); + + amdgpu_bo_kunmap(bo); + +-- +2.17.1 + |