about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch | 360
1 file changed, 360 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch
new file mode 100644
index 00000000..3e7298b5
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2724-drm-amdgpu-Add-KFD-VRAM-limit-checking.patch
@@ -0,0 +1,360 @@
+From 275b387f2746831946aec9217b59fae318f12104 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Mon, 19 Nov 2018 20:05:54 -0500
+Subject: [PATCH 2724/2940] drm/amdgpu: Add KFD VRAM limit checking
+
+We don't want KFD processes evicting each other over VRAM usage.
+Therefore prevent overcommitting VRAM among KFD applications with
+a per-GPU limit. Also leave enough room for page tables on top
+of the application memory usage.
+
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Reviewed-by: Eric Huang <JinHuiEric.Huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 ++-
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 50 ++++++++++------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 8 ++-
+ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 60 +++++++++----------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
+ 5 files changed, 74 insertions(+), 53 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index 14a8871b9917..11287a5ffc88 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -83,6 +83,7 @@
+ #include "amdgpu_bo_list.h"
+ #include "amdgpu_gem.h"
+ #include "amdgpu_doorbell.h"
++#include "amdgpu_amdkfd.h"
+ #include "amdgpu_smu.h"
+ #include "amdgpu_discovery.h"
+ #include "amdgpu_mes.h"
+@@ -950,6 +951,9 @@ struct amdgpu_device {
+ /* GDS */
+ struct amdgpu_gds gds;
+
++ /* KFD */
++ struct amdgpu_kfd_dev kfd;
++
+ /* display related functionality */
+ struct amdgpu_display_manager dm;
+
+@@ -970,9 +974,6 @@ struct amdgpu_device {
+ atomic64_t visible_pin_size;
+ atomic64_t gart_pin_size;
+
+- /* amdkfd interface */
+- struct kfd_dev *kfd;
+-
+ /* soc15 register offset based on ip, instance and segment */
+ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+index 77bccdb27c7a..06bdbf06a0a5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+@@ -32,10 +32,20 @@ bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+
+ static const unsigned int compute_vmid_bitmap = 0xFF00;
+
++/* Total memory size in system memory and all GPU VRAM. Used to
++ * estimate worst case amount of memory to reserve for page tables
++ */
++uint64_t amdgpu_amdkfd_total_mem_size;
++
+ int amdgpu_amdkfd_init(void)
+ {
++ struct sysinfo si;
+ int ret;
+
++ si_meminfo(&si);
++ amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
++ amdgpu_amdkfd_total_mem_size *= si.mem_unit;
++
+ #ifdef CONFIG_HSA_AMD
+ ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
+ if (ret)
+@@ -90,8 +100,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
+ return;
+ }
+
+- adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+- adev->pdev, kfd2kgd);
++ adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
++ adev->pdev, kfd2kgd);
++
++ if (adev->kfd.dev)
++ amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
+ }
+
+ /**
+@@ -131,7 +144,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
+ {
+ int i;
+ int last_valid_bit;
+- if (adev->kfd) {
++
++ if (adev->kfd.dev) {
+ struct kgd2kfd_shared_resources gpu_resources = {
+ .compute_vmid_bitmap = compute_vmid_bitmap,
+ .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+@@ -191,37 +205,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
+ gpu_resources.reserved_doorbell_mask = 0x1f0;
+ gpu_resources.reserved_doorbell_val = 0x0f0;
+
+- kgd2kfd->device_init(adev->kfd, &gpu_resources);
++ kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
+ }
+ }
+
+ void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
+ {
+- if (adev->kfd) {
+- kgd2kfd->device_exit(adev->kfd);
+- adev->kfd = NULL;
++ if (adev->kfd.dev) {
++ kgd2kfd->device_exit(adev->kfd.dev);
++ adev->kfd.dev = NULL;
+ }
+ }
+
+ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
+ const void *ih_ring_entry)
+ {
+- if (adev->kfd)
+- kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
++ if (adev->kfd.dev)
++ kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
+ }
+
+ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
+ {
+- if (adev->kfd)
+- kgd2kfd->suspend(adev->kfd);
++ if (adev->kfd.dev)
++ kgd2kfd->suspend(adev->kfd.dev);
+ }
+
+ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
+ {
+ int r = 0;
+
+- if (adev->kfd)
+- r = kgd2kfd->resume(adev->kfd);
++ if (adev->kfd.dev)
++ r = kgd2kfd->resume(adev->kfd.dev);
+
+ return r;
+ }
+@@ -230,8 +244,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+ {
+ int r = 0;
+
+- if (adev->kfd)
+- r = kgd2kfd->pre_reset(adev->kfd);
++ if (adev->kfd.dev)
++ r = kgd2kfd->pre_reset(adev->kfd.dev);
+
+ return r;
+ }
+@@ -240,8 +254,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
+ {
+ int r = 0;
+
+- if (adev->kfd)
+- r = kgd2kfd->post_reset(adev->kfd);
++ if (adev->kfd.dev)
++ r = kgd2kfd->post_reset(adev->kfd.dev);
+
+ return r;
+ }
+@@ -489,7 +503,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
+
+ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+ {
+- if (adev->kfd) {
++ if (adev->kfd.dev) {
+ if ((1 << vmid) & compute_vmid_bitmap)
+ return true;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index 573df02e74b1..63895bcbc5c5 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -34,6 +34,7 @@
+ #include "amdgpu_vm.h"
+
+ extern const struct kgd2kfd_calls *kgd2kfd;
++extern uint64_t amdgpu_amdkfd_total_mem_size;
+
+ struct amdgpu_device;
+
+@@ -76,6 +77,11 @@ struct amdgpu_amdkfd_fence {
+ char timeline_name[TASK_COMM_LEN];
+ };
+
++struct amdgpu_kfd_dev {
++ struct kfd_dev *dev;
++ uint64_t vram_used;
++};
++
+ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm);
+ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+@@ -194,6 +200,6 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct kfd_vm_fault_info *info);
+
+ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+
+ #endif /* AMDGPU_AMDKFD_H_INCLUDED */
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index 3162408e4e73..ed12c61e3737 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -110,17 +110,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+ (kfd_mem_limit.max_ttm_mem_limit >> 20));
+ }
+
+-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
++static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain, bool sg)
+ {
+- size_t acc_size, system_mem_needed, ttm_mem_needed;
++ size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
++ uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
+ int ret = 0;
+
+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+ sizeof(struct amdgpu_bo));
+
+- spin_lock(&kfd_mem_limit.mem_limit_lock);
+-
++ vram_needed = 0;
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ /* TTM GTT memory */
+ system_mem_needed = acc_size + size;
+@@ -133,23 +133,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
+ /* VRAM and SG */
+ system_mem_needed = acc_size;
+ ttm_mem_needed = acc_size;
++ if (domain == AMDGPU_GEM_DOMAIN_VRAM)
++ vram_needed = size;
+ }
+
++ spin_lock(&kfd_mem_limit.mem_limit_lock);
++
+ if ((kfd_mem_limit.system_mem_used + system_mem_needed >
+- kfd_mem_limit.max_system_mem_limit) ||
+- (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+- kfd_mem_limit.max_ttm_mem_limit))
++ kfd_mem_limit.max_system_mem_limit) ||
++ (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
++ kfd_mem_limit.max_ttm_mem_limit) ||
++ (adev->kfd.vram_used + vram_needed >
++ adev->gmc.real_vram_size - reserved_for_pt)) {
+ ret = -ENOMEM;
+- else {
++ } else {
+ kfd_mem_limit.system_mem_used += system_mem_needed;
+ kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
++ adev->kfd.vram_used += vram_needed;
+ }
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ return ret;
+ }
+
+-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
++static void unreserve_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain, bool sg)
+ {
+ size_t acc_size;
+@@ -167,6 +174,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+ } else {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.ttm_mem_used -= acc_size;
++ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
++ adev->kfd.vram_used -= size;
++ WARN_ONCE(adev->kfd.vram_used < 0,
++ "kfd VRAM memory accounting unbalanced");
++ }
+ }
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "kfd system memory accounting unbalanced");
+@@ -176,29 +188,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ }
+
+-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
++void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
+ {
+- spin_lock(&kfd_mem_limit.mem_limit_lock);
++ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
++ u32 domain = bo->preferred_domains;
++ bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
+
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+- kfd_mem_limit.system_mem_used -=
+- (bo->tbo.acc_size + amdgpu_bo_size(bo));
+- kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
+- } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+- kfd_mem_limit.system_mem_used -=
+- (bo->tbo.acc_size + amdgpu_bo_size(bo));
+- kfd_mem_limit.ttm_mem_used -=
+- (bo->tbo.acc_size + amdgpu_bo_size(bo));
+- } else {
+- kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+- kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
++ domain = AMDGPU_GEM_DOMAIN_CPU;
++ sg = false;
+ }
+- WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+- "kfd system memory accounting unbalanced");
+- WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
+- "kfd TTM memory accounting unbalanced");
+
+- spin_unlock(&kfd_mem_limit.mem_limit_lock);
++ unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
+ }
+
+
+@@ -1226,8 +1227,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+
+ amdgpu_sync_create(&(*mem)->sync);
+
+- ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
+- alloc_domain, false);
++ ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, false);
+ if (ret) {
+ pr_debug("Insufficient system memory\n");
+ goto err_reserve_limit;
+@@ -1280,7 +1280,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_limit;
+ err_bo_create:
+- unreserve_system_mem_limit(adev, size, alloc_domain, false);
++ unreserve_mem_limit(adev, size, alloc_domain, false);
+ err_reserve_limit:
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index bc028b41b3e6..2401d61cb741 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -83,7 +83,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
+ amdgpu_bo_subtract_pin_size(bo);
+
+ if (bo->kfd_bo)
+- amdgpu_amdkfd_unreserve_system_memory_limit(bo);
++ amdgpu_amdkfd_unreserve_memory_limit(bo);
+
+ amdgpu_bo_kunmap(bo);
+
+--
+2.17.1
+