1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
From 97c763139b7c2eb879ad4c54b6362de098ae47cc Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Mon, 9 Jan 2017 16:22:53 -0500
Subject: [PATCH 1568/4131] drm/amdgpu: More fixes for system/userptr memory
accounting
amdgpu_ttm_tt_get_usermm depends on bo->tbo.ttm, which is already
destroyed when amdgpu_amdkfd_unreserve_system_memory_limit gets called
during BO destruction. Add a flag to identify KFD userptr BOs for
accounting purposes.
Fix memory usage accounting when amdgpu_bo_create fails.
Replace warnings with WARN_ONCE to avoid spamming the logs in case
accounting is found to be broken.
Change-Id: Icebb442180a174934c066f111644dea127083330
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 50 ++++++++++++++++++------
1 file changed, 37 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d14550d..e644bb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -45,6 +45,9 @@
* a HW bug. */
#define VI_BO_SIZE_ALIGN (0x8000)
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
/* Impose limit on how much memory KFD can use */
struct kfd_mem_usage_limit {
uint64_t max_system_mem_limit;
@@ -129,7 +132,6 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
-
} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
if ((kfd_mem_limit.system_mem_used + acc_size >
kfd_mem_limit.max_system_mem_limit) ||
@@ -146,25 +148,44 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
return ret;
}
+static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain)
+{
+ size_t acc_size;
+
+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+ sizeof(struct amdgpu_bo));
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.userptr_mem_used -= size;
+ }
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (bo->prefered_domains == AMDGPU_GEM_DOMAIN_GTT)
- kfd_mem_limit.system_mem_used -=
- (bo->tbo.acc_size + amdgpu_bo_size(bo));
- else if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+ } else if (bo->prefered_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ kfd_mem_limit.system_mem_used -=
+ (bo->tbo.acc_size + amdgpu_bo_size(bo));
}
- if (kfd_mem_limit.system_mem_used < 0) {
- pr_warn("kfd system memory size ref. error\n");
- kfd_mem_limit.system_mem_used = 0;
- }
- if (kfd_mem_limit.userptr_mem_used < 0) {
- pr_warn("kfd userptr memory size ref. error\n");
- kfd_mem_limit.userptr_mem_used = 0;
- }
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -614,10 +635,13 @@ static int __alloc_memory_of_gpu(struct kgd_dev *kgd, uint64_t va,
if (ret != 0) {
pr_err("Failed to create BO on domain %s. ret %d\n",
domain_string(alloc_domain), ret);
+ unreserve_system_mem_limit(adev, size, alloc_domain);
goto err_bo_create;
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
+ if (userptr)
+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
ret = amdgpu_bo_reserve(bo, true);
--
2.7.4
|