aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1515-drm-amdgpu-Add-kfd2kgd-restore_process_bos-API.patch
blob: 30fc225b09f95c7e745d3ac19abe614c8769dcae (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
From 116d98ff97fd3aad490ea72c8f19c106a48ae1e0 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Wed, 29 Jun 2016 16:50:52 -0400
Subject: [PATCH 1515/4131] drm/amdgpu: Add kfd2kgd restore_process_bos API

Restore all BOs and the resume the evicted queues

Change-Id: Ieee6e0b4eeb327c7dd56304aca86c4008e044b70
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h        |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 168 ++++++++++++++++++++++
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |   5 +
 5 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 83bea0e..a291577 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -138,7 +138,7 @@ void amdgpu_amdkfd_cancel_restore_mem(struct amdgpu_device *adev,
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
-
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *master_vm);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 22c4b7a..be8bbda 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -227,6 +227,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions()
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 7be32f3..2c34213 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -208,6 +208,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
 	.submit_ib = amdgpu_amdkfd_submit_ib,
 	.get_tile_config = amdgpu_amdkfd_get_tile_config,
+	.restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions()
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index bab085c..ad61cb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1725,3 +1725,171 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
 
 	return ret;
 }
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ *   KFD process identified by master_vm
+ *
+ * @master_vm: Master VM of the KFD process
+ *
+ * After memory eviction, restore thread calls this function. The function
+ * should be called when the Process is still valid. BO restore involves -
+ *
+ * 1.  Release old eviction fence and create new one
+ * 2.  Get PD BO list and PT BO list from all the VMs.
+ * 3.  Merge PD BO list into KFD BO list. Reserve all BOs.
+ * 4.  Split the list into PD BO list and KFD BO list
+ * 5.  Validate of PD and PT BOs and add new fence to the BOs
+ * 6.  Validate all KFD BOs and Map them and add new fence
+ * 7.  Merge PD BO list into KFD BO list. Unreserve all BOs
+ * 8.  Restore back KFD BO list by removing PD BO entries
+ */
+
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list, *last_pd_bo_entry, *entry;
+	struct amdkfd_vm *master_vm = (struct amdkfd_vm *)m_vm, *peer_vm;
+	struct kgd_mem *mem;
+	struct bo_vm_reservation_context ctx;
+	struct amdgpu_amdkfd_fence *old_fence;
+	int ret = 0, i;
+
+	if (WARN_ON(master_vm == NULL || master_vm->master != master_vm))
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&ctx.list);
+	INIT_LIST_HEAD(&ctx.duplicates);
+
+	pd_bo_list = kcalloc(master_vm->n_vms,
+			     sizeof(struct amdgpu_bo_list_entry),
+			     GFP_KERNEL);
+	if (pd_bo_list == NULL)
+		return -ENOMEM;
+
+	/* Release old eviction fence and create new one. Use context and mm
+	 * from the old fence.
+	 */
+	old_fence = master_vm->eviction_fence;
+	master_vm->eviction_fence =
+		amdgpu_amdkfd_fence_create(old_fence->base.context,
+					   old_fence->mm);
+	fence_put(&old_fence->base);
+	if (master_vm->eviction_fence == NULL) {
+		pr_err("Failed to create eviction fence\n");
+		goto evict_fence_fail;
+	}
+
+	/* Get PD BO list and PT BO list from all the VMs */
+	amdgpu_vm_get_pd_bo(&master_vm->base, &ctx.list,
+			    &pd_bo_list[0]);
+	amdgpu_vm_get_pt_bos(master_vm->adev, &master_vm->base,
+			     &ctx.duplicates);
+
+	i = 1;
+	list_for_each_entry(peer_vm, &master_vm->kfd_vm_list, kfd_vm_list) {
+		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
+				    &pd_bo_list[i]);
+		amdgpu_vm_get_pt_bos(peer_vm->adev, &peer_vm->base,
+				     &ctx.duplicates);
+		i++;
+	}
+
+	/* Needed to splicing and cutting the lists */
+	last_pd_bo_entry = list_last_entry(&ctx.list,
+					   struct amdgpu_bo_list_entry,
+					   tv.head);
+
+	/* Reserve all BOs and page tables/directory. */
+	mutex_lock(&master_vm->lock);
+	list_splice_init(&ctx.list, &master_vm->kfd_bo_list);
+	ret = ttm_eu_reserve_buffers(&ctx.ticket, &master_vm->kfd_bo_list,
+				     false, &ctx.duplicates);
+	if (ret) {
+		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
+		goto ttm_reserve_fail;
+	}
+
+	/* Restore kfd_bo_list. ctx.list contains only PDs */
+	list_cut_position(&ctx.list, &master_vm->kfd_bo_list,
+			  &last_pd_bo_entry->tv.head);
+
+	/* Validate PDs and PTs */
+	list_for_each_entry(entry, &ctx.list, tv.head) {
+		struct amdgpu_bo *bo = entry->robj;
+
+		amdgpu_ttm_placement_from_domain(bo, bo->prefered_domains);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (ret) {
+			pr_debug("Memory eviction: Validate failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+	list_for_each_entry(entry, &ctx.duplicates, tv.head) {
+		struct amdgpu_bo *bo = entry->robj;
+
+		amdgpu_ttm_placement_from_domain(bo, bo->prefered_domains);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (ret) {
+			pr_debug("Memory eviction: Validate failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+
+	/* Wait for PT/PD validate to finish and attach eviction fence.
+	 * PD/PT share the same reservation object
+	 */
+	list_for_each_entry(entry, &ctx.list, tv.head) {
+		struct amdgpu_bo *bo = entry->robj;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+		amdgpu_bo_fence(bo, &master_vm->eviction_fence->base, true);
+	}
+
+
+	/* Validate BOs and map them to GPUVM (update VM page tables). */
+	list_for_each_entry(mem, &master_vm->kfd_bo_list,
+			    data2.bo_list_entry.tv.head) {
+
+		struct amdgpu_bo *bo = mem->data2.bo;
+		uint32_t domain = mem->data2.domain;
+		struct kfd_bo_va_list *bo_va_entry;
+
+		amdgpu_ttm_placement_from_domain(bo, domain);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (ret) {
+			pr_debug("Memory eviction: Validate failed. Try again\n");
+			goto validate_map_fail;
+		}
+		list_for_each_entry(bo_va_entry, &mem->data2.bo_va_list,
+				    bo_list) {
+			ret = map_bo_to_gpuvm((struct amdgpu_device *)
+					      bo_va_entry->kgd_dev,
+					      bo, bo_va_entry->bo_va,
+					      domain);
+			if (ret) {
+				pr_debug("Memory eviction: Map failed. Try again\n");
+				goto validate_map_fail;
+			}
+		}
+	}
+
+	/* Wait for validate to finish and attach new eviction fence */
+	list_for_each_entry(mem, &master_vm->kfd_bo_list,
+		data2.bo_list_entry.tv.head) {
+		struct amdgpu_bo *bo = mem->data2.bo;
+
+		ttm_bo_wait(&bo->tbo, false, false);
+		amdgpu_bo_fence(bo, &master_vm->eviction_fence->base, true);
+	}
+validate_map_fail:
+	/* Add PDs to kfd_bo_list for unreserve */
+	list_splice_init(&ctx.list, &master_vm->kfd_bo_list);
+	ttm_eu_backoff_reservation(&ctx.ticket, &master_vm->kfd_bo_list);
+ttm_reserve_fail:
+	/* Restore kfd_bo_list */
+	list_cut_position(&ctx.list, &master_vm->kfd_bo_list,
+			  &last_pd_bo_entry->tv.head);
+	mutex_unlock(&master_vm->lock);
+evict_fence_fail:
+	kfree(pd_bo_list);
+	return ret;
+}
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 4b52412..c38e707 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -220,6 +220,9 @@ struct tile_config {
  * @submit_ib: Submits an IB to the engine specified by inserting the IB to
  * the corresonded ring (ring type).
  *
+ * @restore_process_bos: Restore all BOs that belongs to the process identified
+ * by master_vm.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -361,6 +364,8 @@ struct kfd2kgd_calls {
 			uint32_t *ib_cmd, uint32_t ib_len);
 	int (*get_tile_config)(struct kgd_dev *kgd,
 			struct tile_config *config);
+
+	int (*restore_process_bos)(void *master_vm);
 };
 
 /**
-- 
2.7.4