about | summary | refs | log | tree | commit | diff | stats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1574-drm-amdgpu-Fix-PT-validation-for-amdkfd.patch
blob: 9ccdfaa05a519f174e772d4d7ac3cc36abcb5d09 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
From 161a71c5dc7b4b70dab42cef6d9a7cc2e1dcae2d Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 16 Nov 2016 20:20:34 -0500
Subject: [PATCH 1574/4131] drm/amdgpu: Fix PT validation for amdkfd

Remove the amdgpu_vm_validate_pt_bos() calls from the reserve_bo
functions (reserve_bo_and_vm and reserve_bo_and_cond_vms). There they
validated page table BOs before the BOs were reserved, which triggered
eviction fences.

Call amdgpu_vm_validate_pt_bos() from validate_pt_pd_bos() and
amdgpu_amdkfd_gpuvm_restore_process_bos() instead, so that page tables
are validated when needed without triggering eviction fences.

Change-Id: I14557e58fa70863f32e75609d1733f876560c912
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

 Conflicts:
	drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 81 +++++++++++++-----------
 1 file changed, 45 insertions(+), 36 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1899eba..6ac2ba3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -453,41 +453,36 @@ static int amdgpu_amdkfd_bo_invalidate(struct amdgpu_bo *bo)
 	return ret;
 }
 
-static int validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int validate_pt_pd_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	int i, ret = 0;
-	struct amdgpu_bo *bo, *pd = vm->page_directory;
+	struct amdgpu_bo *pd = vm->page_directory;
 	struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base);
+	struct amdgpu_vm_parser param;
+	int ret;
+
+	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	param.wait = true;
 
 	/* Remove eviction fence so that validate can wait on move fences */
 	amdgpu_amdkfd_remove_eviction_fence(pd, kvm->eviction_fence,
 					    NULL, NULL);
 
-	/* PTs share same reservation object as PD. So only fence PD */
-	for (i = 0; i <= vm->max_pde_used; ++i) {
-		bo = vm->page_tables[i].bo;
-
-		if (!bo)
-			continue;
-
-		ret = amdgpu_amdkfd_bo_validate(bo, AMDGPU_GEM_DOMAIN_VRAM,
-						true);
-		if (ret != 0) {
-			pr_err("Failed to validate PTE %d\n", i);
-			break;
-		}
-	}
+	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
+					&param);
 
-	ret = amdgpu_amdkfd_bo_validate(pd, AMDGPU_GEM_DOMAIN_VRAM,
-					true);
-	if (ret != 0) {
-		pr_err("Failed to validate PD\n");
-		return ret;
+	if (ret) {
+		pr_err("amdgpu: failed to validate PT BOs\n");
+	} else {
+		ret = amdgpu_amdkfd_validate(&param, pd);
+		if (ret)
+			pr_err("amdgpu: failed to validate PD\n");
 	}
 
 	/* Add the eviction fence back */
 	amdgpu_bo_fence(pd, &kvm->master->eviction_fence->base, true);
 
+	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+
 	return ret;
 }
 
@@ -735,7 +730,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 			      struct bo_vm_reservation_context *ctx)
 {
 	struct amdgpu_bo *bo = mem->bo;
-	struct amdgpu_vm_parser param;
 	int ret;
 
 	WARN_ON(!vm);
@@ -760,10 +754,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
-	param.domain = bo->prefered_domains;
-	param.wait = false;
-	amdgpu_vm_validate_pt_bos(amdgpu_ttm_adev(bo->tbo.bdev), vm,
-			amdgpu_amdkfd_validate, &param);
 
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
 				     false, &ctx->duplicates);
@@ -801,7 +791,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 {
 	struct amdgpu_bo *bo = mem->bo;
 	struct kfd_bo_va_list *entry;
-	struct amdgpu_vm_parser param;
 	unsigned i;
 	int ret;
 
@@ -848,13 +837,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 		i++;
 	}
 
-	if (vm) {
-		param.domain = bo->prefered_domains;
-		param.wait = false;
-		amdgpu_vm_validate_pt_bos(amdgpu_ttm_adev(bo->tbo.bdev), vm,
-				amdgpu_amdkfd_validate, &param);
-	}
-
 	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
 				     false, &ctx->duplicates);
 	if (!ret)
@@ -1044,7 +1026,7 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 	/* PT BOs may be created during amdgpu_vm_bo_map() call,
 	 * so we have to validate the newly created PT BOs.
 	 */
-	ret = validate_pt_pd_bos(entry->bo_va->vm);
+	ret = validate_pt_pd_bos(adev, entry->bo_va->vm);
 	if (ret != 0) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		return ret;
@@ -2114,6 +2096,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 	struct kgd_mem *mem;
 	struct bo_vm_reservation_context ctx;
 	struct amdgpu_amdkfd_fence *old_fence;
+	struct amdgpu_device *adev;
+	struct amdgpu_vm_parser param;
 	int ret = 0, i;
 
 	if (WARN_ON(master_vm == NULL || master_vm->master != master_vm))
@@ -2184,6 +2168,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 			goto validate_map_fail;
 		}
 	}
+        /* This isn't used for PTs any more, but can there be other
+	 * duplicates? */
+	WARN_ONCE(!list_empty(&ctx.duplicates), "Duplicates not empty");
 	list_for_each_entry(entry, &ctx.duplicates, tv.head) {
 		struct amdgpu_bo *bo = entry->robj;
 
@@ -2194,6 +2181,28 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
 			goto validate_map_fail;
 		}
 	}
+	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+	param.wait = false;
+	adev = amdgpu_ttm_adev(master_vm->base.page_directory->tbo.bdev);
+	ret = amdgpu_vm_validate_pt_bos(adev, &master_vm->base,
+					amdgpu_amdkfd_validate, &param);
+	if (ret) {
+		pr_debug("Memory eviction: Validate failed. Try again\n");
+		goto validate_map_fail;
+	}
+	master_vm->base.last_eviction_counter =
+		atomic64_read(&adev->num_evictions);
+	list_for_each_entry(peer_vm, &master_vm->kfd_vm_list, kfd_vm_list) {
+		adev = amdgpu_ttm_adev(peer_vm->base.page_directory->tbo.bdev);
+		ret = amdgpu_vm_validate_pt_bos(adev, &peer_vm->base,
+						amdgpu_amdkfd_validate, &param);
+		if (ret) {
+			pr_debug("Memory eviction: Validate failed. Try again\n");
+			goto validate_map_fail;
+		}
+		peer_vm->base.last_eviction_counter =
+			atomic64_read(&adev->num_evictions);
+	}
 
 	/* Wait for PT/PD validate to finish and attach eviction fence.
 	 * PD/PT share the same reservation object
-- 
2.7.4