aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/1618-drm-amdgpu-Use-separate-resv.-list-in-restore.patch
blob: b45fbbf4ed5ddef3b35888dfdbf17b0644ee1037 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
From a6863fb8d017749b01ec2ed3bf783e6bf9b8caf7 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Fri, 10 Mar 2017 18:07:01 -0500
Subject: [PATCH 1618/4131] drm/amdgpu: Use separate resv. list in restore

ttm_eu_reserve_buffers() can reorder the items in the resv. list. This
could corrupt the kfd_bo_list, instead create a separate list for
reserve and unreserve.

Change-Id: I2690e2c0f5264d28c3b63d85ec94783e1ccae810
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 73 +++++++++++-------------
 2 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 1fdade9..262a4fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -50,6 +50,7 @@ struct kgd_mem {
         struct list_head bo_va_list;
 	/* protected by amdkfd_process_info.lock */
         struct ttm_validate_buffer validate_list;
+	struct ttm_validate_buffer resv_list;
         uint32_t domain;
         unsigned int mapped_to_gpu_memory;
         void *kptr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 358bb72..a8482ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2097,18 +2097,19 @@ int amdgpu_amdkfd_gpuvm_restore_mem(struct kgd_mem *mem, struct mm_struct *mm)
  * should be called when the Process is still valid. BO restore involves -
  *
  * 1.  Release old eviction fence and create new one
- * 2.  Get PD BO list and PT BO list from all the VMs.
- * 3.  Merge PD BO list into KFD BO list. Reserve all BOs.
- * 4.  Split the list into PD BO list and KFD BO list
- * 5.  Validate of PD and PT BOs and add new fence to the BOs
- * 6.  Validate all KFD BOs and Map them and add new fence
- * 7.  Merge PD BO list into KFD BO list. Unreserve all BOs
- * 8.  Restore back KFD BO list by removing PD BO entries
+ * 2.  Get two copies of PD BO list from all the VMs. Keep one copy as pd_list.
+ * 3   Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ *     BOs that need to be reserved.
+ * 4.  Reserve all the BOs
+ * 5.  Validate of PD and PT BOs.
+ * 6.  Validate all KFD BOs using kfd_bo_list and Map them and add new fence
+ * 7.  Add fence to all PD and PT BOs.
+ * 8.  Unreserve all BOs
  */
 
 int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
 {
-	struct amdgpu_bo_list_entry *pd_bo_list, *last_pd_bo_entry, *entry;
+	struct amdgpu_bo_list_entry *pd_bo_list, *entry;
 	struct amdkfd_process_info *process_info = info;
 	struct amdkfd_vm *peer_vm;
 	struct kgd_mem *mem;
@@ -2117,17 +2118,17 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
 	struct amdgpu_device *adev;
 	struct amdgpu_vm_parser param;
 	int ret = 0, i;
-	struct list_head duplicate_save;
+	struct list_head duplicate_save, pd_list;
 
 	if (WARN_ON(!process_info))
 		return -EINVAL;
 
 	INIT_LIST_HEAD(&duplicate_save);
-
+	INIT_LIST_HEAD(&pd_list);
 	INIT_LIST_HEAD(&ctx.list);
 	INIT_LIST_HEAD(&ctx.duplicates);
 
-	pd_bo_list = kcalloc(process_info->n_vms,
+	pd_bo_list = kcalloc(process_info->n_vms * 2,
 			     sizeof(struct amdgpu_bo_list_entry),
 			     GFP_KERNEL);
 	if (pd_bo_list == NULL)
@@ -2151,39 +2152,36 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			vm_list_node) {
 		amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list,
-				    &pd_bo_list[i]);
-		i++;
+				    &pd_bo_list[i++]);
+		amdgpu_vm_get_pd_bo(&peer_vm->base, &pd_list,
+				    &pd_bo_list[i++]);
 	}
 
-	/* Needed to splicing and cutting the lists */
-	last_pd_bo_entry = list_last_entry(&ctx.list,
-					   struct amdgpu_bo_list_entry,
-					   tv.head);
+	/* Reserve all BOs and page tables/directory. Add all BOs from
+	 * kfd_bo_list to ctx.list
+	 */
+	list_for_each_entry(mem, &process_info->kfd_bo_list,
+			    validate_list.head) {
 
-	/* Reserve all BOs and page tables/directory. */
-	list_splice_init(&ctx.list, &process_info->kfd_bo_list);
-	ret = ttm_eu_reserve_buffers(&ctx.ticket, &process_info->kfd_bo_list,
+		list_add_tail(&mem->resv_list.head, &ctx.list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
 				     false, &duplicate_save);
 	if (ret) {
 		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
 		goto ttm_reserve_fail;
 	}
 
-	/* Ensure kfd_bo_list does not change after ttm_eu_reserve_buffers(),
-	 * so that the following list operation such as list_cut_position()
-	 * can work as expected.
-	 */
 	if (!list_empty(&duplicate_save))
 		pr_err("BUG: list of BOs to reserve has duplicates!\n");
 
-	/* Restore kfd_bo_list. ctx.list contains only PDs */
-	list_cut_position(&ctx.list, &process_info->kfd_bo_list,
-			  &last_pd_bo_entry->tv.head);
-
 	amdgpu_sync_create(&ctx.sync);
 
 	/* Validate PDs*/
-	list_for_each_entry(entry, &ctx.list, tv.head) {
+	list_for_each_entry(entry, &pd_list, tv.head) {
 		struct amdgpu_bo *bo = entry->robj;
 
 		ret = amdgpu_amdkfd_bo_validate(bo, bo->prefered_domains,
@@ -2211,10 +2209,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
 				atomic64_read(&adev->num_evictions);
 	}
 
-	/* Wait for PD/PTs validate to finish and attach eviction fence.
-	 * PD/PT share the same reservation object
-	 */
-	list_for_each_entry(entry, &ctx.list, tv.head) {
+	/* Wait for PD/PTs validate to finish */
+	list_for_each_entry(entry, &pd_list, tv.head) {
 		struct amdgpu_bo *bo = entry->robj;
 
 		ttm_bo_wait(&bo->tbo, false, false);
@@ -2258,20 +2254,17 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info)
 		ttm_bo_wait(&bo->tbo, false, false);
 		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
 	}
-	list_for_each_entry(entry, &ctx.list, tv.head) {
+
+	/* Attach eviction fence to PD / PT BOs */
+	list_for_each_entry(entry, &pd_list, tv.head) {
 		struct amdgpu_bo *bo = entry->robj;
 
 		amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
 	}
 validate_map_fail:
-	/* Add PDs to kfd_bo_list for unreserve */
-	list_splice_init(&ctx.list, &process_info->kfd_bo_list);
-	ttm_eu_backoff_reservation(&ctx.ticket, &process_info->kfd_bo_list);
+	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
 	amdgpu_sync_free(&ctx.sync);
 ttm_reserve_fail:
-	/* Restore kfd_bo_list */
-	list_cut_position(&ctx.list, &process_info->kfd_bo_list,
-			  &last_pd_bo_entry->tv.head);
 	mutex_unlock(&process_info->lock);
 evict_fence_fail:
 	kfree(pd_bo_list);
-- 
2.7.4