1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
From 27a5f1ee886a62554a57c71b1c7c9572e65182fb Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Fri, 16 Dec 2016 17:18:36 -0500
Subject: [PATCH 1591/4131] drm/amdkfd: Fix a small bug that a variable may be
polluted
ctx.duplicates in the code should not be used as the parameter of
ttm_eu_reserve_buffers(), because it may be polluted, but it should
not, as it will be used later in the code.
Error messages are also updated to make debugging easier.
Change-Id: I340a7f33b4401603c717c1d18130ef7b00289ac0
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Conflicts:
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 26 ++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index c27abc7..3f2bc2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2069,10 +2069,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
struct amdgpu_device *adev;
struct amdgpu_vm_parser param;
int ret = 0, i;
+ struct list_head duplicate_save;
if (WARN_ON(master_vm == NULL || master_vm->master != master_vm))
return -EINVAL;
+ INIT_LIST_HEAD(&duplicate_save);
+
INIT_LIST_HEAD(&ctx.list);
INIT_LIST_HEAD(&ctx.duplicates);
@@ -2095,7 +2098,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
goto evict_fence_fail;
}
- /* Get PD BO list and PT BO list from all the VMs */
+ /* Get PD BO list from the VM */
amdgpu_vm_get_pd_bo(&master_vm->base, &ctx.list,
&pd_bo_list[0]);
@@ -2115,26 +2118,33 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
mutex_lock(&master_vm->lock);
list_splice_init(&ctx.list, &master_vm->kfd_bo_list);
ret = ttm_eu_reserve_buffers(&ctx.ticket, &master_vm->kfd_bo_list,
- false, &ctx.duplicates);
+ false, &duplicate_save);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
}
+ /* Ensure kfd_bo_list does not change after ttm_eu_reserve_buffers(),
+ * so that the following list operation such as list_cut_position()
+ * can work as expected.
+ */
+ if (!list_empty(&duplicate_save))
+ pr_err("BUG: list of BOs to reserve has duplicates!\n");
+
/* Restore kfd_bo_list. ctx.list contains only PDs */
list_cut_position(&ctx.list, &master_vm->kfd_bo_list,
&last_pd_bo_entry->tv.head);
amdgpu_sync_create(&ctx.sync);
- /* Validate PDs and PTs */
+ /* Validate PDs*/
list_for_each_entry(entry, &ctx.list, tv.head) {
struct amdgpu_bo *bo = entry->robj;
ret = amdgpu_amdkfd_bo_validate(bo, bo->prefered_domains,
false);
if (ret) {
- pr_debug("Memory eviction: Validate failed. Try again\n");
+ pr_debug("Memory eviction: Validate PD failed. Try again\n");
goto validate_map_fail;
}
}
@@ -2147,7 +2157,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
ret = amdgpu_amdkfd_bo_validate(bo, bo->prefered_domains,
false);
if (ret) {
- pr_debug("Memory eviction: Validate failed. Try again\n");
+ pr_debug("Memory eviction: Validate PTs failed. Try again\n");
goto validate_map_fail;
}
}
@@ -2174,7 +2184,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
atomic64_read(&adev->num_evictions);
}
- /* Wait for PT/PD validate to finish and attach eviction fence.
+ /* Wait for PD/PTs validate to finish and attach eviction fence.
* PD/PT share the same reservation object
*/
list_for_each_entry(entry, &ctx.list, tv.head) {
@@ -2194,7 +2204,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
if (ret) {
- pr_debug("Memory eviction: Validate failed. Try again\n");
+ pr_debug("Memory eviction: Validate BOs failed. Try again\n");
goto validate_map_fail;
}
@@ -2205,7 +2215,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *m_vm)
bo_va_entry,
&ctx.sync);
if (ret) {
- pr_debug("Memory eviction: Map failed. Try again\n");
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail;
}
}
--
2.7.4
|