aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3184-drm-amdgpu-Fix-potential-deadlock-when-restoring-BOs.patch
blob: 3fbb184ba3e43ea4a951cf3291c48659b00ca54d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
From c861d6b46c53645ff531e90e774bcca8de72c2d0 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 17 Jan 2018 11:39:31 -0500
Subject: [PATCH 3184/4131] drm/amdgpu: Fix potential deadlock when restoring
 BOs

Locking the PD reservation object when querying the PD address while
restarting the queues can lead to a deadlock with an eviction waiting
for the same restore to finish.

Avoid that by remembering the PD address every time we validate the
page directory. That way the address can be queried later without
locking the PD reservation again.

Change-Id: I00498db93ae9a61a43861c1a2363f99402922720
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>

Conflicts:
      drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 47 ++++++++----------------
 2 files changed, 17 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0dbba0f..be9b009 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -126,6 +126,8 @@ struct amdkfd_vm {
 	struct amdgpu_device *adev;
 	/* Points to the KFD process VM info*/
 	struct amdkfd_process_info *process_info;
+
+	uint64_t pd_phys_addr;
 };
 
 int amdgpu_amdkfd_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cb8b4ab..2409b56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -382,17 +382,18 @@ static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
  * again. Page directories are only updated after updating page
  * tables.
  */
-static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm)
 {
-	struct amdgpu_bo *pd = vm->root.base.bo;
+	struct amdgpu_bo *pd = vm->base.root.base.bo;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
 	struct amdgpu_vm_parser param;
+	uint64_t addr, flags = AMDGPU_PTE_VALID;
 	int ret;
 
 	param.domain = AMDGPU_GEM_DOMAIN_VRAM;
 	param.wait = false;
 
-	ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
+	ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate,
 					&param);
 	if (ret) {
 		pr_err("amdgpu: failed to validate PT BOs\n");
@@ -404,7 +405,12 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
 		pr_err("amdgpu: failed to validate PD\n");
 		return ret;
 	}
-	if (vm->use_cpu_for_update) {
+
+	addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo);
+	amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
+	vm->pd_phys_addr = addr;
+
+	if (vm->base.use_cpu_for_update) {
 		ret = amdgpu_bo_kmap(pd, NULL);
 		if (ret) {
 			pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
@@ -519,7 +525,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
 		goto err_alloc_pts;
 	}
 
-	ret = vm_validate_pt_pd_bos(avm);
+	ret = vm_validate_pt_pd_bos(kvm);
 	if (ret != 0) {
 		pr_err("validate_pt_pd_bos() failed\n");
 		goto err_alloc_pts;
@@ -1366,31 +1372,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	return ret;
 }
 
-static u64 get_vm_pd_gpu_offset(void *vm)
-{
-	struct amdgpu_vm *avm = (struct amdgpu_vm *) vm;
-	struct amdgpu_device *adev =
-		amdgpu_ttm_adev(avm->root.base.bo->tbo.bdev);
-	u64 offset;
-	uint64_t flags = AMDGPU_PTE_VALID;
-
-	BUG_ON(avm == NULL);
-
-	amdgpu_bo_reserve(avm->root.base.bo, false);
-
-	offset = amdgpu_bo_gpu_offset(avm->root.base.bo);
-
-	amdgpu_bo_unreserve(avm->root.base.bo);
-
-	/* On some ASICs the FB doesn't start at 0. Adjust FB offset
-	 * to an actual MC address.
-	 */
-	if (adev->gmc.gmc_funcs->get_vm_pde)
-		amdgpu_gmc_get_vm_pde(adev, -1, &offset, &flags);
-
-	return offset;
-}
-
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
 					  void **process_info,
 					  struct dma_fence **ef)
@@ -1513,7 +1494,9 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
 
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
 {
-	return get_vm_pd_gpu_offset(vm) >> AMDGPU_GPU_PAGE_SHIFT;
+	struct amdkfd_vm *avm = (struct amdkfd_vm *)vm;
+
+	return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
 }
 
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
@@ -1920,7 +1903,7 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
 
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
-		ret = vm_validate_pt_pd_bos(&peer_vm->base);
+		ret = vm_validate_pt_pd_bos(peer_vm);
 		if (ret)
 			return ret;
 	}
-- 
2.7.4