From a93c77f2b3512cbe67090fdce034827adff9ef76 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Thu, 15 Jun 2017 14:22:14 -0400
Subject: [PATCH 1298/4131] drm/amdkfd: Reset process queues if it VM_FAULTs

Currently, queues are preempted during process termination or if the
process VM_FAULTs. Instead, reset the queues in those cases.
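
The flag threads top to bottom: kfd_process_vm_fault() and
process_termination_cpsch() now request a reset, which
process_evict_queues() and execute_queues_cpsch() forward through
unmap_queues_cpsch() into pm_send_unmap_queue(), while all other
callers pass false and keep the old preempt behaviour. A minimal
standalone C sketch of that plumbing follows; the simplified
signatures and the send_unmap_packet() stand-in are illustrative
only, not the real driver code:

    /* Userspace model of the reset-flag plumbing added by this patch.
     * Function names mirror the kernel, but this is only a sketch. */
    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for pm_send_unmap_queue(), the last hop that turns the
     * flag into an unmap action in the real driver. */
    static int send_unmap_packet(bool reset)
    {
        printf("unmap queues, action = %s\n", reset ? "reset" : "preempt");
        return 0;
    }

    static int unmap_queues_cpsch(bool reset)
    {
        return send_unmap_packet(reset);
    }

    static int execute_queues_cpsch(bool static_queues_included, bool reset)
    {
        (void)static_queues_included;
        return unmap_queues_cpsch(reset);
    }

    static int process_evict_queues(bool reset)
    {
        return execute_queues_cpsch(false, reset);
    }

    int main(void)
    {
        process_evict_queues(false); /* suspend/quiesce path: preempt */
        process_evict_queues(true);  /* VM-fault path: reset the queues */
        return 0;
    }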

BUG: SWDEV-110763

Change-Id: Ib088fb8426f22948ec9641c88586c5107a9ba442
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 38 ++++++++++++----------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  3 +-
 3 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 6249c49..9ffadc0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -733,7 +733,7 @@ static int quiesce_process_mm(struct kfd_process *p)
 	unsigned int n_evicted = 0;
 
 	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
-		r = process_evict_queues(pdd->dev->dqm, &pdd->qpd);
+		r = process_evict_queues(pdd->dev->dqm, &pdd->qpd, false);
 		if (r != 0) {
 			pr_err("Failed to evict process queues\n");
 			goto fail;
@@ -805,7 +805,7 @@ int kgd2kfd_quiesce_mm(struct kfd_dev *kfd, struct mm_struct *mm)
 		r = -ENODEV;
 		pdd = kfd_get_process_device_data(kfd, p);
 		if (pdd)
-			r = process_evict_queues(kfd->dqm, &pdd->qpd);
+			r = process_evict_queues(kfd->dqm, &pdd->qpd, false);
 	} else {
 		r = quiesce_process_mm(p);
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 1c1535e..052a059 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
-				bool static_queues_included);
+				bool static_queues_included,
+				bool reset);
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 		enum kfd_unmap_queues_filter filter,
 		uint32_t filter_param, bool reset);
@@ -549,7 +550,8 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
 }
 
 int process_evict_queues(struct device_queue_manager *dqm,
-		struct qcm_process_device *qpd)
+			 struct qcm_process_device *qpd,
+			 bool reset)
 {
 	struct queue *q, *next;
 	struct mqd_manager *mqd;
@@ -588,7 +590,7 @@ int process_evict_queues(struct device_queue_manager *dqm,
 			dqm->queue_count--;
 	}
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = execute_queues_cpsch(dqm, qpd->is_debug);
+		retval = execute_queues_cpsch(dqm, qpd->is_debug, reset);
 
 out:
 	mutex_unlock(&dqm->lock);
@@ -658,7 +660,7 @@ int process_restore_queues(struct device_queue_manager *dqm,
 		}
 	}
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
-		retval = execute_queues_cpsch(dqm, false);
+		retval = execute_queues_cpsch(dqm, false, false);
 
 	if (retval == 0)
 		qpd->evicted = 0;
@@ -974,7 +976,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	init_interrupts(dqm);
 
 	mutex_lock(&dqm->lock);
-	execute_queues_cpsch(dqm, false);
+	execute_queues_cpsch(dqm, false, false);
 	mutex_unlock(&dqm->lock);
 
 	return 0;
@@ -1022,7 +1024,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	list_add(&kq->list, &qpd->priv_queue_list);
 	dqm->queue_count++;
 	qpd->is_debug = true;
-	execute_queues_cpsch(dqm, false);
+	execute_queues_cpsch(dqm, false, false);
 	mutex_unlock(&dqm->lock);
 
 	return 0;
@@ -1037,7 +1039,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	list_del(&kq->list);
 	dqm->queue_count--;
 	qpd->is_debug = false;
-	execute_queues_cpsch(dqm, true);
+	execute_queues_cpsch(dqm, true, false);
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type.
@@ -1110,7 +1112,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	list_add(&q->list, &qpd->queues_list);
 	if (q->properties.is_active) {
 		dqm->queue_count++;
-		retval = execute_queues_cpsch(dqm, false);
+		retval = execute_queues_cpsch(dqm, false, false);
 	}
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
@@ -1158,10 +1160,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 }
 
 static int unmap_sdma_queues(struct device_queue_manager *dqm,
-				unsigned int sdma_engine)
+			     unsigned int sdma_engine,
+			     bool reset)
 {
 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, reset,
 			sdma_engine);
 }
 
@@ -1206,8 +1209,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 		dqm->sdma_queue_count);
 
 	if (dqm->sdma_queue_count > 0) {
-		unmap_sdma_queues(dqm, 0);
-		unmap_sdma_queues(dqm, 1);
+		unmap_sdma_queues(dqm, 0, reset);
+		unmap_sdma_queues(dqm, 1, reset);
 	}
 
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
@@ -1234,7 +1237,8 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 
 /* dqm->lock mutex has to be locked before calling this function */
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
-				bool static_queues_included)
+				bool static_queues_included,
+				bool reset)
 {
 	int retval;
 	enum kfd_unmap_queues_filter filter;
@@ -1243,7 +1247,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,
 			KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
 			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
 
-	retval = unmap_queues_cpsch(dqm, filter, 0, false);
+	retval = unmap_queues_cpsch(dqm, filter, 0, reset);
 	if (retval != 0) {
 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		return retval;
@@ -1297,7 +1301,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	retval = execute_queues_cpsch(dqm, false);
+	retval = execute_queues_cpsch(dqm, false, false);
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -1521,7 +1525,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		}
 	}
 
-	retval = execute_queues_cpsch(dqm, true);
+	retval = execute_queues_cpsch(dqm, true, true);
 
 	/* lastly, free mqd resources */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
@@ -1654,7 +1658,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm,
 		return -EINVAL;
 	pdd = kfd_get_process_device_data(dqm->dev, p);
 	if (pdd)
-		ret = process_evict_queues(dqm, &pdd->qpd);
+		ret = process_evict_queues(dqm, &pdd->qpd, true);
 	kfd_unref_process(p);
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 841283a..a492307 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -216,7 +216,8 @@ unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 
 int process_evict_queues(struct device_queue_manager *dqm,
-		struct qcm_process_device *qpd);
+		struct qcm_process_device *qpd,
+		bool reset);
 int process_restore_queues(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd);
 
-- 
2.7.4