path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1139-drm-amdkfd-Fix-leaking-HQD-in-SW-scheduler.patch
From 898601bab9aa0bba8cb3bd4e8ed2a75aab7d3b86 Mon Sep 17 00:00:00 2001
From: Amber Lin <Amber.Lin@amd.com>
Date: Fri, 27 May 2016 15:48:34 -0400
Subject: [PATCH 1139/4131] drm/amdkfd: Fix leaking HQD in SW scheduler

With the software scheduler, process_termination_nocpsch doesn't deallocate the HQD
when destroying the CP queue. However, this is done properly in
destroy_queue_nocpsch. To avoid errors like this, this patch
factors out the duplicated code in process_termination_nocpsch and
destroy_queue_nocpsch into destroy_queue_nocpsch_locked.

BUG: SWDEV-93672

Change-Id: I626dcef388b1ce91f159b1497d82c41630660ee8
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 94 +++++++++-------------
 1 file changed, 36 insertions(+), 58 deletions(-)
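
Note for review: below is a minimal userspace C sketch (not the kfd code itself) of the
pattern this patch applies. The teardown logic lives in a *_locked helper that assumes the
caller already holds the lock, a thin public wrapper takes the lock for the single-queue
path, and the process-termination path reuses the same helper inside one critical section,
so no cleanup step (such as releasing the HQD) can be skipped. All names here
(queue_manager, destroy_queue_locked, hqd_slot, and so on) are illustrative stand-ins, not
identifiers from the driver.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct queue {
	struct queue *next;
	int hqd_slot;			/* stand-in for the HQD the real code must free */
};

struct queue_manager {
	pthread_mutex_t lock;
	struct queue *queues;		/* singly linked list of live queues */
	int total_queue_count;
};

/* Caller must hold qm->lock: unlink the queue, release its slot, free it. */
static int destroy_queue_locked(struct queue_manager *qm, struct queue *q)
{
	struct queue **pp;

	for (pp = &qm->queues; *pp; pp = &(*pp)->next) {
		if (*pp == q) {
			*pp = q->next;
			break;
		}
	}
	printf("releasing HQD slot %d\n", q->hqd_slot);	/* deallocate_hqd() stand-in */
	qm->total_queue_count--;
	free(q);
	return 0;
}

/* Single-queue path: take the lock, then reuse the locked helper. */
static int destroy_queue(struct queue_manager *qm, struct queue *q)
{
	int ret;

	pthread_mutex_lock(&qm->lock);
	ret = destroy_queue_locked(qm, q);
	pthread_mutex_unlock(&qm->lock);
	return ret;
}

/* Process-termination path: same helper, one critical section, no duplication. */
static int destroy_all_queues(struct queue_manager *qm)
{
	int ret = 0;

	pthread_mutex_lock(&qm->lock);
	while (qm->queues && !ret)
		ret = destroy_queue_locked(qm, qm->queues);
	pthread_mutex_unlock(&qm->lock);
	return ret;
}

int main(void)
{
	struct queue_manager qm = { .lock = PTHREAD_MUTEX_INITIALIZER };
	int i;

	for (i = 0; i < 3; i++) {
		struct queue *q = calloc(1, sizeof(*q));

		q->hqd_slot = i;
		q->next = qm.queues;
		qm.queues = q;
		qm.total_queue_count++;
	}

	destroy_queue(&qm, qm.queues);	/* explicit destruction of one queue */
	destroy_all_queues(&qm);	/* process teardown reuses the same path */
	printf("queues left: %d\n", qm.total_queue_count);
	return 0;
}

Built with gcc -pthread, this prints one release per queue; the point is that both entry
points funnel into destroy_queue_locked(), mirroring how destroy_queue_nocpsch() and
process_termination_nocpsch() both call destroy_queue_nocpsch_locked() after this patch.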

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e04832f..c973a3a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -314,48 +314,41 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 	return 0;
 }
 
-static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
+ * to avoid unsynchronized access
+ */
+static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
-	int retval;
+	int retval = 0;
 	struct mqd_manager *mqd;
 
-	BUG_ON(!dqm || !q || !q->mqd || !qpd);
+	WARN_ON(!dqm || !q || !q->mqd || !qpd);
 
-	retval = 0;
-
-	pr_debug("kfd: In Func %s\n", __func__);
-
-	mutex_lock(&dqm->lock);
+	mqd = dqm->ops.get_mqd_manager(dqm,
+		get_mqd_type_from_queue_type(q->properties.type));
+	if (!mqd) {
+		retval = -ENOMEM;
+		goto out;
+	}
 
-	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
+	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 		deallocate_hqd(dqm, q);
-	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		dqm->sdma_queue_count--;
 		deallocate_sdma_queue(dqm, q->sdma_id);
 	} else {
 		pr_debug("q->properties.type is invalid (%d)\n",
-				q->properties.type);
+			q->properties.type);
 		retval = -EINVAL;
-		goto out;
 	}
+	dqm->total_queue_count--;
 
 	retval = mqd->destroy_mqd(mqd, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 				KFD_HIQ_TIMEOUT,
 				q->pipe, q->queue);
-
 	if (retval != 0)
 		goto out;
 
@@ -367,16 +360,22 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	/*
-	 * Unconditionally decrement this counter, regardless of the queue's
-	 * type
-	 */
-	dqm->total_queue_count--;
-	pr_debug("Total of %d queues are accountable so far\n",
-			dqm->total_queue_count);
-
 out:
+	return retval;
+}
+
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+
+	BUG_ON(!dqm || !q || !q->mqd || !qpd);
+
+	mutex_lock(&dqm->lock);
+	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
 	mutex_unlock(&dqm->lock);
+
 	return retval;
 }
 
@@ -1376,37 +1375,16 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
 		struct qcm_process_device *qpd)
 {
 	struct queue *q, *next;
-	struct mqd_manager *mqd;
 	struct device_process_node *cur, *next_dpn;
+	int retval = 0;
 
 	mutex_lock(&dqm->lock);
 
 	/* Clear all user mode queues */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
-		mqd = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-		if (!mqd) {
-			mutex_unlock(&dqm->lock);
-			return -ENOMEM;
-		}
-
-		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-			dqm->sdma_queue_count--;
-			deallocate_sdma_queue(dqm, q->sdma_id);
-		}
-
-		list_del(&q->list);
-		if (q->properties.is_active)
-			dqm->queue_count--;
-
-		dqm->total_queue_count--;
-		mqd->destroy_mqd(mqd, q->mqd,
-				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
-				KFD_HIQ_TIMEOUT,
-				q->pipe, q->queue);
-		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
-		if (list_empty(&qpd->queues_list))
-			deallocate_vmid(dqm, qpd, q);
+		retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
+		if (retval)
+			goto out;
 	}
 
 	/* Unregister process */
@@ -1419,9 +1397,9 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
 		}
 	}
 
+out:
 	mutex_unlock(&dqm->lock);
-
-	return 0;
+	return retval;
 }
 
 
-- 
2.7.4