aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1504-drm-amdkfd-Fix-unbound-PASID-issue-when-process-term.patch
blob: b9a575abf5bb9c0345f15ff9705463714410ac8e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
From 30f006d99b9097263ea61ad50486903916d8b246 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 7 Sep 2016 19:44:21 -0400
Subject: [PATCH 1504/4131] drm/amdkfd: Fix unbound PASID issue when process
 terminates on APU

When a process with a long-running wave is terminated with Ctrl+C on an
APU, the HW may hang: by the time the terminating queues are dequeued
on the HW scheduler, the PASID is already unbound, so ATC translation
fails. Move the dequeueing forward into the IOMMU unbind callback, where
the PASID is still bound.

BUG: SWDEV-101676

Change-Id: I258551a449fb9e6acbff005e7f13f4cebd9966dd
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  |  7 +++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c               | 10 ++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  4 ++++
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 539280b..4eb7354 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -598,6 +598,13 @@ struct kfd_process_device {
 	 * wavefronts on process termination
 	 */
 	bool reset_wavefronts;
+
+	/* Set once the pdd has been dequeued from the dqm.
+	 * Prevents dev->dqm->ops.process_termination() from being called
+	 * a second time when it has already been called from the IOMMU
+	 * callback function.
+	 */
+	bool already_dequeued;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d65dbc5..c98b5da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -726,6 +726,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 		pdd->reset_wavefronts = false;
 		pdd->process = p;
 		pdd->bound = PDD_UNBOUND;
+		pdd->already_dequeued = false;
 		list_add(&pdd->per_device_list, &p->per_device_data);
 
 		/* Init idr used for memory handle translation */
@@ -869,10 +870,11 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 	down_write(&p->lock);
 
 	pdd = kfd_get_process_device_data(dev, p);
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+	if (pdd)
+		/* For GPUs relying on the IOMMU, we need to dequeue here
+		 * while the PASID is still bound.
+		 */
+		kfd_process_dequeue_from_device(pdd);
 
 	up_write(&p->lock);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 46d0d93..af90b0a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -76,7 +76,11 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
 	struct kfd_process *p = pdd->process;
 	int retval;
 
+	if (pdd->already_dequeued)
+		return;
+
 	retval = dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	pdd->already_dequeued = true;
 	/* Checking pdd->reset_wavefronts may not be needed, because
 	 * if reset_wavefronts was set to true before, which means unmapping
 	 * failed, process_termination should fail too until we reset
-- 
2.7.4