meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/0167-drm-amdgpu-use-kiq-to-do-invalidate-tlb.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171

From 4ba0823c9eaa7d4b998da2c50735f6fe57f2580e Mon Sep 17 00:00:00 2001
From: Emily Deng <Emily.Deng@amd.com>
Date: Fri, 17 Aug 2018 18:25:36 +0800
Subject: [PATCH 0167/2940] drm/amdgpu: use kiq to do invalidate tlb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To avoid the tlb flush not interrupted by world switch, use kiq and one
command to do tlb invalidate.

v2:
Refine the invalidate lock position.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-and-Tested-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c |  3 -
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 74 +++++++++++++++++++++---
 3 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f5fcee141fbb..87bfb5ae7a45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -217,6 +217,10 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
 
+#define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
 int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
 					   enum amd_clockgating_state state);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 21adb1b6e5cb..38856365580d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,9 +22,6 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
-#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 7300be4816a9..46183c7730c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -311,6 +311,58 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
 	return req;
 }
 
+signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	signed long r, cnt = 0;
+	unsigned long flags;
+	uint32_t seq;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *ring = &kiq->ring;
+
+	if (!ring->ready)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	amdgpu_ring_alloc(ring, 32);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
+					    ref, mask);
+	amdgpu_fence_emit_polling(ring, &seq);
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq;
+
+	might_sleep();
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq;
+
+	return 0;
+
+failed_kiq:
+	pr_err("failed to invalidate tlb with kiq\n");
+	return r;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -332,13 +384,19 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
 	unsigned i, j;
-
-	spin_lock(&adev->gmc.invalidate_lock);
+	int r;
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
+		r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
+			hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
+		if (!r)
+			continue;
+
+		spin_lock(&adev->gmc.invalidate_lock);
+
 		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
 		/* Busy wait for ACK.*/
@@ -349,8 +407,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 				break;
 			cpu_relax();
 		}
-		if (j < 100)
+		if (j < 100) {
+			spin_unlock(&adev->gmc.invalidate_lock);
 			continue;
+		}
 
 		/* Wait for ACK with a delay.*/
 		for (j = 0; j < adev->usec_timeout; j++) {
@@ -360,13 +420,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 				break;
 			udelay(1);
 		}
-		if (j < adev->usec_timeout)
+		if (j < adev->usec_timeout) {
+			spin_unlock(&adev->gmc.invalidate_lock);
 			continue;
-
+		}
+		spin_unlock(&adev->gmc.invalidate_lock);
 		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 	}
-
-	spin_unlock(&adev->gmc.invalidate_lock);
 }
 
 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
-- 
2.17.1