aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1540-drm-amdkfd-fix-zero-reading-of-VMID-and-PASID-for-Ha.patch
blob: 76746b0a4a19264532c72406fc28b27bfb31818b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
From 69dd0218c03469b032d6760fa71df50256a118d1 Mon Sep 17 00:00:00 2001
From: Lan Xiao <Lan.Xiao@amd.com>
Date: Thu, 13 Oct 2016 16:03:33 -0400
Subject: [PATCH 1540/4131] drm/amdkfd: fix zero reading of VMID and PASID for
 Hawaii

Upon VM Fault, the VMID and PASID written by HW are zeros in
Hawaii. Instead of reading from ih_ring_entry, read directly
from the registers. This workaround fix the soft hang issues
caused by mishandled VM Fault in Hawaii.

Fix BUG: SWDEV-100220

Change-Id: I1c89263e4bccde037d24f71f3efef7903d83d2f0
Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 17 ++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c  | 24 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c           |  9 +++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c        |  7 +++++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h             |  7 +++++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  1 +
 6 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 3b812a8..07a843f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -147,6 +147,7 @@ static int write_config_static_mem(struct kgd_dev *kgd, bool swizzle_enable,
 		uint8_t element_size, uint8_t index_stride, uint8_t mtype);
 static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 		uint32_t page_table_base);
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
 
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
@@ -204,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
 			get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
+	.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
 	.alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
 	.free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
@@ -947,3 +949,18 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
 	WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
 }
 
+ /**
+  * read_vmid_from_vmfault_reg - read vmid from register
+  *
+  * adev: amdgpu_device pointer
+  * @vmid: vmid pointer
+  * read vmid from register (CIK).
+  */
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+{
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+	return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 3f49f8e..c60a71a 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -37,12 +37,34 @@ static bool is_cpc_vm_fault(struct kfd_dev *dev,
 		return true;
 	return false;
 }
+
 static bool cik_event_interrupt_isr(struct kfd_dev *dev,
-					const uint32_t *ih_ring_entry)
+					const uint32_t *ih_ring_entry,
+					uint32_t *patched_ihre,
+					bool *patched_flag)
 {
 	const struct cik_ih_ring_entry *ihre =
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
+	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
+	struct cik_ih_ring_entry *tmp_ihre =
+			(struct cik_ih_ring_entry *) patched_ihre;
+
+	/* This workaround is due to HW/FW limitation on Hawaii that
+	 * VMID and PASID are not written into ih_ring_entry
+	 */
+	if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+		dev->device_info->asic_family == CHIP_HAWAII) {
+		*patched_flag = true;
+		*tmp_ihre = *ihre;
 
+		tmp_ihre->vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
+		tmp_ihre->pasid = f2g->get_atc_vmid_pasid_mapping_pasid(
+						 dev->kgd, tmp_ihre->vmid);
+		return (tmp_ihre->pasid != 0) &&
+			tmp_ihre->vmid >= dev->vm_info.first_vmid_kfd &&
+			tmp_ihre->vmid <= dev->vm_info.last_vmid_kfd;
+	}
 	/* Do not process in ISR, just request it to be forwarded to WQ. */
 	return (ihre->pasid != 0) &&
 		(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 6bab9db..dbbe3cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -674,14 +674,19 @@ static int kfd_resume(struct kfd_dev *kfd)
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
+	uint32_t patched_ihre[DIV_ROUND_UP(
+				kfd->device_info->ih_ring_entry_size,
+				sizeof(uint32_t))];
+	bool is_patched = false;
+
 	if (!kfd->init_complete)
 		return;
 
 	spin_lock(&kfd->interrupt_lock);
 
 	if (kfd->interrupts_active
-	    && interrupt_is_wanted(kfd, ih_ring_entry)
-	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
+	    && interrupt_is_wanted(kfd, ih_ring_entry, patched_ihre, &is_patched)
+	    && enqueue_ih_ring_entry(kfd, is_patched ? patched_ihre : ih_ring_entry))
 		queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock(&kfd->interrupt_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 4d1639f..d737df0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -148,12 +148,15 @@ static void interrupt_wq(struct work_struct *work)
 		dev->device_info->event_interrupt_class->interrupt_wq(dev, ih_ring_entry);
 }
 
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
+bool interrupt_is_wanted(struct kfd_dev *dev,
+			const uint32_t *ih_ring_entry,
+			uint32_t *patched_ihre, bool *flag)
 {
 	/* integer and bitwise OR so there is no boolean short-circuiting */
 	unsigned wanted = 0;
 
-	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, ih_ring_entry);
+	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
+					 ih_ring_entry, patched_ihre, flag);
 
 	return wanted != 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 71a27f8..92b0adf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -175,7 +175,8 @@ enum asic_family_type {
 			   (chip) == CHIP_HAWAII)
 
 struct kfd_event_interrupt_class {
-	bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
+	bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry,
+				uint32_t *patched_ihre, bool *patched_flag);
 	void (*interrupt_wq)(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
 };
 
@@ -805,7 +806,9 @@ int kfd_interrupt_init(struct kfd_dev *dev);
 void kfd_interrupt_exit(struct kfd_dev *dev);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry);
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
+bool interrupt_is_wanted(struct kfd_dev *dev,
+				const uint32_t *ih_ring_entry,
+				uint32_t *patched_ihre, bool *flag);
 
 /* Power Management */
 void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 10e9f60..1971537 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -311,6 +311,7 @@ struct kfd2kgd_calls {
 	uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
 					struct kgd_dev *kgd,
 					uint8_t vmid);
+	uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 	void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
 					uint8_t vmid);
 	int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va,
-- 
2.7.4