1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
From 930c5ff4390221cccf368b305c04351fbcf0dfcf Mon Sep 17 00:00:00 2001
From: Alexey Skidanov <alexey.skidanov@gmail.com>
Date: Tue, 25 Nov 2014 10:34:31 +0200
Subject: [PATCH 0124/1050] drm/amdkfd: Add bad opcode exception handling
Signed-off-by: Alexey Skidanov <alexey.skidanov@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 5 ++++-
drivers/gpu/drm/amd/amdkfd/cik_int.h | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_events.c | 21 +++++++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_events.h | 1 +
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 ++
5 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 629510a..211fc48 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -36,7 +36,8 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
- ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG);
+ ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
+ ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
@@ -55,6 +56,8 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
kfd_signal_event_interrupt(pasid, 0, 0);
else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
kfd_signal_event_interrupt(pasid, ihre->data & 0xFF, 8);
+ else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
+ kfd_signal_hw_exception_event(pasid);
}
const struct kfd_event_interrupt_class event_interrupt_class_cik = {
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_int.h b/drivers/gpu/drm/amd/amdkfd/cik_int.h
index bbef9e2..79a16d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_int.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_int.h
@@ -34,6 +34,7 @@ struct cik_ih_ring_entry {
#define CIK_INTSRC_DEQUEUE_COMPLETE 0xC6
#define CIK_INTSRC_CP_END_OF_PIPE 0xB5
+#define CIK_INTSRC_CP_BAD_OPCODE 0xB7
#define CIK_INTSRC_SQ_INTERRUPT_MSG 0xEF
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index fa13d3e..5c3a81e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -936,3 +936,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
mutex_unlock(&p->event_mutex);
mutex_unlock(&p->mutex);
}
+
+void kfd_signal_hw_exception_event(unsigned int pasid)
+{
+ /*
+ * Signal the HW-exception events of the process that owns this pasid.
+ * We run from a workqueue rather than process context, so the process
+ * may be exiting; the lookup hands it back with p->mutex already held.
+ */
+ struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+
+ if (!p)
+ return; /* No process for this pasid; presumably it already exited. */
+
+ mutex_lock(&p->event_mutex);
+
+ /* Look up this process's events by type and signal each match. */
+ lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
+
+ mutex_unlock(&p->event_mutex);
+ mutex_unlock(&p->mutex); /* Drops the lock taken by the lookup above. */
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 691cf85..28f6838 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -74,6 +74,7 @@ struct kfd_event {
/* Matching HSA_EVENTTYPE */
#define KFD_EVENT_TYPE_SIGNAL 0
+#define KFD_EVENT_TYPE_HW_EXCEPTION 3
#define KFD_EVENT_TYPE_DEBUG 5
#define KFD_EVENT_TYPE_MEMORY 8
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 3594503..9383494 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -682,6 +682,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_device_global_init_class device_global_init_class_cik;
enum kfd_event_wait_result {
KFD_WAIT_COMPLETE,
@@ -701,6 +702,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
void kfd_signal_iommu_event(struct kfd_dev *dev,
unsigned int pasid, unsigned long address,
bool is_write_requested, bool is_execute_requested);
+void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
--
1.9.1
|