path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch
From f22f930895f11827f355a261704ddbd017a9c0e3 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 13 Oct 2017 20:20:08 -0400
Subject: [PATCH 2123/4131] drm/amdkfd: Fix signal handling performance
 regression

Initialize all event slots to "unsignaled" when the events page is
allocated.

The best strategy for finding all signaled events depends on how
densely populated the event page is. With many events, it's faster to
iterate over the event slots and look up only the signaled ones from
the IDR. With few events, it's faster to iterate over the IDR and skip
all unused event slots.

Bug: SWDEV-135399

Change-Id: I018e40c8fa8085d468ccede0f518bbb7f31a3443
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 35 +++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 69c3728..16e3a72 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -78,11 +78,15 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 	if (!page)
 		return NULL;
 
-	backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	backing_store = (void *) __get_free_pages(GFP_KERNEL,
 					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 	if (!backing_store)
 		goto fail_alloc_signal_store;
 
+	/* Initialize all events to unsignaled */
+	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
+
 	page->kernel_address = backing_store;
 	pr_debug("Allocated new event signal page at %p, for process %p\n",
 			page, p);
@@ -125,6 +129,10 @@ static struct kfd_signal_page *allocate_signal_page_dgpu(
 	if (!my_page)
 		return NULL;
 
+	/* Initialize all events to unsignaled */
+	memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
+
 	my_page->kernel_address = kernel_address;
 	my_page->handle = handle;
 	my_page->user_address = NULL;
@@ -436,12 +444,27 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 		uint64_t *slots = page_slots(p->signal_page);
 		uint32_t id;
 
-		idr_for_each_entry(&p->event_idr, ev, id) {
-			if (id >= KFD_SIGNAL_EVENT_LIMIT)
-				break;
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+			/* With relatively few events, it's faster to
+			 * iterate over the event IDR
+			 */
+			idr_for_each_entry(&p->event_idr, ev, id) {
+				if (id >= KFD_SIGNAL_EVENT_LIMIT)
+					break;
 
-			if (slots[id] != UNSIGNALED_EVENT_SLOT)
-				set_event_from_interrupt(p, ev);
+				if (slots[id] != UNSIGNALED_EVENT_SLOT)
+					set_event_from_interrupt(p, ev);
+			}
+		} else {
+			/* With relatively many events, it's faster to
+			 * iterate over the signal slots and lookup
+			 * only signaled events from the IDR.
+			 */
+			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+					ev = lookup_event_by_id(p, id);
+					set_event_from_interrupt(p, ev);
+				}
 		}
 	}
 
-- 
2.7.4
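
Editor's note: the dense-vs-sparse iteration choice this patch adds can be
illustrated with a small stand-alone sketch. Everything below (slots[],
events[], by_id[], process_signaled(), SLOT_LIMIT) is a hypothetical
user-space stand-in for the kernel's signal page, event IDR and
set_event_from_interrupt(); it is not the amdkfd code itself, only a sketch
of the heuristic described in the commit message.

    #include <stdint.h>
    #include <stdio.h>

    #define SLOT_LIMIT      4096
    #define UNSIGNALED_SLOT UINT64_MAX

    struct ev { uint32_t id; };

    static uint64_t  slots[SLOT_LIMIT];   /* one 64-bit signal slot per event ID */
    static struct ev events[SLOT_LIMIT];  /* compact list; stands in for the IDR,
                                             which visits only allocated IDs */
    static struct ev *by_id[SLOT_LIMIT];  /* ID -> event lookup (IDR lookup) */
    static unsigned int nevents;

    static void process_signaled(struct ev *e)
    {
        if (e)
            printf("event %u signaled\n", e->id);
    }

    static void handle_signals(void)
    {
        uint32_t i, id;

        if (nevents < SLOT_LIMIT / 2) {
            /* Few events: visit only the allocated events and check
               each one's slot. */
            for (i = 0; i < nevents; i++) {
                id = events[i].id;
                if (slots[id] != UNSIGNALED_SLOT)
                    process_signaled(&events[i]);
            }
        } else {
            /* Many events: scan the flat slot array once and look up
               only the slots that are actually signaled. */
            for (id = 0; id < SLOT_LIMIT; id++)
                if (slots[id] != UNSIGNALED_SLOT)
                    process_signaled(by_id[id]);
        }
    }

    int main(void)
    {
        uint32_t id;

        /* Mirror the patch: every slot starts out unsignaled. */
        for (id = 0; id < SLOT_LIMIT; id++)
            slots[id] = UNSIGNALED_SLOT;

        events[nevents].id = 7;
        by_id[7] = &events[nevents];
        nevents++;
        slots[7] = 1;                      /* event 7 fires */

        handle_signals();
        return 0;
    }

The crossover point (half the slot limit) mirrors the patch's
p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2 test: below it, visiting
only the allocated events is cheaper than scanning every slot; above it, one
linear pass over the flat slot array with lookups only for signaled slots
wins.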