aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch91
1 files changed, 91 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch
new file mode 100644
index 00000000..27bb233a
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch
@@ -0,0 +1,91 @@
+From f22f930895f11827f355a261704ddbd017a9c0e3 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Fri, 13 Oct 2017 20:20:08 -0400
+Subject: [PATCH 2123/4131] drm/amdkfd: Fix signal handling performance
+ regression
+
+Initialize all event slots to "unsignaled" when the events page is
+allocated.
+
+When looking for signaled events, the best strategy for finding all
+signaled events depends on how densely-populated the event page is.
+For many events, it's faster to iterate over event slots and look up
+only the unsignaled ones from the IDR. For few events, it's faster
+to iterate over the IDR and skip all unused event slots.
+
+Bug: SWDEV-135399
+
+Change-Id: I018e40c8fa8085d468ccede0f518bbb7f31a3443
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 35 +++++++++++++++++++++++++++------
+ 1 file changed, 29 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index 69c3728..16e3a72 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -78,11 +78,15 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
+ if (!page)
+ return NULL;
+
+- backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
++ backing_store = (void *) __get_free_pages(GFP_KERNEL,
+ get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+ if (!backing_store)
+ goto fail_alloc_signal_store;
+
++ /* Initialize all events to unsignaled */
++ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
++ KFD_SIGNAL_EVENT_LIMIT * 8);
++
+ page->kernel_address = backing_store;
+ pr_debug("Allocated new event signal page at %p, for process %p\n",
+ page, p);
+@@ -125,6 +129,10 @@ static struct kfd_signal_page *allocate_signal_page_dgpu(
+ if (!my_page)
+ return NULL;
+
++ /* Initialize all events to unsignaled */
++ memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT,
++ KFD_SIGNAL_EVENT_LIMIT * 8);
++
+ my_page->kernel_address = kernel_address;
+ my_page->handle = handle;
+ my_page->user_address = NULL;
+@@ -436,12 +444,27 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+ uint64_t *slots = page_slots(p->signal_page);
+ uint32_t id;
+
+- idr_for_each_entry(&p->event_idr, ev, id) {
+- if (id >= KFD_SIGNAL_EVENT_LIMIT)
+- break;
++ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
++ /* With relatively few events, it's faster to
++ * iterate over the event IDR
++ */
++ idr_for_each_entry(&p->event_idr, ev, id) {
++ if (id >= KFD_SIGNAL_EVENT_LIMIT)
++ break;
+
+- if (slots[id] != UNSIGNALED_EVENT_SLOT)
+- set_event_from_interrupt(p, ev);
++ if (slots[id] != UNSIGNALED_EVENT_SLOT)
++ set_event_from_interrupt(p, ev);
++ }
++ } else {
++ /* With relatively many events, it's faster to
++ * iterate over the signal slots and lookup
++ * only signaled events from the IDR.
++ */
++ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
++ if (slots[id] != UNSIGNALED_EVENT_SLOT) {
++ ev = lookup_event_by_id(p, id);
++ set_event_from_interrupt(p, ev);
++ }
+ }
+ }
+
+--
+2.7.4
+