diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch new file mode 100644 index 00000000..27bb233a --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2123-drm-amdkfd-Fix-signal-handling-performance-regressio.patch @@ -0,0 +1,91 @@ +From f22f930895f11827f355a261704ddbd017a9c0e3 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Fri, 13 Oct 2017 20:20:08 -0400 +Subject: [PATCH 2123/4131] drm/amdkfd: Fix signal handling performance + regression + +Initialize all event slots to "unsignaled" when the events page is +allocated. + +When looking for signaled events, the best strategy for finding all +signaled events depends on how densely-populated the event page is. +For many events, it's faster to iterate over event slots and look up +only the signaled ones from the IDR. For few events, it's faster +to iterate over the IDR and skip all unused event slots. 
+ +Bug: SWDEV-135399 + +Change-Id: I018e40c8fa8085d468ccede0f518bbb7f31a3443 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 35 +++++++++++++++++++++++++++------ + 1 file changed, 29 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 69c3728..16e3a72 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -78,11 +78,15 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) + if (!page) + return NULL; + +- backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, ++ backing_store = (void *) __get_free_pages(GFP_KERNEL, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + if (!backing_store) + goto fail_alloc_signal_store; + ++ /* Initialize all events to unsignaled */ ++ memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, ++ KFD_SIGNAL_EVENT_LIMIT * 8); ++ + page->kernel_address = backing_store; + pr_debug("Allocated new event signal page at %p, for process %p\n", + page, p); +@@ -125,6 +129,10 @@ static struct kfd_signal_page *allocate_signal_page_dgpu( + if (!my_page) + return NULL; + ++ /* Initialize all events to unsignaled */ ++ memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT, ++ KFD_SIGNAL_EVENT_LIMIT * 8); ++ + my_page->kernel_address = kernel_address; + my_page->handle = handle; + my_page->user_address = NULL; +@@ -436,12 +444,27 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint64_t *slots = page_slots(p->signal_page); + uint32_t id; + +- idr_for_each_entry(&p->event_idr, ev, id) { +- if (id >= KFD_SIGNAL_EVENT_LIMIT) +- break; ++ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { ++ /* With relatively few events, it's faster to ++ * iterate over the event IDR ++ */ ++ idr_for_each_entry(&p->event_idr, ev, id) { ++ if (id >= KFD_SIGNAL_EVENT_LIMIT) ++ break; + +- if (slots[id] != UNSIGNALED_EVENT_SLOT) 
+- set_event_from_interrupt(p, ev); ++ if (slots[id] != UNSIGNALED_EVENT_SLOT) ++ set_event_from_interrupt(p, ev); ++ } ++ } else { ++ /* With relatively many events, it's faster to ++ * iterate over the signal slots and lookup ++ * only signaled events from the IDR. ++ */ ++ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) ++ if (slots[id] != UNSIGNALED_EVENT_SLOT) { ++ ev = lookup_event_by_id(p, id); ++ set_event_from_interrupt(p, ev); ++ } + } + } + +-- +2.7.4 + |