From f22f930895f11827f355a261704ddbd017a9c0e3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 13 Oct 2017 20:20:08 -0400 Subject: [PATCH 2123/4131] drm/amdkfd: Fix signal handling performance regression Initialize all event slots to "unsignaled" when the events page is allocated. When looking for signaled events, the best strategy for finding all signaled events depends on how densely-populated the event page is. For many events, it's faster to iterate over event slots and look up only the unsignaled ones from the IDR. For few events, it's faster to iterate over the IDR and skip all unused event slots. Bug: SWDEV-135399 Change-Id: I018e40c8fa8085d468ccede0f518bbb7f31a3443 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 35 +++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 69c3728..16e3a72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -78,11 +78,15 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) if (!page) return NULL; - backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, + backing_store = (void *) __get_free_pages(GFP_KERNEL, get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); if (!backing_store) goto fail_alloc_signal_store; + /* Initialize all events to unsignaled */ + memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT, + KFD_SIGNAL_EVENT_LIMIT * 8); + page->kernel_address = backing_store; pr_debug("Allocated new event signal page at %p, for process %p\n", page, p); @@ -125,6 +129,10 @@ static struct kfd_signal_page *allocate_signal_page_dgpu( if (!my_page) return NULL; + /* Initialize all events to unsignaled */ + memset(kernel_address, (uint8_t) UNSIGNALED_EVENT_SLOT, + KFD_SIGNAL_EVENT_LIMIT * 8); + my_page->kernel_address = kernel_address; my_page->handle = handle; my_page->user_address = NULL; @@ -436,12 +444,27 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint64_t *slots = page_slots(p->signal_page); uint32_t id; - idr_for_each_entry(&p->event_idr, ev, id) { - if (id >= KFD_SIGNAL_EVENT_LIMIT) - break; + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + /* With relatively few events, it's faster to + * iterate over the event IDR + */ + idr_for_each_entry(&p->event_idr, ev, id) { + if (id >= KFD_SIGNAL_EVENT_LIMIT) + break; - if (slots[id] != UNSIGNALED_EVENT_SLOT) - set_event_from_interrupt(p, ev); + if (slots[id] != UNSIGNALED_EVENT_SLOT) + set_event_from_interrupt(p, ev); + } + } else { + /* With relatively many events, it's faster to + * iterate over the signal slots and lookup + * only signaled events from the IDR. + */ + for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) + if (slots[id] != UNSIGNALED_EVENT_SLOT) { + ev = lookup_event_by_id(p, id); + set_event_from_interrupt(p, ev); + } } } -- 2.7.4