Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch | 465
1 files changed, 465 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch new file mode 100644 index 00000000..369455b8 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch @@ -0,0 +1,465 @@ +From 91bad65959a1d04e5ed73053f72119b6abb5c945 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Fri, 27 Oct 2017 19:35:27 -0400 +Subject: [PATCH 3371/4131] drm/amdkfd: Simplify event ID and signal slot + management + +Signal slots are identical to event IDs. + +Replace the used_slot_bitmap and events hash table with an IDR to +allocate and lookup event IDs and signal slots more efficiently. + +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Acked-by: Oded Gabbay <oded.gabbay@gmail.com> +Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++---------------------- + drivers/gpu/drm/amd/amdkfd/kfd_events.h | 14 +- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +- + 3 files changed, 80 insertions(+), 170 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 7cc1710..41580e0 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -41,24 +41,16 @@ struct kfd_event_waiter { + bool activated; /* Becomes true when event is signaled */ + }; + +-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT +-#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE) +- + /* +- * Over-complicated pooled allocator for event notification slots. +- * + * Each signal event needs a 64-bit signal slot where the signaler will write +- * a 1 before sending an interrupt.l (This is needed because some interrupts ++ * a 1 before sending an interrupt. (This is needed because some interrupts + * do not contain enough spare data bits to identify an event.) +- * We get whole pages from vmalloc and map them to the process VA. +- * Individual signal events are then allocated a slot in a page. ++ * We get whole pages and map them to the process VA. ++ * Individual signal events use their event_id as slot index. 
+ */ +- + struct kfd_signal_page { + uint64_t *kernel_address; + uint64_t __user *user_address; +- unsigned int free_slots; +- unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS]; + }; + + /* +@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page) + return page->kernel_address; + } + +-static bool allocate_free_slot(struct kfd_process *process, +- unsigned int *out_slot_index) +-{ +- struct kfd_signal_page *page = process->signal_page; +- unsigned int slot; +- +- if (!page || page->free_slots == 0) { +- pr_debug("No free event signal slots were found for process %p\n", +- process); +- +- return false; +- } +- +- slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE); +- +- __set_bit(slot, page->used_slot_bitmap); +- page->free_slots--; +- +- page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT; +- +- *out_slot_index = slot; +- +- pr_debug("Allocated event signal slot in page %p, slot %d\n", +- page, slot); +- +- return true; +-} +- + static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) + { + void *backing_store; +@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) + if (!page) + return NULL; + +- page->free_slots = SLOTS_PER_PAGE; +- + backing_store = (void *) __get_free_pages(GFP_KERNEL, + get_order(KFD_SIGNAL_EVENT_LIMIT * 8)); + if (!backing_store) +@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) + return NULL; + } + +-static bool allocate_event_notification_slot(struct kfd_process *p, +- unsigned int *signal_slot_index) ++static int allocate_event_notification_slot(struct kfd_process *p, ++ struct kfd_event *ev) + { ++ int id; ++ + if (!p->signal_page) { + p->signal_page = allocate_signal_page(p); + if (!p->signal_page) +- return false; ++ return -ENOMEM; + } + +- return allocate_free_slot(p, signal_slot_index); +-} ++ id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT, ++ GFP_KERNEL); ++ if (id < 0) ++ return id; + +-/* Assumes that the process's event_mutex is locked. */ +-static void release_event_notification_slot(struct kfd_signal_page *page, +- size_t slot_index) +-{ +- __clear_bit(slot_index, page->used_slot_bitmap); +- page->free_slots++; ++ ev->event_id = id; ++ page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT; + +- /* We don't free signal pages, they are retained by the process +- * and reused until it exits. +- */ ++ return 0; + } + + /* +@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page, + */ + static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) + { +- struct kfd_event *ev; +- +- hash_for_each_possible(p->events, ev, events, id) +- if (ev->event_id == id) +- return ev; +- +- return NULL; +-} +- +-/* +- * Produce a kfd event id for a nonsignal event. +- * These are arbitrary numbers, so we do a sequential search through +- * the hash table for an unused number. +- */ +-static u32 make_nonsignal_event_id(struct kfd_process *p) +-{ +- u32 id; +- +- for (id = p->next_nonsignal_event_id; +- id < KFD_LAST_NONSIGNAL_EVENT_ID && +- lookup_event_by_id(p, id); +- id++) +- ; +- +- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { +- +- /* +- * What if id == LAST_NONSIGNAL_EVENT_ID - 1? +- * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so +- * the first loop fails immediately and we proceed with the +- * wraparound loop below. 
+- */ +- p->next_nonsignal_event_id = id + 1; +- +- return id; +- } +- +- for (id = KFD_FIRST_NONSIGNAL_EVENT_ID; +- id < KFD_LAST_NONSIGNAL_EVENT_ID && +- lookup_event_by_id(p, id); +- id++) +- ; +- +- +- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) { +- p->next_nonsignal_event_id = id + 1; +- return id; +- } +- +- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; +- return 0; +-} +- +-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p, +- unsigned int signal_slot) +-{ +- return lookup_event_by_id(p, signal_slot); ++ return idr_find(&p->event_idr, id); + } + + static int create_signal_event(struct file *devkfd, + struct kfd_process *p, + struct kfd_event *ev) + { ++ int ret; ++ + if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { + if (!p->signal_event_limit_reached) { + pr_warn("Signal event wasn't created because limit was reached\n"); + p->signal_event_limit_reached = true; + } +- return -ENOMEM; ++ return -ENOSPC; + } + +- if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) { ++ ret = allocate_event_notification_slot(p, ev); ++ if (ret) { + pr_warn("Signal event wasn't created because out of kernel memory\n"); +- return -ENOMEM; ++ return ret; + } + + p->signal_event_count++; + +- ev->user_signal_address = +- &p->signal_page->user_address[ev->signal_slot_index]; +- +- ev->event_id = ev->signal_slot_index; +- ++ ev->user_signal_address = &p->signal_page->user_address[ev->event_id]; + pr_debug("Signal event number %zu created with id %d, address %p\n", + p->signal_event_count, ev->event_id, + ev->user_signal_address); +@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd, + return 0; + } + +-/* +- * No non-signal events are supported yet. +- * We create them as events that never signal. +- * Set event calls from user-mode are failed. +- */ + static int create_other_event(struct kfd_process *p, struct kfd_event *ev) + { +- ev->event_id = make_nonsignal_event_id(p); +- if (ev->event_id == 0) +- return -ENOMEM; ++ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an ++ * intentional integer overflow to -1 without a compiler ++ * warning. idr_alloc treats a negative value as "maximum ++ * signed integer". 
++ */ ++ int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, ++ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, ++ GFP_KERNEL); ++ ++ if (id < 0) ++ return id; ++ ev->event_id = id; + + return 0; + } +@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev) + void kfd_event_init_process(struct kfd_process *p) + { + mutex_init(&p->event_mutex); +- hash_init(p->events); ++ idr_init(&p->event_idr); + p->signal_page = NULL; +- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID; + p->signal_event_count = 0; + } + +@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) + waiter->event = NULL; + wake_up_all(&ev->wq); + +- if ((ev->type == KFD_EVENT_TYPE_SIGNAL || +- ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) { +- release_event_notification_slot(p->signal_page, +- ev->signal_slot_index); ++ if (ev->type == KFD_EVENT_TYPE_SIGNAL || ++ ev->type == KFD_EVENT_TYPE_DEBUG) + p->signal_event_count--; +- } + +- hash_del(&ev->events); ++ idr_remove(&p->event_idr, ev->event_id); + kfree(ev); + } + + static void destroy_events(struct kfd_process *p) + { + struct kfd_event *ev; +- struct hlist_node *tmp; +- unsigned int hash_bkt; ++ uint32_t id; + +- hash_for_each_safe(p->events, hash_bkt, tmp, ev, events) ++ idr_for_each_entry(&p->event_idr, ev, id) + destroy_event(p, ev); ++ idr_destroy(&p->event_idr); + } + + /* +@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, + if (!ret) { + *event_page_offset = KFD_MMAP_EVENTS_MASK; + *event_page_offset <<= PAGE_SHIFT; +- *event_slot_index = ev->signal_slot_index; ++ *event_slot_index = ev->event_id; + } + break; + default: +@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, + } + + if (!ret) { +- hash_add(p->events, &ev->events, ev->event_id); +- + *event_id = ev->event_id; + *event_trigger_data = ev->event_id; + } else { +@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) + + static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) + { +- page_slots(p->signal_page)[ev->signal_slot_index] = +- UNSIGNALED_EVENT_SLOT; +-} +- +-static bool is_slot_signaled(struct kfd_process *p, unsigned int index) +-{ +- if (!p->signal_page) +- return false; +- else +- return page_slots(p->signal_page)[index] != +- UNSIGNALED_EVENT_SLOT; ++ page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; + } + + static void set_event_from_interrupt(struct kfd_process *p, +@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + * ignore it, but we could use any bits we did receive to + * search faster. + */ +- unsigned int i; ++ uint64_t *slots = page_slots(p->signal_page); ++ uint32_t id; ++ ++ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { ++ /* With relatively few events, it's faster to ++ * iterate over the event IDR ++ */ ++ idr_for_each_entry(&p->event_idr, ev, id) { ++ if (id >= KFD_SIGNAL_EVENT_LIMIT) ++ break; + +- for (i = 0; i < SLOTS_PER_PAGE; i++) +- if (is_slot_signaled(p, i)) { +- ev = lookup_event_by_page_slot(p, i); +- set_event_from_interrupt(p, ev); ++ if (slots[id] != UNSIGNALED_EVENT_SLOT) ++ set_event_from_interrupt(p, ev); + } ++ } else { ++ /* With relatively many events, it's faster to ++ * iterate over the signal slots and lookup ++ * only signaled events from the IDR. 
++ */ ++ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) ++ if (slots[id] != UNSIGNALED_EVENT_SLOT) { ++ ev = lookup_event_by_id(p, id); ++ set_event_from_interrupt(p, ev); ++ } ++ } + } + + mutex_unlock(&p->event_mutex); +@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, + { + struct kfd_hsa_memory_exception_data *ev_data; + struct kfd_event *ev; +- int bkt; ++ uint32_t id; + bool send_signal = true; + + ev_data = (struct kfd_hsa_memory_exception_data *) event_data; + +- hash_for_each(p->events, bkt, ev, events) ++ id = KFD_FIRST_NONSIGNAL_EVENT_ID; ++ idr_for_each_entry_continue(&p->event_idr, ev, id) + if (ev->type == type) { + send_signal = false; + dev_dbg(kfd_device, +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h +index f85fcee..abca5bf 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h +@@ -31,9 +31,13 @@ + #include "kfd_priv.h" + #include <uapi/linux/kfd_ioctl.h> + +-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U +-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK +-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX ++/* ++ * IDR supports non-negative integer IDs. Small IDs are used for ++ * signal events to match their signal slot. Use the upper half of the ++ * ID space for non-signal events. ++ */ ++#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1) ++#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX + + /* + * Written into kfd_signal_slot_t to indicate that the event is not signaled. +@@ -47,9 +51,6 @@ struct kfd_event_waiter; + struct signal_page; + + struct kfd_event { +- /* All events in process, rooted at kfd_process.events. */ +- struct hlist_node events; +- + u32 event_id; + + bool signaled; +@@ -60,7 +61,6 @@ struct kfd_event { + wait_queue_head_t wq; /* List of event waiters. */ + + /* Only for signal events. */ +- unsigned int signal_slot_index; + uint64_t __user *user_signal_address; + + /* type specific data */ +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index c1b3ee2..ebae8e1 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -31,6 +31,7 @@ + #include <linux/workqueue.h> + #include <linux/spinlock.h> + #include <linux/kfd_ioctl.h> ++#include <linux/idr.h> + #include <kgd_kfd_interface.h> + + #include "amd_shared.h" +@@ -538,11 +539,10 @@ struct kfd_process { + + /* Event-related data */ + struct mutex event_mutex; +- /* All events in process hashed by ID, linked on kfd_event.events. */ +- DECLARE_HASHTABLE(events, 4); ++ /* Event ID allocator and lookup */ ++ struct idr event_idr; + /* Event page */ + struct kfd_signal_page *signal_page; +- u32 next_nonsignal_event_id; + size_t signal_event_count; + bool signal_event_limit_reached; + }; +-- +2.7.4 + |