aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch465
1 files changed, 465 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch
new file mode 100644
index 00000000..369455b8
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/3371-drm-amdkfd-Simplify-event-ID-and-signal-slot-managem.patch
@@ -0,0 +1,465 @@
+From 91bad65959a1d04e5ed73053f72119b6abb5c945 Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Fri, 27 Oct 2017 19:35:27 -0400
+Subject: [PATCH 3371/4131] drm/amdkfd: Simplify event ID and signal slot
+ management
+
+Signal slots are identical to event IDs.
+
+Replace the used_slot_bitmap and events hash table with an IDR to
+allocate and look up event IDs and signal slots more efficiently.
+
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
+Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++----------------------
+ drivers/gpu/drm/amd/amdkfd/kfd_events.h | 14 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +-
+ 3 files changed, 80 insertions(+), 170 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+index 7cc1710..41580e0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+@@ -41,24 +41,16 @@ struct kfd_event_waiter {
+ bool activated; /* Becomes true when event is signaled */
+ };
+
+-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
+-#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
+-
+ /*
+- * Over-complicated pooled allocator for event notification slots.
+- *
+ * Each signal event needs a 64-bit signal slot where the signaler will write
+- * a 1 before sending an interrupt.l (This is needed because some interrupts
++ * a 1 before sending an interrupt. (This is needed because some interrupts
+ * do not contain enough spare data bits to identify an event.)
+- * We get whole pages from vmalloc and map them to the process VA.
+- * Individual signal events are then allocated a slot in a page.
++ * We get whole pages and map them to the process VA.
++ * Individual signal events use their event_id as slot index.
+ */
+-
+ struct kfd_signal_page {
+ uint64_t *kernel_address;
+ uint64_t __user *user_address;
+- unsigned int free_slots;
+- unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
+ };
+
+ /*
+@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
+ return page->kernel_address;
+ }
+
+-static bool allocate_free_slot(struct kfd_process *process,
+- unsigned int *out_slot_index)
+-{
+- struct kfd_signal_page *page = process->signal_page;
+- unsigned int slot;
+-
+- if (!page || page->free_slots == 0) {
+- pr_debug("No free event signal slots were found for process %p\n",
+- process);
+-
+- return false;
+- }
+-
+- slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
+-
+- __set_bit(slot, page->used_slot_bitmap);
+- page->free_slots--;
+-
+- page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
+-
+- *out_slot_index = slot;
+-
+- pr_debug("Allocated event signal slot in page %p, slot %d\n",
+- page, slot);
+-
+- return true;
+-}
+-
+ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
+ {
+ void *backing_store;
+@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
+ if (!page)
+ return NULL;
+
+- page->free_slots = SLOTS_PER_PAGE;
+-
+ backing_store = (void *) __get_free_pages(GFP_KERNEL,
+ get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
+ if (!backing_store)
+@@ -132,28 +94,26 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
+ return NULL;
+ }
+
+-static bool allocate_event_notification_slot(struct kfd_process *p,
+- unsigned int *signal_slot_index)
++static int allocate_event_notification_slot(struct kfd_process *p,
++ struct kfd_event *ev)
+ {
++ int id;
++
+ if (!p->signal_page) {
+ p->signal_page = allocate_signal_page(p);
+ if (!p->signal_page)
+- return false;
++ return -ENOMEM;
+ }
+
+- return allocate_free_slot(p, signal_slot_index);
+-}
++ id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
++ GFP_KERNEL);
++ if (id < 0)
++ return id;
+
+-/* Assumes that the process's event_mutex is locked. */
+-static void release_event_notification_slot(struct kfd_signal_page *page,
+- size_t slot_index)
+-{
+- __clear_bit(slot_index, page->used_slot_bitmap);
+- page->free_slots++;
++ ev->event_id = id;
++ page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
+
+- /* We don't free signal pages, they are retained by the process
+- * and reused until it exits.
+- */
++ return 0;
+ }
+
+ /*
+@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
+ */
+ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
+ {
+- struct kfd_event *ev;
+-
+- hash_for_each_possible(p->events, ev, events, id)
+- if (ev->event_id == id)
+- return ev;
+-
+- return NULL;
+-}
+-
+-/*
+- * Produce a kfd event id for a nonsignal event.
+- * These are arbitrary numbers, so we do a sequential search through
+- * the hash table for an unused number.
+- */
+-static u32 make_nonsignal_event_id(struct kfd_process *p)
+-{
+- u32 id;
+-
+- for (id = p->next_nonsignal_event_id;
+- id < KFD_LAST_NONSIGNAL_EVENT_ID &&
+- lookup_event_by_id(p, id);
+- id++)
+- ;
+-
+- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+-
+- /*
+- * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
+- * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
+- * the first loop fails immediately and we proceed with the
+- * wraparound loop below.
+- */
+- p->next_nonsignal_event_id = id + 1;
+-
+- return id;
+- }
+-
+- for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+- id < KFD_LAST_NONSIGNAL_EVENT_ID &&
+- lookup_event_by_id(p, id);
+- id++)
+- ;
+-
+-
+- if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
+- p->next_nonsignal_event_id = id + 1;
+- return id;
+- }
+-
+- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+- return 0;
+-}
+-
+-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
+- unsigned int signal_slot)
+-{
+- return lookup_event_by_id(p, signal_slot);
++ return idr_find(&p->event_idr, id);
+ }
+
+ static int create_signal_event(struct file *devkfd,
+ struct kfd_process *p,
+ struct kfd_event *ev)
+ {
++ int ret;
++
+ if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
+ if (!p->signal_event_limit_reached) {
+ pr_warn("Signal event wasn't created because limit was reached\n");
+ p->signal_event_limit_reached = true;
+ }
+- return -ENOMEM;
++ return -ENOSPC;
+ }
+
+- if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
++ ret = allocate_event_notification_slot(p, ev);
++ if (ret) {
+ pr_warn("Signal event wasn't created because out of kernel memory\n");
+- return -ENOMEM;
++ return ret;
+ }
+
+ p->signal_event_count++;
+
+- ev->user_signal_address =
+- &p->signal_page->user_address[ev->signal_slot_index];
+-
+- ev->event_id = ev->signal_slot_index;
+-
++ ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
+ pr_debug("Signal event number %zu created with id %d, address %p\n",
+ p->signal_event_count, ev->event_id,
+ ev->user_signal_address);
+@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
+ return 0;
+ }
+
+-/*
+- * No non-signal events are supported yet.
+- * We create them as events that never signal.
+- * Set event calls from user-mode are failed.
+- */
+ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+ {
+- ev->event_id = make_nonsignal_event_id(p);
+- if (ev->event_id == 0)
+- return -ENOMEM;
++ /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
++ * intentional integer overflow to -1 without a compiler
++ * warning. idr_alloc treats a negative value as "maximum
++ * signed integer".
++ */
++ int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
++ (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
++ GFP_KERNEL);
++
++ if (id < 0)
++ return id;
++ ev->event_id = id;
+
+ return 0;
+ }
+@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
+ void kfd_event_init_process(struct kfd_process *p)
+ {
+ mutex_init(&p->event_mutex);
+- hash_init(p->events);
++ idr_init(&p->event_idr);
+ p->signal_page = NULL;
+- p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ p->signal_event_count = 0;
+ }
+
+@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
+ waiter->event = NULL;
+ wake_up_all(&ev->wq);
+
+- if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
+- ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
+- release_event_notification_slot(p->signal_page,
+- ev->signal_slot_index);
++ if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
++ ev->type == KFD_EVENT_TYPE_DEBUG)
+ p->signal_event_count--;
+- }
+
+- hash_del(&ev->events);
++ idr_remove(&p->event_idr, ev->event_id);
+ kfree(ev);
+ }
+
+ static void destroy_events(struct kfd_process *p)
+ {
+ struct kfd_event *ev;
+- struct hlist_node *tmp;
+- unsigned int hash_bkt;
++ uint32_t id;
+
+- hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
++ idr_for_each_entry(&p->event_idr, ev, id)
+ destroy_event(p, ev);
++ idr_destroy(&p->event_idr);
+ }
+
+ /*
+@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+ if (!ret) {
+ *event_page_offset = KFD_MMAP_EVENTS_MASK;
+ *event_page_offset <<= PAGE_SHIFT;
+- *event_slot_index = ev->signal_slot_index;
++ *event_slot_index = ev->event_id;
+ }
+ break;
+ default:
+@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
+ }
+
+ if (!ret) {
+- hash_add(p->events, &ev->events, ev->event_id);
+-
+ *event_id = ev->event_id;
+ *event_trigger_data = ev->event_id;
+ } else {
+@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
+
+ static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
+ {
+- page_slots(p->signal_page)[ev->signal_slot_index] =
+- UNSIGNALED_EVENT_SLOT;
+-}
+-
+-static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
+-{
+- if (!p->signal_page)
+- return false;
+- else
+- return page_slots(p->signal_page)[index] !=
+- UNSIGNALED_EVENT_SLOT;
++ page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
+ }
+
+ static void set_event_from_interrupt(struct kfd_process *p,
+@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+ * ignore it, but we could use any bits we did receive to
+ * search faster.
+ */
+- unsigned int i;
++ uint64_t *slots = page_slots(p->signal_page);
++ uint32_t id;
++
++ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
++ /* With relatively few events, it's faster to
++ * iterate over the event IDR
++ */
++ idr_for_each_entry(&p->event_idr, ev, id) {
++ if (id >= KFD_SIGNAL_EVENT_LIMIT)
++ break;
+
+- for (i = 0; i < SLOTS_PER_PAGE; i++)
+- if (is_slot_signaled(p, i)) {
+- ev = lookup_event_by_page_slot(p, i);
+- set_event_from_interrupt(p, ev);
++ if (slots[id] != UNSIGNALED_EVENT_SLOT)
++ set_event_from_interrupt(p, ev);
+ }
++ } else {
++ /* With relatively many events, it's faster to
++ * iterate over the signal slots and lookup
++ * only signaled events from the IDR.
++ */
++ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
++ if (slots[id] != UNSIGNALED_EVENT_SLOT) {
++ ev = lookup_event_by_id(p, id);
++ set_event_from_interrupt(p, ev);
++ }
++ }
+ }
+
+ mutex_unlock(&p->event_mutex);
+@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
+ {
+ struct kfd_hsa_memory_exception_data *ev_data;
+ struct kfd_event *ev;
+- int bkt;
++ uint32_t id;
+ bool send_signal = true;
+
+ ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
+
+- hash_for_each(p->events, bkt, ev, events)
++ id = KFD_FIRST_NONSIGNAL_EVENT_ID;
++ idr_for_each_entry_continue(&p->event_idr, ev, id)
+ if (ev->type == type) {
+ send_signal = false;
+ dev_dbg(kfd_device,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+index f85fcee..abca5bf 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+@@ -31,9 +31,13 @@
+ #include "kfd_priv.h"
+ #include <uapi/linux/kfd_ioctl.h>
+
+-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
+-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
+-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
++/*
++ * IDR supports non-negative integer IDs. Small IDs are used for
++ * signal events to match their signal slot. Use the upper half of the
++ * ID space for non-signal events.
++ */
++#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
++#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
+
+ /*
+ * Written into kfd_signal_slot_t to indicate that the event is not signaled.
+@@ -47,9 +51,6 @@ struct kfd_event_waiter;
+ struct signal_page;
+
+ struct kfd_event {
+- /* All events in process, rooted at kfd_process.events. */
+- struct hlist_node events;
+-
+ u32 event_id;
+
+ bool signaled;
+@@ -60,7 +61,6 @@ struct kfd_event {
+ wait_queue_head_t wq; /* List of event waiters. */
+
+ /* Only for signal events. */
+- unsigned int signal_slot_index;
+ uint64_t __user *user_signal_address;
+
+ /* type specific data */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index c1b3ee2..ebae8e1 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -31,6 +31,7 @@
+ #include <linux/workqueue.h>
+ #include <linux/spinlock.h>
+ #include <linux/kfd_ioctl.h>
++#include <linux/idr.h>
+ #include <kgd_kfd_interface.h>
+
+ #include "amd_shared.h"
+@@ -538,11 +539,10 @@ struct kfd_process {
+
+ /* Event-related data */
+ struct mutex event_mutex;
+- /* All events in process hashed by ID, linked on kfd_event.events. */
+- DECLARE_HASHTABLE(events, 4);
++ /* Event ID allocator and lookup */
++ struct idr event_idr;
+ /* Event page */
+ struct kfd_signal_page *signal_page;
+- u32 next_nonsignal_event_id;
+ size_t signal_event_count;
+ bool signal_event_limit_reached;
+ };
+--
+2.7.4
+