diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2124-drm-amdkfd-Use-IH-context-ID-for-signal-lookup.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2124-drm-amdkfd-Use-IH-context-ID-for-signal-lookup.patch | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2124-drm-amdkfd-Use-IH-context-ID-for-signal-lookup.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2124-drm-amdkfd-Use-IH-context-ID-for-signal-lookup.patch new file mode 100644 index 00000000..01505e23 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2124-drm-amdkfd-Use-IH-context-ID-for-signal-lookup.patch @@ -0,0 +1,186 @@ +From 28e145cfc6e1a5e1d6e9b030362652389b48e992 Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Thu, 12 Oct 2017 15:06:28 -0400 +Subject: [PATCH 2124/4131] drm/amdkfd: Use IH context ID for signal lookup + +This speeds up signal lookup when the IH ring entry includes a +valid context ID or partial context ID. Only if the context ID is +found to be invalid, fall back to an exhaustive search of all +signaled events. + +Change-Id: Iabadf722b2fc20bb6de8eda6ed028761cdf372fb +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 7 ++- + drivers/gpu/drm/amd/amdkfd/kfd_events.c | 73 +++++++++++++++++++----- + drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 8 ++- + 3 files changed, 69 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +index 00536a1..751c004 100644 +--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c ++++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +@@ -79,16 +79,17 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, + { + const struct cik_ih_ring_entry *ihre = + (const struct cik_ih_ring_entry *)ih_ring_entry; ++ uint32_t context_id = ihre->data & 0xfffffff; + + if (ihre->pasid == 0) + return; + + if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) +- kfd_signal_event_interrupt(ihre->pasid, 0, 0); ++ kfd_signal_event_interrupt(ihre->pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) +- 
kfd_signal_event_interrupt(ihre->pasid, 0, 0); ++ kfd_signal_event_interrupt(ihre->pasid, context_id, 28); + else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) +- kfd_signal_event_interrupt(ihre->pasid, ihre->data & 0xFF, 8); ++ kfd_signal_event_interrupt(ihre->pasid, context_id & 0xff, 8); + else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(ihre->pasid); + else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +index 16e3a72..ecd182e 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c +@@ -57,12 +57,6 @@ struct kfd_signal_page { + uint64_t __user *user_address; + }; + +-/* +- * For signal events, the event ID is used as the interrupt user data. +- * For SQ s_sendmsg interrupts, this is limited to 8 bits. +- */ +- +-#define INTERRUPT_DATA_BITS 12 + + static uint64_t *page_slots(struct kfd_signal_page *page) + { +@@ -152,6 +146,54 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) + return idr_find(&p->event_idr, id); + } + ++/** ++ * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID ++ * @p: Pointer to struct kfd_process ++ * @id: ID to look up ++ * @bits: Number of valid bits in @id ++ * ++ * Finds the first signaled event with a matching partial ID. If no ++ * matching signaled event is found, returns NULL. In that case the ++ * caller should assume that the partial ID is invalid and do an ++ * exhaustive search of all signaled events. ++ * ++ * If multiple events with the same partial ID signal at the same ++ * time, they will be found one interrupt at a time, not necessarily ++ * in the same order the interrupts occurred. As long as the number of ++ * interrupts is correct, all signaled events will be seen by the ++ * driver. 
++ */ ++static struct kfd_event *lookup_signaled_event_by_partial_id( ++ struct kfd_process *p, uint32_t id, uint32_t bits) ++{ ++ struct kfd_event *ev; ++ ++ if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) ++ return NULL; ++ ++ /* Fast path for the common case that @id is not a partial ID ++ * and we only need a single lookup. ++ */ ++ if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { ++ if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) ++ return NULL; ++ ++ return idr_find(&p->event_idr, id); ++ } ++ ++ /* General case for partial IDs: Iterate over all matching IDs ++ * and find the first one that has signaled. ++ */ ++ for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { ++ if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) ++ continue; ++ ++ ev = idr_find(&p->event_idr, id); ++ } ++ ++ return ev; ++} ++ + static int create_signal_event(struct file *devkfd, + struct kfd_process *p, + struct kfd_event *ev) +@@ -417,7 +459,7 @@ static void set_event_from_interrupt(struct kfd_process *p, + void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + uint32_t valid_id_bits) + { +- struct kfd_event *ev; ++ struct kfd_event *ev = NULL; + + /* + * Because we are called from arbitrary context (workqueue) as opposed +@@ -431,19 +473,24 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, + + mutex_lock(&p->event_mutex); + +- if (valid_id_bits >= INTERRUPT_DATA_BITS) { +- /* Partial ID is a full ID. */ +- ev = lookup_event_by_id(p, partial_id); ++ if (valid_id_bits) ++ ev = lookup_signaled_event_by_partial_id(p, partial_id, ++ valid_id_bits); ++ if (ev) { + set_event_from_interrupt(p, ev); + } else if (p->signal_page) { + /* +- * Partial ID is in fact partial. For now we completely +- * ignore it, but we could use any bits we did receive to +- * search faster. ++ * Partial ID lookup failed. 
Assume that the event ID ++ * in the interrupt payload was invalid and do an ++ * exhaustive search of signaled events. + */ + uint64_t *slots = page_slots(p->signal_page); + uint32_t id; + ++ if (valid_id_bits) ++ pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", ++ partial_id, valid_id_bits); ++ + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + /* With relatively few events, it's faster to + * iterate over the event IDR +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +index b2c6b52..009d6f4 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +@@ -95,18 +95,20 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) + { + uint16_t source_id, client_id, pasid, vmid; ++ uint32_t context_id; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); ++ context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) +- kfd_signal_event_interrupt(pasid, 0, 0); ++ kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SDMA_TRAP) +- kfd_signal_event_interrupt(pasid, 0, 0); ++ kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) +- kfd_signal_event_interrupt(pasid, 0, 0); /*todo */ ++ kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + else if (client_id == SOC15_IH_CLIENTID_VMC || +-- +2.7.4 + |