From 28e145cfc6e1a5e1d6e9b030362652389b48e992 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 12 Oct 2017 15:06:28 -0400 Subject: [PATCH 2124/4131] drm/amdkfd: Use IH context ID for signal lookup This speeds up signal lookup when the IH ring entry includes a valid context ID or partial context ID. Only if the context ID is found to be invalid, fall back to an exhaustive search of all signaled events. Change-Id: Iabadf722b2fc20bb6de8eda6ed028761cdf372fb Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 7 ++- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 73 +++++++++++++++++++----- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 8 ++- 3 files changed, 69 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 00536a1..751c004 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -79,16 +79,17 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, { const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + uint32_t context_id = ihre->data & 0xfffffff; if (ihre->pasid == 0) return; if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(ihre->pasid, 0, 0); + kfd_signal_event_interrupt(ihre->pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(ihre->pasid, 0, 0); + kfd_signal_event_interrupt(ihre->pasid, context_id, 28); else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(ihre->pasid, ihre->data & 0xFF, 8); + kfd_signal_event_interrupt(ihre->pasid, context_id & 0xff, 8); else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(ihre->pasid); else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 16e3a72..ecd182e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -57,12 +57,6 @@ struct kfd_signal_page { uint64_t __user *user_address; }; -/* - * For signal events, the event ID is used as the interrupt user data. - * For SQ s_sendmsg interrupts, this is limited to 8 bits. - */ - -#define INTERRUPT_DATA_BITS 12 static uint64_t *page_slots(struct kfd_signal_page *page) { @@ -152,6 +146,54 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) return idr_find(&p->event_idr, id); } +/** + * lookup_signaled_event_by_partial_id - Lookup signaled event from partial ID + * @p: Pointer to struct kfd_process + * @id: ID to look up + * @bits: Number of valid bits in @id + * + * Finds the first signaled event with a matching partial ID. If no + * matching signaled event is found, returns NULL. In that case the + * caller should assume that the partial ID is invalid and do an + * exhaustive search of all siglaned events. + * + * If multiple events with the same partial ID signal at the same + * time, they will be found one interrupt at a time, not necessarily + * in the same order the interrupts occurred. As long as the number of + * interrupts is correct, all signaled events will be seen by the + * driver. + */ +static struct kfd_event *lookup_signaled_event_by_partial_id( + struct kfd_process *p, uint32_t id, uint32_t bits) +{ + struct kfd_event *ev; + + if (!p->signal_page || id >= KFD_SIGNAL_EVENT_LIMIT) + return NULL; + + /* Fast path for the common case that @id is not a partial ID + * and we only need a single lookup. + */ + if (bits > 31 || (1U << bits) >= KFD_SIGNAL_EVENT_LIMIT) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + return NULL; + + return idr_find(&p->event_idr, id); + } + + /* General case for partial IDs: Iterate over all matching IDs + * and find the first one that has signaled. + */ + for (ev = NULL; id < KFD_SIGNAL_EVENT_LIMIT && !ev; id += 1U << bits) { + if (page_slots(p->signal_page)[id] == UNSIGNALED_EVENT_SLOT) + continue; + + ev = idr_find(&p->event_idr, id); + } + + return ev; +} + static int create_signal_event(struct file *devkfd, struct kfd_process *p, struct kfd_event *ev) @@ -417,7 +459,7 @@ static void set_event_from_interrupt(struct kfd_process *p, void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, uint32_t valid_id_bits) { - struct kfd_event *ev; + struct kfd_event *ev = NULL; /* * Because we are called from arbitrary context (workqueue) as opposed @@ -431,19 +473,24 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, mutex_lock(&p->event_mutex); - if (valid_id_bits >= INTERRUPT_DATA_BITS) { - /* Partial ID is a full ID. */ - ev = lookup_event_by_id(p, partial_id); + if (valid_id_bits) + ev = lookup_signaled_event_by_partial_id(p, partial_id, + valid_id_bits); + if (ev) { set_event_from_interrupt(p, ev); } else if (p->signal_page) { /* - * Partial ID is in fact partial. For now we completely - * ignore it, but we could use any bits we did receive to - * search faster. + * Partial ID lookup failed. Assume that the event ID + * in the interrupt payload was invalid and do an + * exhaustive search of signaled events. */ uint64_t *slots = page_slots(p->signal_page); uint32_t id; + if (valid_id_bits) + pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", + partial_id, valid_id_bits); + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { /* With relatively few events, it's faster to * iterate over the event IDR diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index b2c6b52..009d6f4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -95,18 +95,20 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { uint16_t source_id, client_id, pasid, vmid; + uint32_t context_id; source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id, 32); else if (source_id == SOC15_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, 0, 0); + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, 0, 0); /*todo */ + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); else if (client_id == SOC15_IH_CLIENTID_VMC || -- 2.7.4