From 1a97cc79780e70d1e6624a8d2fdc3c1d5a56461f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 30 Apr 2018 19:22:49 -0400 Subject: [PATCH 4295/5725] drm/amdkfd: Add sanity checks in IRQ handlers Only accept interrupts from KFD VMIDs. Just checking for a PASID may not be enough because amdgpu started using PASIDs to map VM faults to processes. Warn if an IRQ doesn't have a valid PASID (indicating a firmware bug). Change-Id: I34ca5b4b03ffe51a23d03490fc65b6c946bbbf51 Suggested-by: Shaoyun Liu Suggested-by: Oak Zeng Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 33 +++++++++--------- drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 44 ++++++++++++++---------- 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 1261432..5d2475d 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -24,15 +24,6 @@ #include "kfd_events.h" #include "cik_int.h" -static bool is_cpc_vm_fault(struct kfd_dev *dev, uint32_t source_id, - unsigned int vmid) -{ - return (source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || - source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && - vmid >= dev->vm_info.first_vmid_kfd && - vmid <= dev->vm_info.last_vmid_kfd; -} - static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, uint32_t *patched_ihre, @@ -67,16 +58,26 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, vmid <= dev->vm_info.last_vmid_kfd; } + /* Only handle interrupts from KFD VMIDs */ vmid = (ihre->ring_id & 0x0000ff00) >> 8; + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ pasid = (ihre->ring_id & 0xffff0000) >> 16; + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - /* Do not process in ISR, just request it to be forwarded to WQ. */ - return (pasid != 0) && - (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || - ihre->source_id == CIK_INTSRC_SDMA_TRAP || - ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || - is_cpc_vm_fault(dev, ihre->source_id, vmid)); + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + ihre->source_id == CIK_INTSRC_SDMA_TRAP || + ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || + ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 5217e51..f836897 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -31,29 +31,37 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, bool *patched_flag) { uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; - source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); - client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); - pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + /* Only handle interrupts from KFD VMIDs */ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - if (pasid) { - const uint32_t *data = ih_ring_entry; + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); - pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", - client_id, source_id, pasid); - pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", - data[0], data[1], data[2], data[3], - data[4], data[5], data[6], data[7]); - } + pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", + client_id, source_id, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); - return (pasid != 0) && - (source_id == SOC15_INTSRC_CP_END_OF_PIPE || - source_id == SOC15_INTSRC_SDMA_TRAP || - source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || - source_id == SOC15_INTSRC_CP_BAD_OPCODE || - client_id == SOC15_IH_CLIENTID_VMC || - client_id == SOC15_IH_CLIENTID_UTCL2); + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE || + client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2; } static void event_interrupt_wq_v9(struct kfd_dev *dev, -- 2.7.4