From e025d369a48a7bd490ac83a62cf98bfa946cb6ee Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 8 Nov 2017 19:03:59 -0500 Subject: [PATCH 2859/4131] drm/amdkfd: Simplify CWSR trap handler management for kfd_dev Instead of allocating pages and copying the trap handler ISA for the device, just use a pointer to the global trap handler ISA. Remove the cwsr_size variable. The size always has to be two pages due to assumptions throughout the code. Define this as a constant. Remove the tma_offset variable. The TMA offset is always one page. Change-Id: I32330252f9d5d126d7678781a601b71373b2f386 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 45 ++-------------------- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 ++++++-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 ++++---- 5 files changed, 25 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4645328..8ab3d9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -25,7 +25,6 @@ #endif #include #include -#include #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" @@ -477,57 +476,23 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, static int kfd_cwsr_init(struct kfd_dev *kfd) { - /* - * Initialize the CWSR required memory for TBA and TMA - */ if (cwsr_enable && kfd->device_info->supports_cwsr) { - const uint32_t *cwsr_hex; - void *cwsr_addr = NULL; - unsigned int size; - if (kfd->device_info->asic_family < CHIP_VEGA10) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); - cwsr_hex = cwsr_trap_gfx8_hex; - size = sizeof(cwsr_trap_gfx8_hex); + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); } else { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); - cwsr_hex = cwsr_trap_gfx9_hex; - size = sizeof(cwsr_trap_gfx9_hex); + kfd->cwsr_isa = cwsr_trap_gfx9_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); } - if (size > PAGE_SIZE) { - pr_err("Wrong CWSR ISA size.\n"); - return -EINVAL; - } - kfd->cwsr_size = - ALIGN(size, PAGE_SIZE) + PAGE_SIZE; - kfd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, - get_order(kfd->cwsr_size)); - if (!kfd->cwsr_pages) { - pr_err("Failed to allocate CWSR isa memory.\n"); - return -ENOMEM; - } - /*Only first page used for cwsr ISA code */ - cwsr_addr = kmap(kfd->cwsr_pages); - memset(cwsr_addr, 0, PAGE_SIZE); - memcpy(cwsr_addr, cwsr_hex, size); - kunmap(kfd->cwsr_pages); - kfd->tma_offset = ALIGN(size, PAGE_SIZE); kfd->cwsr_enabled = true; - dev_info(kfd_device, - "Reserved %d pages for cwsr.\n", - (kfd->cwsr_size >> PAGE_SHIFT)); } return 0; } -static void kfd_cwsr_fini(struct kfd_dev *kfd) -{ - if (kfd->cwsr_pages) - __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size)); -} - static void kfd_ib_mem_init(struct kfd_dev *kdev) { /* In certain cases we need to send IB from kernel using the GPU address @@ -659,7 +624,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto out; kfd_resume_error: - kfd_cwsr_fini(kfd); device_iommu_pasid_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: @@ -686,7 +650,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) kfd_cleanup_processes_srcu(); #endif - kfd_cwsr_fini(kfd); device_queue_manager_uninit(kfd->dqm); kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f509850..005e6d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1414,7 +1414,7 @@ static int set_trap_handler(struct device_queue_manager *dqm, if (dqm->dev->cwsr_enabled) { /* Jump from CWSR trap handler to user trap */ - tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset); + tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); tma[0] = tba_addr; tma[1] = tma_addr; } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 09595a9..be2d072 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -314,7 +314,7 @@ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, uint64_t base, uint64_t limit) { - if (base < (pdd->qpd.cwsr_base + pdd->dev->cwsr_size)) { + if (base < (pdd->qpd.cwsr_base + KFD_CWSR_TBA_TMA_SIZE)) { pr_err("Set dgpu vm base 0x%llx failed.\n", base); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 113bfe9..4513643 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -93,6 +93,15 @@ #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 /* + * Size of the per-process TBA+TMA buffer: 2 pages + * + * The first page is the TBA used for the CWSR ISA code. The second + * page is used as TMA for daisy changing a user-mode trap handler. + */ +#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) +#define KFD_CWSR_TMA_OFFSET PAGE_SIZE + +/* * Kernel module parameter to specify maximum number of supported queues per * device */ @@ -286,11 +295,10 @@ struct kfd_dev { /* Maximum process number mapped to HW scheduler */ unsigned int max_proc_per_quantum; - /* cwsr */ + /* CWSR */ bool cwsr_enabled; - struct page *cwsr_pages; - uint32_t cwsr_size; - uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/ + const void *cwsr_isa; + unsigned int cwsr_isa_size; /* IB usage */ uint32_t ib_size; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3cb45c1..70799c6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -379,7 +379,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) if (pdd->qpd.cwsr_pages) { kunmap(pdd->qpd.cwsr_pages); __free_pages(pdd->qpd.cwsr_pages, - get_order(pdd->dev->cwsr_size)); + get_order(KFD_CWSR_TBA_TMA_SIZE)); } kfree(pdd->qpd.doorbell_bitmap); @@ -531,7 +531,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) if (qpd->cwsr_base) { /* cwsr_base is only set for DGPU */ ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base, - dev->cwsr_size, &kaddr, pdd, flags); + KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags); if (!ret) { qpd->cwsr_kaddr = kaddr; qpd->tba_addr = qpd->cwsr_base; @@ -546,7 +546,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) offset = (dev->id | KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT; qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, - dev->cwsr_size, PROT_READ | PROT_EXEC, + KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, offset); if (IS_ERR_VALUE(qpd->tba_addr)) { @@ -558,10 +558,9 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) } } - memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages), PAGE_SIZE); - kunmap(dev->cwsr_pages); + memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); - qpd->tma_addr = qpd->tba_addr + dev->tma_offset; + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); } @@ -1128,7 +1127,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, if (!dev) return -EINVAL; - if (((vma->vm_end - vma->vm_start) != dev->cwsr_size) || + if (((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) || (vma->vm_start & (PAGE_SIZE - 1)) || (vma->vm_end & (PAGE_SIZE - 1))) { pr_err("KFD only support page aligned memory map and correct size.\n"); @@ -1148,7 +1147,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, return -EINVAL; qpd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, - get_order(dev->cwsr_size)); + get_order(KFD_CWSR_TBA_TMA_SIZE)); if (!qpd->cwsr_pages) { pr_err("amdkfd: error alloc CWSR isa memory per process.\n"); return -ENOMEM; -- 2.7.4