diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch new file mode 100644 index 00000000..0d42eb81 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch @@ -0,0 +1,240 @@ +From e025d369a48a7bd490ac83a62cf98bfa946cb6ee Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Wed, 8 Nov 2017 19:03:59 -0500 +Subject: [PATCH 2859/4131] drm/amdkfd: Simplify CWSR trap handler management + for kfd_dev + +Instead of allocating pages and copying the trap handler ISA for the +device, just use a pointer to the global trap handler ISA. + +Remove the cwsr_size variable. The size always has to be two pages +due to assumptions throughout the code. Define this as a constant. + +Remove the tma_offset variable. The TMA offset is always one page. + +Change-Id: I32330252f9d5d126d7678781a601b71373b2f386 +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +--- + drivers/gpu/drm/amd/amdkfd/kfd_device.c | 45 ++-------------------- + .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2 +- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 ++++++-- + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 ++++---- + 5 files changed, 25 insertions(+), 55 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +index 4645328..8ab3d9f 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c +@@ -25,7 +25,6 @@ + #endif + #include <linux/pci.h> + #include <linux/slab.h> +-#include <linux/highmem.h> + #include "kfd_priv.h" + #include "kfd_device_queue_manager.h" + #include "kfd_pm4_headers_vi.h" +@@ -477,57 +476,23 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + + static int kfd_cwsr_init(struct kfd_dev *kfd) + { +- /* +- * Initialize the CWSR required memory for TBA and TMA +- */ + if (cwsr_enable && kfd->device_info->supports_cwsr) { +- const uint32_t *cwsr_hex; +- void *cwsr_addr = NULL; +- unsigned int size; +- + if (kfd->device_info->asic_family < CHIP_VEGA10) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); +- cwsr_hex = cwsr_trap_gfx8_hex; +- size = sizeof(cwsr_trap_gfx8_hex); ++ kfd->cwsr_isa = cwsr_trap_gfx8_hex; ++ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); +- cwsr_hex = cwsr_trap_gfx9_hex; +- size = sizeof(cwsr_trap_gfx9_hex); ++ kfd->cwsr_isa = cwsr_trap_gfx9_hex; ++ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } + +- if (size > PAGE_SIZE) { +- pr_err("Wrong CWSR ISA size.\n"); +- return -EINVAL; +- } +- kfd->cwsr_size = +- ALIGN(size, PAGE_SIZE) + PAGE_SIZE; +- kfd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, +- get_order(kfd->cwsr_size)); +- if (!kfd->cwsr_pages) { +- pr_err("Failed to allocate CWSR isa memory.\n"); +- return -ENOMEM; +- } +- /*Only first page used for cwsr ISA code */ +- cwsr_addr = kmap(kfd->cwsr_pages); +- memset(cwsr_addr, 0, PAGE_SIZE); +- memcpy(cwsr_addr, cwsr_hex, size); +- kunmap(kfd->cwsr_pages); +- kfd->tma_offset = ALIGN(size, PAGE_SIZE); + kfd->cwsr_enabled = true; +- dev_info(kfd_device, +- "Reserved %d pages for cwsr.\n", +- (kfd->cwsr_size >> PAGE_SHIFT)); + } + + return 0; + } + +-static void kfd_cwsr_fini(struct kfd_dev *kfd) +-{ +- if (kfd->cwsr_pages) +- __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size)); +-} +- + static void kfd_ib_mem_init(struct kfd_dev *kdev) + { + /* In certain cases we need to send IB from kernel using the GPU address +@@ -659,7 +624,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, + goto out; + + kfd_resume_error: +- kfd_cwsr_fini(kfd); + device_iommu_pasid_error: + device_queue_manager_uninit(kfd->dqm); + device_queue_manager_error: +@@ -686,7 +650,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0) + kfd_cleanup_processes_srcu(); + #endif +- kfd_cwsr_fini(kfd); + device_queue_manager_uninit(kfd->dqm); + kfd_interrupt_exit(kfd); + kfd_topology_remove_device(kfd); +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +index f509850..005e6d2 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +@@ -1414,7 +1414,7 @@ static int set_trap_handler(struct device_queue_manager *dqm, + + if (dqm->dev->cwsr_enabled) { + /* Jump from CWSR trap handler to user trap */ +- tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset); ++ tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +index 09595a9..be2d072 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +@@ -314,7 +314,7 @@ + int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd, + uint64_t base, uint64_t limit) + { +- if (base < (pdd->qpd.cwsr_base + pdd->dev->cwsr_size)) { ++ if (base < (pdd->qpd.cwsr_base + KFD_CWSR_TBA_TMA_SIZE)) { + pr_err("Set dgpu vm base 0x%llx failed.\n", base); + return -EINVAL; + } +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 113bfe9..4513643 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -93,6 +93,15 @@ + #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 + + /* ++ * Size of the per-process TBA+TMA buffer: 2 pages ++ * ++ * The first page is the TBA used for the CWSR ISA code. The second ++ * page is used as TMA for daisy changing a user-mode trap handler. ++ */ ++#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) ++#define KFD_CWSR_TMA_OFFSET PAGE_SIZE ++ ++/* + * Kernel module parameter to specify maximum number of supported queues per + * device + */ +@@ -286,11 +295,10 @@ struct kfd_dev { + /* Maximum process number mapped to HW scheduler */ + unsigned int max_proc_per_quantum; + +- /* cwsr */ ++ /* CWSR */ + bool cwsr_enabled; +- struct page *cwsr_pages; +- uint32_t cwsr_size; +- uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/ ++ const void *cwsr_isa; ++ unsigned int cwsr_isa_size; + + /* IB usage */ + uint32_t ib_size; +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +index 3cb45c1..70799c6 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c +@@ -379,7 +379,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) + if (pdd->qpd.cwsr_pages) { + kunmap(pdd->qpd.cwsr_pages); + __free_pages(pdd->qpd.cwsr_pages, +- get_order(pdd->dev->cwsr_size)); ++ get_order(KFD_CWSR_TBA_TMA_SIZE)); + } + + kfree(pdd->qpd.doorbell_bitmap); +@@ -531,7 +531,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) + if (qpd->cwsr_base) { + /* cwsr_base is only set for DGPU */ + ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base, +- dev->cwsr_size, &kaddr, pdd, flags); ++ KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags); + if (!ret) { + qpd->cwsr_kaddr = kaddr; + qpd->tba_addr = qpd->cwsr_base; +@@ -546,7 +546,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) + offset = (dev->id | + KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT; + qpd->tba_addr = (uint64_t)vm_mmap(filep, 0, +- dev->cwsr_size, PROT_READ | PROT_EXEC, ++ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, + MAP_SHARED, offset); + + if (IS_ERR_VALUE(qpd->tba_addr)) { +@@ -558,10 +558,9 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep) + } + } + +- memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages), PAGE_SIZE); +- kunmap(dev->cwsr_pages); ++ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + +- qpd->tma_addr = qpd->tba_addr + dev->tma_offset; ++ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; + pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", + qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); + } +@@ -1128,7 +1127,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, + + if (!dev) + return -EINVAL; +- if (((vma->vm_end - vma->vm_start) != dev->cwsr_size) || ++ if (((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) || + (vma->vm_start & (PAGE_SIZE - 1)) || + (vma->vm_end & (PAGE_SIZE - 1))) { + pr_err("KFD only support page aligned memory map and correct size.\n"); +@@ -1148,7 +1147,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, + return -EINVAL; + + qpd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, +- get_order(dev->cwsr_size)); ++ get_order(KFD_CWSR_TBA_TMA_SIZE)); + if (!qpd->cwsr_pages) { + pr_err("amdkfd: error alloc CWSR isa memory per process.\n"); + return -ENOMEM; +-- +2.7.4 + |