aboutsummaryrefslogtreecommitdiffstats
path: root/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch')
-rw-r--r--meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch240
1 files changed, 240 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch
new file mode 100644
index 00000000..0d42eb81
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2859-drm-amdkfd-Simplify-CWSR-trap-handler-management-for.patch
@@ -0,0 +1,240 @@
+From e025d369a48a7bd490ac83a62cf98bfa946cb6ee Mon Sep 17 00:00:00 2001
+From: Felix Kuehling <Felix.Kuehling@amd.com>
+Date: Wed, 8 Nov 2017 19:03:59 -0500
+Subject: [PATCH 2859/4131] drm/amdkfd: Simplify CWSR trap handler management
+ for kfd_dev
+
+Instead of allocating pages and copying the trap handler ISA for the
+device, just use a pointer to the global trap handler ISA.
+
+Remove the cwsr_size variable. The size always has to be two pages
+due to assumptions throughout the code. Define this as a constant.
+
+Remove the tma_offset variable. The TMA offset is always one page.
+
+Change-Id: I32330252f9d5d126d7678781a601b71373b2f386
+Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 45 ++--------------------
+ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 2 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 ++++++--
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 ++++----
+ 5 files changed, 25 insertions(+), 55 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 4645328..8ab3d9f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -25,7 +25,6 @@
+ #endif
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+-#include <linux/highmem.h>
+ #include "kfd_priv.h"
+ #include "kfd_device_queue_manager.h"
+ #include "kfd_pm4_headers_vi.h"
+@@ -477,57 +476,23 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
+
+ static int kfd_cwsr_init(struct kfd_dev *kfd)
+ {
+- /*
+- * Initialize the CWSR required memory for TBA and TMA
+- */
+ if (cwsr_enable && kfd->device_info->supports_cwsr) {
+- const uint32_t *cwsr_hex;
+- void *cwsr_addr = NULL;
+- unsigned int size;
+-
+ if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+- cwsr_hex = cwsr_trap_gfx8_hex;
+- size = sizeof(cwsr_trap_gfx8_hex);
++ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
++ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+- cwsr_hex = cwsr_trap_gfx9_hex;
+- size = sizeof(cwsr_trap_gfx9_hex);
++ kfd->cwsr_isa = cwsr_trap_gfx9_hex;
++ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ }
+
+- if (size > PAGE_SIZE) {
+- pr_err("Wrong CWSR ISA size.\n");
+- return -EINVAL;
+- }
+- kfd->cwsr_size =
+- ALIGN(size, PAGE_SIZE) + PAGE_SIZE;
+- kfd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM,
+- get_order(kfd->cwsr_size));
+- if (!kfd->cwsr_pages) {
+- pr_err("Failed to allocate CWSR isa memory.\n");
+- return -ENOMEM;
+- }
+- /*Only first page used for cwsr ISA code */
+- cwsr_addr = kmap(kfd->cwsr_pages);
+- memset(cwsr_addr, 0, PAGE_SIZE);
+- memcpy(cwsr_addr, cwsr_hex, size);
+- kunmap(kfd->cwsr_pages);
+- kfd->tma_offset = ALIGN(size, PAGE_SIZE);
+ kfd->cwsr_enabled = true;
+- dev_info(kfd_device,
+- "Reserved %d pages for cwsr.\n",
+- (kfd->cwsr_size >> PAGE_SHIFT));
+ }
+
+ return 0;
+ }
+
+-static void kfd_cwsr_fini(struct kfd_dev *kfd)
+-{
+- if (kfd->cwsr_pages)
+- __free_pages(kfd->cwsr_pages, get_order(kfd->cwsr_size));
+-}
+-
+ static void kfd_ib_mem_init(struct kfd_dev *kdev)
+ {
+ /* In certain cases we need to send IB from kernel using the GPU address
+@@ -659,7 +624,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
+ goto out;
+
+ kfd_resume_error:
+- kfd_cwsr_fini(kfd);
+ device_iommu_pasid_error:
+ device_queue_manager_uninit(kfd->dqm);
+ device_queue_manager_error:
+@@ -686,7 +650,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
+ #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 0, 0)
+ kfd_cleanup_processes_srcu();
+ #endif
+- kfd_cwsr_fini(kfd);
+ device_queue_manager_uninit(kfd->dqm);
+ kfd_interrupt_exit(kfd);
+ kfd_topology_remove_device(kfd);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index f509850..005e6d2 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -1414,7 +1414,7 @@ static int set_trap_handler(struct device_queue_manager *dqm,
+
+ if (dqm->dev->cwsr_enabled) {
+ /* Jump from CWSR trap handler to user trap */
+- tma = (uint64_t *)(qpd->cwsr_kaddr + dqm->dev->tma_offset);
++ tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
+ tma[0] = tba_addr;
+ tma[1] = tma_addr;
+ } else {
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+index 09595a9..be2d072 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+@@ -314,7 +314,7 @@
+ int kfd_set_process_dgpu_aperture(struct kfd_process_device *pdd,
+ uint64_t base, uint64_t limit)
+ {
+- if (base < (pdd->qpd.cwsr_base + pdd->dev->cwsr_size)) {
++ if (base < (pdd->qpd.cwsr_base + KFD_CWSR_TBA_TMA_SIZE)) {
+ pr_err("Set dgpu vm base 0x%llx failed.\n", base);
+ return -EINVAL;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 113bfe9..4513643 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -93,6 +93,15 @@
+ #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
+
+ /*
++ * Size of the per-process TBA+TMA buffer: 2 pages
++ *
++ * The first page is the TBA used for the CWSR ISA code. The second
+ * page is used as TMA for daisy chaining a user-mode trap handler.
++ */
++#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
++#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
++
++/*
+ * Kernel module parameter to specify maximum number of supported queues per
+ * device
+ */
+@@ -286,11 +295,10 @@ struct kfd_dev {
+ /* Maximum process number mapped to HW scheduler */
+ unsigned int max_proc_per_quantum;
+
+- /* cwsr */
++ /* CWSR */
+ bool cwsr_enabled;
+- struct page *cwsr_pages;
+- uint32_t cwsr_size;
+- uint32_t tma_offset; /*Offset for TMA from the start of cwsr_mem*/
++ const void *cwsr_isa;
++ unsigned int cwsr_isa_size;
+
+ /* IB usage */
+ uint32_t ib_size;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 3cb45c1..70799c6 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -379,7 +379,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
+ if (pdd->qpd.cwsr_pages) {
+ kunmap(pdd->qpd.cwsr_pages);
+ __free_pages(pdd->qpd.cwsr_pages,
+- get_order(pdd->dev->cwsr_size));
++ get_order(KFD_CWSR_TBA_TMA_SIZE));
+ }
+
+ kfree(pdd->qpd.doorbell_bitmap);
+@@ -531,7 +531,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ if (qpd->cwsr_base) {
+ /* cwsr_base is only set for DGPU */
+ ret = kfd_process_alloc_gpuvm(p, dev, qpd->cwsr_base,
+- dev->cwsr_size, &kaddr, pdd, flags);
++ KFD_CWSR_TBA_TMA_SIZE, &kaddr, pdd, flags);
+ if (!ret) {
+ qpd->cwsr_kaddr = kaddr;
+ qpd->tba_addr = qpd->cwsr_base;
+@@ -546,7 +546,7 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ offset = (dev->id |
+ KFD_MMAP_TYPE_RESERVED_MEM) << PAGE_SHIFT;
+ qpd->tba_addr = (uint64_t)vm_mmap(filep, 0,
+- dev->cwsr_size, PROT_READ | PROT_EXEC,
++ KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
+ MAP_SHARED, offset);
+
+ if (IS_ERR_VALUE(qpd->tba_addr)) {
+@@ -558,10 +558,9 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ }
+ }
+
+- memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages), PAGE_SIZE);
+- kunmap(dev->cwsr_pages);
++ memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size);
+
+- qpd->tma_addr = qpd->tba_addr + dev->tma_offset;
++ qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET;
+ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
+ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+ }
+@@ -1128,7 +1127,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
+
+ if (!dev)
+ return -EINVAL;
+- if (((vma->vm_end - vma->vm_start) != dev->cwsr_size) ||
++ if (((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) ||
+ (vma->vm_start & (PAGE_SIZE - 1)) ||
+ (vma->vm_end & (PAGE_SIZE - 1))) {
+ pr_err("KFD only support page aligned memory map and correct size.\n");
+@@ -1148,7 +1147,7 @@ int kfd_reserved_mem_mmap(struct kfd_process *process,
+ return -EINVAL;
+
+ qpd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM,
+- get_order(dev->cwsr_size));
++ get_order(KFD_CWSR_TBA_TMA_SIZE));
+ if (!qpd->cwsr_pages) {
+ pr_err("amdkfd: error alloc CWSR isa memory per process.\n");
+ return -ENOMEM;
+--
+2.7.4
+