Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1650-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch')
-rw-r--r--  meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1650-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch  131
1 file changed, 131 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1650-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1650-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch
new file mode 100644
index 00000000..36d1d32f
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/1650-drm-amdkfd-Allocate-CWSR-pages-per-process-for-APU.patch
@@ -0,0 +1,131 @@
+From c639f892f55f8f26de32fc7c8eafcab2f3ef2a38 Mon Sep 17 00:00:00 2001
+From: Shaoyun Liu <Shaoyun.Liu@amd.com>
+Date: Mon, 27 Mar 2017 16:43:32 -0400
+Subject: [PATCH 1650/4131] drm/amdkfd: Allocate CWSR pages per process for APU
+
+The original code for APUs uses common reserved pages per device for the CWSR
+trap handler TBA and TMA, which get overwritten when multiple processes
+register their own second-level trap handlers.
+
+Change-Id: I62c7c6dccd9b6a95533b65de7c7135f4083497e2
+Signed-off-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 55 ++++++++++++++++++++++----------
+ 2 files changed, 40 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 2d780ec..c0d5b10 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -567,6 +567,7 @@ struct qcm_process_device {
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+ void *cwsr_kaddr;
++ struct page *cwsr_pages;
+
+ /* IB memory */
+ uint64_t ib_base; /* ib_base+ib_size must be below cwsr_base */
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 562f061..3c0c40f 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -349,6 +349,13 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
+ pdd->dev->kfd2kgd->destroy_process_vm(
+ pdd->dev->kgd, pdd->vm);
+ list_del(&pdd->per_device_list);
++
++ if (pdd->qpd.cwsr_pages) {
++ kunmap(pdd->qpd.cwsr_pages);
++ __free_pages(pdd->qpd.cwsr_pages,
++ get_order(pdd->dev->cwsr_size));
++ }
++
+ kfree(pdd);
+ }
+ }
+@@ -505,9 +512,6 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ dev->cwsr_size, &kaddr, pdd);
+ if (!err) {
+ qpd->cwsr_kaddr = kaddr;
+- memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages),
+- PAGE_SIZE);
+- kunmap(dev->cwsr_pages);
+ qpd->tba_addr = qpd->cwsr_base;
+ } else
+ goto out;
+@@ -525,13 +529,15 @@ static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep)
+ qpd->cwsr_kaddr = NULL;
+ err = -ENOMEM;
+ goto out;
+- } else
+- qpd->cwsr_kaddr = (void *)qpd->tba_addr;
++ }
+ }
+
++ memcpy(qpd->cwsr_kaddr, kmap(dev->cwsr_pages), PAGE_SIZE);
++ kunmap(dev->cwsr_pages);
++
+ qpd->tma_addr = qpd->tba_addr + dev->tma_offset;
+- pr_debug("set tba :0x%llx, tma:0x%llx for pqm.\n",
+- qpd->tba_addr, qpd->tma_addr);
++ pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n",
++ qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr);
+ }
+
+ out:
+@@ -1004,25 +1010,42 @@ int kfd_reserved_mem_mmap(struct kfd_process *process, struct vm_area_struct *vm
+ unsigned long pfn, i;
+ int ret = 0;
+ struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
++ struct kfd_process_device *temp, *pdd = NULL;
++ struct qcm_process_device *qpd = NULL;
+
+ if (dev == NULL)
+ return -EINVAL;
+- if ((vma->vm_start & (PAGE_SIZE - 1)) ||
++ if (((vma->vm_end - vma->vm_start) != dev->cwsr_size) ||
++ (vma->vm_start & (PAGE_SIZE - 1)) ||
+ (vma->vm_end & (PAGE_SIZE - 1))) {
+- pr_err("KFD only support page aligned memory map.\n");
++ pr_err("KFD only support page aligned memory map and correct size.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("kfd reserved mem mmap been called.\n");
+- /* We supported two reserved memory mmap in the future .
+- 1. Trap handler code and parameter (TBA and TMA , 2 pages total)
+- 2. Relaunch stack (control block, 1 page for Carrizo)
+- */
+
++ list_for_each_entry_safe(pdd, temp, &process->per_device_data,
++ per_device_list) {
++ if (dev == pdd->dev) {
++ qpd = &pdd->qpd;
++ break;
++ }
++ }
++ if (qpd == NULL)
++ return -EINVAL;
++
++ qpd->cwsr_pages = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM,
++ get_order(dev->cwsr_size));
++ if (!qpd->cwsr_pages) {
++ pr_err("amdkfd: error alloc CWSR isa memory per process.\n");
++ return -ENOMEM;
++ }
++ qpd->cwsr_kaddr = kmap(qpd->cwsr_pages);
++
++ vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
++ | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+ for (i = 0; i < ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); ++i) {
+- pfn = page_to_pfn(&dev->cwsr_pages[i]);
+- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+- | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
++ pfn = page_to_pfn(&qpd->cwsr_pages[i]);
+ /* mapping the page to user process */
+ ret = remap_pfn_range(vma, vma->vm_start + (i << PAGE_SHIFT),
+ pfn, PAGE_SIZE, vma->vm_page_prot);
+--
+2.7.4
+
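For context, a minimal userspace sketch of how a process could end up with its own CWSR area through /dev/kfd after this change. This is not part of the patch: the CWSR_SIZE value, the gpu_id, and the byte-offset encoding (gpu_id shifted into vm_pgoff by the page size) are assumptions for illustration; real applications obtain these through the ROCm Thunk/runtime. What the kernel side above does require is that the mapping be page aligned and exactly dev->cwsr_size bytes, and it selects the device from vma->vm_pgoff via kfd_device_by_id().

/*
 * Hypothetical sketch only -- not taken from the patch or the ROCm runtime.
 * Build: gcc -o cwsr_map cwsr_map.c
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

/* Assumed CWSR area size; the kernel checks vm_end - vm_start == dev->cwsr_size. */
#define CWSR_SIZE (8 * 4096)

int main(void)
{
	int fd = open("/dev/kfd", O_RDWR);
	if (fd < 0) {
		perror("open /dev/kfd");
		return 1;
	}

	/*
	 * kfd_reserved_mem_mmap() looks the device up with
	 * kfd_device_by_id(vma->vm_pgoff), so the mmap byte offset encodes the
	 * GPU id (offset divided by the page size ends up in vm_pgoff).
	 * The gpu_id value here is illustrative only.
	 */
	unsigned long gpu_id = 1;
	off_t offset = (off_t)gpu_id * sysconf(_SC_PAGESIZE);

	void *cwsr = mmap(NULL, CWSR_SIZE, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, offset);
	if (cwsr == MAP_FAILED) {
		perror("mmap CWSR area");
		close(fd);
		return 1;
	}

	printf("per-process CWSR area mapped at %p\n", cwsr);

	munmap(cwsr, CWSR_SIZE);
	close(fd);
	return 0;
}

With the patch applied, a mapping like this is backed by pages that kfd_reserved_mem_mmap() allocates for the calling process's qcm_process_device (qpd->cwsr_pages), so a second process registering its own second-level trap handler no longer overwrites the first process's TBA/TMA pages.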