diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4162-drm-amdkfd-CMA-Handle-userptr-to-userptr-BO-copy.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4162-drm-amdkfd-CMA-Handle-userptr-to-userptr-BO-copy.patch
new file mode 100644
index 00000000..0ab2b27c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4162-drm-amdkfd-CMA-Handle-userptr-to-userptr-BO-copy.patch
@@ -0,0 +1,284 @@
+From 4d98ca2586f4857e43946b29175cb5d953d79b15 Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Mon, 26 Mar 2018 16:45:06 -0400
+Subject: [PATCH 4162/5725] drm/amdkfd: CMA: Handle userptr to userptr BO copy
+
+The CMA userptr implementation is incomplete because it doesn't properly
+handle the case where the BO is evicted. This patch handles the case where
+both source and destination BOs are userptr. In that case it is more
+efficient to use the CPU to do the copy, very similar to the
+process_vm_read/write() functions.
+
+Change-Id: I5d01d906f04190d71e8663785718060411dede4e
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
+
+Conflicts:
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 179 +++++++++++++++++++++++++++++--
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +
+ 2 files changed, 172 insertions(+), 9 deletions(-)
+
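+As an aside (illustrative only, not part of the patch): the userptr-to-userptr
+path is the kernel-side analogue of the process_vm_readv()/process_vm_writev()
+syscalls mentioned above, i.e. a CPU copy between two address spaces. A
+minimal, self-contained userspace sketch of that syscall, assuming a
+Linux/glibc build:
+
+    #define _GNU_SOURCE
+    #include <stdio.h>
+    #include <sys/uio.h>
+    #include <unistd.h>
+
+    int main(void)
+    {
+        char src[64] = "copied with the CPU across address spaces";
+        char dst[64] = { 0 };
+        struct iovec local  = { .iov_base = dst, .iov_len = sizeof(dst) };
+        struct iovec remote = { .iov_base = src, .iov_len = sizeof(src) };
+
+        /* Reading from our own pid here; a real caller passes a remote
+         * pid it is allowed to ptrace.
+         */
+        ssize_t n = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
+        if (n < 0)
+            perror("process_vm_readv");
+        else
+            printf("copied %zd bytes: %s\n", n, dst);
+        return 0;
+    }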
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index ef1bd27..bd09647 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -35,6 +35,7 @@
+ #include <linux/mman.h>
+ #include <asm/processor.h>
+ #include <linux/ptrace.h>
++#include <linux/pagemap.h>
+
+ #include "kfd_priv.h"
+ #include "kfd_device_queue_manager.h"
+@@ -1714,6 +1715,12 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
+ return r;
+ }
+
++/* Maximum number of entries for process pages array which lives on stack */
++#define MAX_PP_STACK_COUNT 16
++/* Maximum number of pages kmalloc'd to hold struct page's during copy */
++#define MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
++#define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *))
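++/* e.g. with 4 KiB pages and 8-byte pointers: (4096 * 2) / 8 = 1024 entries */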
++
+/* Update cma_iter.cur_bo with KFD BO that is associated with
+ * cma_iter.array.va_addr
+ */
+@@ -1762,7 +1769,8 @@ static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size)
+ }
+
+ static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
+- struct kfd_process *p, struct cma_iter *ci)
++ struct kfd_process *p, struct mm_struct *mm,
++ struct task_struct *task, struct cma_iter *ci)
+ {
+ int ret;
+ int nr;
+@@ -1775,6 +1783,8 @@ static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
+ ci->nr_segs = segs;
+ ci->p = p;
+ ci->offset = 0;
++ ci->mm = mm;
++ ci->task = task;
+ for (nr = 0; nr < segs; nr++)
+ ci->total += arr[nr].size;
+
+@@ -1795,6 +1805,159 @@ static bool kfd_cma_iter_end(struct cma_iter *ci)
+ return false;
+ }
+
++/* Copies @size bytes from si->cur_bo to di->cur_bo BO. The function assumes
++ * both source and dest. BOs are userptr BOs. Both BOs can either belong to
++ * the current process or one of the BOs can belong to a different
++ * process. Returns 0 on success, -ve on failure
++ *
++ * @si: Source iter
++ * @di: Dest. iter
++ * @cma_write: Indicates if it is write to remote or read from remote
++ * @size: amount of bytes to be copied
++ * @copied: Return number of bytes actually copied.
++ */
++static int kfd_copy_userptr_bos(struct cma_iter *si, struct cma_iter *di,
++ bool cma_write, uint64_t size,
++ uint64_t *copied)
++{
++ int i, ret = 0, locked, nl;
++ unsigned int nents;
++ unsigned int offset_in_page;
++ struct page *pp_stack[MAX_PP_STACK_COUNT];
++ struct page **process_pages = pp_stack;
++ unsigned long rva, lva = 0, flags = 0;
++ uint64_t copy_size, to_copy = size;
++ struct cma_iter *li, *ri;
++
++ if (cma_write) {
++ ri = di;
++ li = si;
++ flags |= FOLL_WRITE;
++ } else {
++ li = di;
++ ri = si;
++ }
++ /* rva: remote virtual address. Page aligned to start page.
++ * rva + offset_in_page: Points to remote start address
++ * lva: local virtual address. Points to the start address.
++ * nents: computes number of remote pages to request
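++ * A worked example with hypothetical values: bo_offset = 0x1810 and
++ * size = 0x2000 with 4 KiB pages gives offset_in_page = 0x810,
++ * nents = (0x2000 + 0x810 + 0xfff) / 0x1000 = 3 pages, and a first
++ * copy_size of 0x1000 - 0x810 = 0x7f0 bytes.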
++ */
++ offset_in_page = ri->bo_offset & (PAGE_SIZE - 1);
++ rva = (ri->cur_bo->cpuva + ri->bo_offset) & PAGE_MASK;
++ lva = li->cur_bo->cpuva + li->bo_offset;
++
++ nents = (size + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE;
++
++ copy_size = min_t(uint64_t, size, PAGE_SIZE - offset_in_page);
++ *copied = 0;
++
++ if (nents > MAX_PP_STACK_COUNT) {
++ /* For reliability kmalloc only 2 pages worth */
++ process_pages = kmalloc(min_t(size_t, MAX_KMALLOC_PAGES,
++ sizeof(struct page *)*nents),
++ GFP_KERNEL);
++
++ if (!process_pages)
++ return -ENOMEM;
++ }
++
++ while (nents && to_copy) {
++ nl = min_t(unsigned int, MAX_PP_KMALLOC_COUNT, nents);
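++ /* get_user_pages_remote() may drop mmap_sem and clear 'locked' on
++ * VM_FAULT_RETRY; only unlock below if the lock is still held.
++ */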
++ locked = 1;
++ down_read(&ri->mm->mmap_sem);
++ nl = get_user_pages_remote(ri->task, ri->mm, rva, nl,
++ flags, process_pages, NULL,
++ &locked);
++ if (locked)
++ up_read(&ri->mm->mmap_sem);
++ if (nl <= 0) {
++ pr_err("CMA: Invalid virtual address 0x%lx\n", rva);
++ ret = -EFAULT;
++ break;
++ }
++
++ for (i = 0; i < nl; i++) {
++ unsigned int n;
++ void *kaddr = kmap_atomic(process_pages[i]);
++
++ if (cma_write) {
++ n = copy_from_user(kaddr+offset_in_page,
++ (void *)lva, copy_size);
++ set_page_dirty(process_pages[i]);
++ } else {
++ n = copy_to_user((void *)lva,
++ kaddr+offset_in_page,
++ copy_size);
++ }
++ kunmap_atomic(kaddr);
++ if (n) {
++ ret = -EFAULT;
++ break;
++ }
++ to_copy -= copy_size;
++ if (!to_copy)
++ break;
++ lva += copy_size;
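++ /* the first chunk included offset_in_page, so rva stays page-aligned */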
++ rva += (copy_size + offset_in_page);
++ WARN_ONCE(rva & (PAGE_SIZE - 1),
++ "CMA: Error in remote VA computation");
++ offset_in_page = 0;
++ copy_size = min_t(uint64_t, to_copy, PAGE_SIZE);
++ }
++
++ for (i = 0; i < nl; i++)
++ put_page(process_pages[i]);
++
++ if (ret)
++ break;
++ nents -= nl;
++ }
++
++ if (process_pages != pp_stack)
++ kfree(process_pages);
++
++ *copied = (size - to_copy);
++ return ret;
++
++}
++
++/* Copies @size bytes from si->cur_bo to di->cur_bo starting at their
++ * respective offsets.
++ * @si: Source iter
++ * @di: Dest. iter
++ * @cma_write: Indicates if it is write to remote or read from remote
++ * @size: amount of bytes to be copied
++ * @f: Return the last fence if any
++ * @copied: Return number of bytes actually copied.
++ */
++static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di,
++ int cma_write, uint64_t size,
++ struct dma_fence **f, uint64_t *copied)
++{
++ int err = 0;
++ struct kfd_bo *dst_bo = di->cur_bo, *src_bo = si->cur_bo;
++ uint64_t src_offset = si->bo_offset, dst_offset = di->bo_offset;
++ struct kgd_mem *src_mem = src_bo->mem, *dst_mem = dst_bo->mem;
++
++ *copied = 0;
++ if (f)
++ *f = NULL;
++ if (src_bo->cpuva && dst_bo->cpuva)
++ return kfd_copy_userptr_bos(si, di, cma_write, size, copied);
++
++ if (src_bo->dev->kgd != dst_bo->dev->kgd) {
++ pr_err("CMA %d fail. Not same dev\n", cma_write);
++ err = -EINVAL;
++ }
++
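++ /* both BOs live on the same device, so let the GPU do the copy */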
++ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd, src_mem,
++ src_offset, dst_mem,
++ dst_offset, size, f,
++ copied);
++
++ return err;
++}
++
+ /* Copy single range from source iterator @si to destination iterator @di.
+ * @si will move to next range and @di will move by bytes copied.
+ * @return : 0 for success or -ve for failure
+@@ -1829,11 +1992,9 @@ static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di,
+ return -EINVAL;
+ }
+
+- err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd,
+- src_bo->mem, si->bo_offset, dst_bo->mem, di->bo_offset,
+- copy_size, &fence, &n);
++ err = kfd_copy_bos(si, di, cma_write, copy_size, &fence, &n);
+ if (err) {
+- pr_err("GPU CMA %d failed\n", err);
++ pr_err("CMA %d failed\n", err);
+ break;
+ }
+
+@@ -1942,11 +2103,11 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ cma_op = "WRITE";
+ pr_debug("CMA WRITE: local -> remote\n");
+ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
+- remote_p, &di);
++ remote_p, remote_mm, remote_task, &di);
+ if (err)
+ goto kfd_process_fail;
+ err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
+- local_p, &si);
++ local_p, current->mm, current, &si);
+ if (err)
+ goto kfd_process_fail;
+ } else {
+@@ -1954,11 +2115,11 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ pr_debug("CMA READ: remote -> local\n");
+
+ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
+- local_p, &di);
++ local_p, current->mm, current, &di);
+ if (err)
+ goto kfd_process_fail;
+ err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
+- remote_p, &si);
++ remote_p, remote_mm, remote_task, &si);
+ if (err)
+ goto kfd_process_fail;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index facd9d9..2744154 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -316,6 +316,8 @@ struct cma_iter {
+ /* offset into the entry pointed by cma_iter.array */
+ unsigned long offset;
+ struct kfd_process *p;
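++ /* mm and task of the process that owns the ranges in this iter */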
++ struct mm_struct *mm;
++ struct task_struct *task;
+ /* current kfd_bo associated with cma_iter.array.va_addr */
+ struct kfd_bo *cur_bo;
+ /* offset w.r.t cur_bo */
+--
+2.7.4
+