Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch  311
1 file changed, 311 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch
new file mode 100644
index 00000000..9aaeb05c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch
@@ -0,0 +1,311 @@
+From e3660ab379356393311bf7d36234d71012b61f0d Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Tue, 27 Mar 2018 14:36:18 -0400
+Subject: [PATCH 4165/5725] drm/amdkfd: CMA: Use shadow system BO for userptr
+
+userptr BOs could be evicted during CMA operations. If one of the BOs
+involved is a userptr, a shadow BO is created from its underlying
+pages. An sg table is built by pinning the backing system pages, and a
+system BO is created from this sg table. This temporary BO is used for
+the copy operation.
+
+v2: get_user_pages() could return fewer pages than requested. Handle
+this condition.
+
+Change-Id: Ied26bb481bfa8bb5b488f46f94451477b45746e0
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 216 ++++++++++++++++++++++++++++++-
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 ++
+ 2 files changed, 227 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 1a35938..a242208 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -1721,6 +1721,187 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
+ #define MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
+ #define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *))
+
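++/* Unpin the system pages that were pinned when the sg table was built */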
++static void kfd_put_sg_table(struct sg_table *sg)
++{
++ unsigned int i;
++ struct scatterlist *s;
++
++ for_each_sg(sg->sgl, s, sg->nents, i)
++ put_page(sg_page(s));
++}
++
++/* Create an sg table for the given userptr BO by pinning its system pages
++ * @bo: userptr BO
++ * @offset: Offset into BO
++ * @cma_write: if non-zero, pin the pages for write access (FOLL_WRITE)
++ * @mm/@task: mm_struct & task_struct of the process that holds the BO
++ * @size: in/out: desired size / actual size which could be smaller
++ * @sg_size: out: Size of sg table. This is ALIGN_UP(@size)
++ * @ret_sg: out: sg table
++ */
++static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo,
++ int64_t offset, int cma_write,
++ struct mm_struct *mm,
++ struct task_struct *task,
++ uint64_t *size,
++ uint64_t *sg_size,
++ struct sg_table **ret_sg)
++{
++ int ret, locked = 1;
++ struct sg_table *sg = NULL;
++ unsigned int i, offset_in_page, flags = 0;
++ unsigned long nents;
++ long n;
++ unsigned long va = (bo->cpuva + offset) & PAGE_MASK;
++ unsigned int cur_page = 0;
++ struct scatterlist *s;
++ uint64_t sz = *size;
++ struct page **process_pages;
++
++ *sg_size = 0;
++ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
++ if (!sg)
++ return -ENOMEM;
++
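++ /* The copy window may start mid-page; account for the sub-page offset
++ * when computing how many pages need to be pinned.
++ */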
++ offset_in_page = offset & (PAGE_SIZE - 1);
++ nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE;
++
++ ret = sg_alloc_table(sg, nents, GFP_KERNEL);
++ if (unlikely(ret)) {
++ ret = -ENOMEM;
++ goto sg_alloc_fail;
++ }
++ process_pages = kmalloc_array(nents, sizeof(struct page *),
++ GFP_KERNEL);
++ if (!process_pages) {
++ ret = -ENOMEM;
++ goto page_alloc_fail;
++ }
++
++ if (cma_write)
++ flags = FOLL_WRITE;
++ locked = 1;
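++ /* get_user_pages_remote() may drop mmap_sem and clear @locked, so
++ * release the semaphore only if it is still held on return.
++ */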
++ down_read(&mm->mmap_sem);
++ n = get_user_pages_remote(task, mm, va, nents, flags, process_pages,
++ NULL, &locked);
++ if (locked)
++ up_read(&mm->mmap_sem);
++ if (n <= 0) {
++ pr_err("CMA: Invalid virtual address 0x%lx\n", pa);
++ ret = -EFAULT;
++ goto get_user_fail;
++ }
++ if (n != nents) {
++ /* Pages pinned < requested. Shrink the sg table and the size
++ * accordingly, so unpinned entries are never walked
++ */
++ sg->nents = n;
++ *size = (n * PAGE_SIZE) - offset_in_page;
++ pr_debug("Requested %lx but pinned %lx\n", nents, n);
++ }
++
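++ /* Fill the sg entries; only the first entry carries the sub-page
++ * offset, all following pages start at offset 0.
++ */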
++ sz = 0;
++ for_each_sg(sg->sgl, s, n, i) {
++ sg_set_page(s, process_pages[cur_page], PAGE_SIZE,
++ offset_in_page);
++ sg_dma_address(s) = page_to_phys(process_pages[cur_page]);
++ offset_in_page = 0;
++ cur_page++;
++ sz += PAGE_SIZE;
++ }
++ *ret_sg = sg;
++ *sg_size = sz;
++
++ kfree(process_pages);
++ return 0;
++
++get_user_fail:
++ kfree(process_pages);
++page_alloc_fail:
++ sg_free_table(sg);
++sg_alloc_fail:
++ kfree(sg);
++ return ret;
++}
++
++static void kfd_free_cma_bos(struct cma_iter *ci)
++{
++ struct cma_system_bo *cma_bo, *tmp;
++
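++ /* Deferred cleanup: unpin the shadow pages, free each shadow BO and
++ * drop it from the list.
++ */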
++ list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) {
++ struct kfd_dev *dev = cma_bo->dev;
++
++ /* sg table is deleted by free_memory_of_gpu */
++ kfd_put_sg_table(cma_bo->sg);
++ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, cma_bo->mem);
++ list_del(&cma_bo->list);
++ kfree(cma_bo);
++ }
++}
++
++/* Create a system BO by pinning underlying system pages of the given userptr
++ * BO @ubo
++ * @ubo: Userptr BO
++ * @offset: Offset into ubo
++ * @size: in/out: The size of the new BO may be less than requested if
++ * not all of the pages could be pinned
++ * @mm/@task: mm/task to which @ubo belongs
++ * @cma_bo: out: new system BO
++ */
++static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *ubo,
++ uint64_t *size, uint64_t offset,
++ int cma_write, struct kfd_process *p,
++ struct mm_struct *mm,
++ struct task_struct *task,
++ struct cma_system_bo **cma_bo)
++{
++ int ret;
++ struct kfd_process_device *pdd = NULL;
++ struct cma_system_bo *cbo;
++ uint64_t sg_size;
++
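++ /* The shadow BO wraps already-pinned system pages, so place it in
++ * GTT and disallow paging or domain substitution.
++ */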
++ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
++ ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
++
++ *cma_bo = NULL;
++ cbo = kzalloc(sizeof(*cbo), GFP_KERNEL);
++ if (!cbo)
++ return -ENOMEM;
++
++ INIT_LIST_HEAD(&cbo->list);
++ ret = kfd_create_sg_table_from_userptr_bo(ubo, offset, cma_write, mm,
++ task, size, &sg_size,
++ &cbo->sg);
++ if (ret) {
++ pr_err("Failed to create system BO. sg table error %d\n", ret);
++ kfree(cbo);
++ return ret;
++ }
++
++ mutex_lock(&p->mutex);
++ pdd = kfd_get_process_device_data(kdev, p);
++ if (!pdd) {
++ pr_err("Process device data doesn't exist\n");
++ ret = -EINVAL;
++ goto pdd_fail;
++ }
++
++ ret = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, 0ULL, sg_size,
++ pdd->vm, cbo->sg,
++ &cbo->mem, NULL, flags);
++ if (ret) {
++ pr_err("Failed to create shadow system BO %d\n", ret);
++ goto pdd_fail;
++ }
++ mutex_unlock(&p->mutex);
++ cbo->dev = kdev;
++ *cma_bo = cbo;
++
++ return ret;
++
++pdd_fail:
++ mutex_unlock(&p->mutex);
++ kfd_put_sg_table(cbo->sg);
++ sg_free_table(cbo->sg);
++ kfree(cbo->sg);
++ kfree(cbo);
++ return ret;
++}
++
+ /* Update cma_iter.cur_bo with KFD BO that is associated with
+ * cma_iter.array.va_addr
+ */
+@@ -1779,6 +1960,7 @@ static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
+ return -EINVAL;
+
+ memset(ci, 0, sizeof(*ci));
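++ /* Shadow system BOs created during this session are queued on
++ * cma_list for deferred cleanup.
++ */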
++ INIT_LIST_HEAD(&ci->cma_list);
+ ci->array = arr;
+ ci->nr_segs = segs;
+ ci->p = p;
+@@ -1945,16 +2127,43 @@ static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di,
+ if (src_bo->cpuva && dst_bo->cpuva)
+ return kfd_copy_userptr_bos(si, di, cma_write, size, copied);
+
+- if (src_bo->dev->kgd != dst_bo->dev->kgd) {
++ /* If either the source or the destination is a userptr, create a
++ * shadow system BO from the underlying userptr BO pages and use that
++ * shadow BO for the copy. src_offset & dst_offset are adjusted because
++ * the new BO is created only for the requested window (offset, size).
++ * The BOs are stored in cma_list for deferred cleanup, which limits
++ * fence waiting to just the last fence.
++ */
++ if (src_bo->cpuva) {
++ err = kfd_create_cma_system_bo(dst_bo->dev, src_bo, &size,
++ si->bo_offset, cma_write,
++ si->p, si->mm, si->task,
++ &si->cma_bo);
++ if (!err) {
++ src_mem = si->cma_bo->mem;
++ src_offset = si->bo_offset & (PAGE_SIZE - 1);
++ list_add_tail(&si->cma_bo->list, &si->cma_list);
++ }
++ } else if (dst_bo->cpuva) {
++ err = kfd_create_cma_system_bo(src_bo->dev, dst_bo, &size,
++ di->bo_offset, cma_write,
++ di->p, di->mm, di->task,
++ &di->cma_bo);
++ if (!err) {
++ dst_mem = di->cma_bo->mem;
++ dst_offset = di->bo_offset & (PAGE_SIZE - 1);
++ list_add_tail(&di->cma_bo->list, &di->cma_list);
++ }
++ } else if (src_bo->dev->kgd != dst_bo->dev->kgd) {
+ pr_err("CMA %d fail. Not same dev\n", cma_write);
+ err = -EINVAL;
+ }
+
++ if (err)
++ return err;
++
+ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd, src_mem,
+ src_offset, dst_mem,
+ dst_offset, size, f,
+ copied);
+-
+ return err;
+ }
+
+@@ -2156,6 +2365,9 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ dma_fence_put(lfence);
+ }
+
++ kfd_free_cma_bos(&si);
++ kfd_free_cma_bos(&di);
++
+ kfd_process_fail:
+ mmput(remote_mm);
+ mm_access_fail:
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 2744154..cbb65b0 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -305,6 +305,13 @@ struct kfd_bo {
+ uint64_t cpuva;
+ };
+
++struct cma_system_bo {
++ struct kgd_mem *mem;
++ struct sg_table *sg;
++ struct kfd_dev *dev;
++ struct list_head list;
++};
++
+ /* Similar to iov_iter */
+ struct cma_iter {
+ /* points to current entry of range array */
+@@ -322,6 +329,12 @@ struct cma_iter {
+ struct kfd_bo *cur_bo;
+ /* offset w.r.t cur_bo */
+ unsigned long bo_offset;
++ /* If cur_bo is a userptr BO, then a shadow system BO is created
++ * from its underlying pages. cma_bo holds this BO. cma_list is a
++ * list of the cma_bos created in one session
++ */
++ struct cma_system_bo *cma_bo;
++ struct list_head cma_list;
+ };
+
+ /* KGD2KFD callbacks */
+--
+2.7.4
+