about summary refs log tree commit diff stats
path: root/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch')
-rw-r--r-- meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch 311
1 files changed, 0 insertions, 311 deletions
diff --git a/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch b/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch
deleted file mode 100644
index 9aaeb05c..00000000
--- a/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4165-drm-amdkfd-CMA-Use-shadow-system-BO-for-userptr.patch
+++ /dev/null
@@ -1,311 +0,0 @@
-From e3660ab379356393311bf7d36234d71012b61f0d Mon Sep 17 00:00:00 2001
-From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
-Date: Tue, 27 Mar 2018 14:36:18 -0400
-Subject: [PATCH 4165/5725] drm/amdkfd: CMA: Use shadow system BO for userptr
-
-userptrs BO could be evicted during CMA operations. If one of the BO
-involved is a userptr, then a shadow BO is created using its underlying
-pages. A sg table is created by pinning the backing system pages and
-system BO is created using this sg table. This temporary BO is used for
-the copy operation.
-
-v2: get_user_pages() could return less than requrested pages. Handle
-this condition
-
-Change-Id: Ied26bb481bfa8bb5b488f46f94451477b45746e0
-Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
----
- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 216 ++++++++++++++++++++++++++++++-
- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 ++
- 2 files changed, 227 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-index 1a35938..a242208 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
-@@ -1721,6 +1721,187 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
- #define MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
- #define MAX_PP_KMALLOC_COUNT (MAX_KMALLOC_PAGES/sizeof(struct page *))
-
-+static void kfd_put_sg_table(struct sg_table *sg)
-+{
-+ unsigned int i;
-+ struct scatterlist *s;
-+
-+ for_each_sg(sg->sgl, s, sg->nents, i)
-+ put_page(sg_page(s));
-+}
-+
-+
-+/* Create a sg table for the given userptr BO by pinning its system pages
-+ * @bo: userptr BO
-+ * @offset: Offset into BO
-+ * @mm/@task: mm_struct & task_struct of the process that holds the BO
-+ * @size: in/out: desired size / actual size which could be smaller
-+ * @sg_size: out: Size of sg table. This is ALIGN_UP(@size)
-+ * @ret_sg: out sg table
-+ */
-+static int kfd_create_sg_table_from_userptr_bo(struct kfd_bo *bo,
-+ int64_t offset, int cma_write,
-+ struct mm_struct *mm,
-+ struct task_struct *task,
-+ uint64_t *size,
-+ uint64_t *sg_size,
-+ struct sg_table **ret_sg)
-+{
-+ int ret, locked = 1;
-+ struct sg_table *sg = NULL;
-+ unsigned int i, offset_in_page, flags = 0;
-+ unsigned long nents, n;
-+ unsigned long pa = (bo->cpuva + offset) & PAGE_MASK;
-+ unsigned int cur_page = 0;
-+ struct scatterlist *s;
-+ uint64_t sz = *size;
-+ struct page **process_pages;
-+
-+ *sg_size = 0;
-+ sg = kmalloc(sizeof(*sg), GFP_KERNEL);
-+ if (!sg)
-+ return -ENOMEM;
-+
-+ offset_in_page = offset & (PAGE_SIZE - 1);
-+ nents = (sz + offset_in_page + PAGE_SIZE - 1) / PAGE_SIZE;
-+
-+ ret = sg_alloc_table(sg, nents, GFP_KERNEL);
-+ if (unlikely(ret)) {
-+ ret = -ENOMEM;
-+ goto sg_alloc_fail;
-+ }
-+ process_pages = kmalloc_array(nents, sizeof(struct pages *),
-+ GFP_KERNEL);
-+ if (!process_pages) {
-+ ret = -ENOMEM;
-+ goto page_alloc_fail;
-+ }
-+
-+ if (cma_write)
-+ flags = FOLL_WRITE;
-+ locked = 1;
-+ down_read(&mm->mmap_sem);
-+ n = get_user_pages_remote(task, mm, pa, nents, flags, process_pages,
-+ NULL, &locked);
-+ if (locked)
-+ up_read(&mm->mmap_sem);
-+ if (n <= 0) {
-+ pr_err("CMA: Invalid virtual address 0x%lx\n", pa);
-+ ret = -EFAULT;
-+ goto get_user_fail;
-+ }
-+ if (n != nents) {
-+ /* Pages pinned < requested. Set the size accordingly */
-+ *size = (n * PAGE_SIZE) - offset_in_page;
-+ pr_debug("Requested %lx but pinned %lx\n", nents, n);
-+ }
-+
-+ sz = 0;
-+ for_each_sg(sg->sgl, s, n, i) {
-+ sg_set_page(s, process_pages[cur_page], PAGE_SIZE,
-+ offset_in_page);
-+ sg_dma_address(s) = page_to_phys(process_pages[cur_page]);
-+ offset_in_page = 0;
-+ cur_page++;
-+ sz += PAGE_SIZE;
-+ }
-+ *ret_sg = sg;
-+ *sg_size = sz;
-+
-+ kfree(process_pages);
-+ return 0;
-+
-+get_user_fail:
-+ kfree(process_pages);
-+page_alloc_fail:
-+ sg_free_table(sg);
-+sg_alloc_fail:
-+ kfree(sg);
-+ return ret;
-+}
-+
-+static void kfd_free_cma_bos(struct cma_iter *ci)
-+{
-+ struct cma_system_bo *cma_bo, *tmp;
-+
-+ list_for_each_entry_safe(cma_bo, tmp, &ci->cma_list, list) {
-+ struct kfd_dev *dev = cma_bo->dev;
-+
-+ /* sg table is deleted by free_memory_of_gpu */
-+ kfd_put_sg_table(cma_bo->sg);
-+ dev->kfd2kgd->free_memory_of_gpu(dev->kgd, cma_bo->mem);
-+ list_del(&cma_bo->list);
-+ kfree(cma_bo);
-+ }
-+}
-+
-+/* Create a system BO by pinning underlying system pages of the given userptr
-+ * BO @ubo
-+ * @ubo: Userptr BO
-+ * @offset: Offset into ubo
-+ * @size: in/out: The size of the new BO could be less than requested if all
-+ * the pages couldn't be pinned. This would be reflected in @size
-+ * @mm/@task: mm/task to which @ubo belongs to
-+ * @cma_bo: out: new system BO
-+ */
-+static int kfd_create_cma_system_bo(struct kfd_dev *kdev, struct kfd_bo *ubo,
-+ uint64_t *size, uint64_t offset,
-+ int cma_write, struct kfd_process *p,
-+ struct mm_struct *mm,
-+ struct task_struct *task,
-+ struct cma_system_bo **cma_bo)
-+{
-+ int ret;
-+ struct kfd_process_device *pdd = NULL;
-+ struct cma_system_bo *cbo;
-+ uint64_t sg_size;
-+
-+ uint32_t flags = ALLOC_MEM_FLAGS_GTT | ALLOC_MEM_FLAGS_NONPAGED |
-+ ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
-+
-+ *cma_bo = NULL;
-+ cbo = kzalloc(sizeof(**cma_bo), GFP_KERNEL);
-+ if (!cbo)
-+ return -ENOMEM;
-+
-+ INIT_LIST_HEAD(&cbo->list);
-+ ret = kfd_create_sg_table_from_userptr_bo(ubo, offset, cma_write, mm,
-+ task, size, &sg_size,
-+ &cbo->sg);
-+ if (ret) {
-+ pr_err("Failed to create system BO. sg table error %d\n", ret);
-+ return ret;
-+ }
-+
-+ mutex_lock(&p->mutex);
-+ pdd = kfd_get_process_device_data(kdev, p);
-+ if (!pdd) {
-+ pr_err("Process device data doesn't exist\n");
-+ ret = -EINVAL;
-+ goto pdd_fail;
-+ }
-+
-+ ret = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, 0ULL, sg_size,
-+ pdd->vm, cbo->sg,
-+ &cbo->mem, NULL, flags);
-+ if (ret) {
-+ pr_err("Failed to create shadow system BO %d\n", ret);
-+ goto pdd_fail;
-+ }
-+ mutex_unlock(&p->mutex);
-+ cbo->dev = kdev;
-+ *cma_bo = cbo;
-+
-+ return ret;
-+
-+pdd_fail:
-+ mutex_unlock(&p->mutex);
-+ kfd_put_sg_table(cbo->sg);
-+ sg_free_table(cbo->sg);
-+ kfree(cbo->sg);
-+ return ret;
-+}
-+
- /* Update cma_iter.cur_bo with KFD BO that is assocaited with
- * cma_iter.array.va_addr
- */
-@@ -1779,6 +1960,7 @@ static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
- return -EINVAL;
-
- memset(ci, 0, sizeof(*ci));
-+ INIT_LIST_HEAD(&ci->cma_list);
- ci->array = arr;
- ci->nr_segs = segs;
- ci->p = p;
-@@ -1945,16 +2127,43 @@ static int kfd_copy_bos(struct cma_iter *si, struct cma_iter *di,
- if (src_bo->cpuva && dst_bo->cpuva)
- return kfd_copy_userptr_bos(si, di, cma_write, size, copied);
-
-- if (src_bo->dev->kgd != dst_bo->dev->kgd) {
-+ /* If either source or dest. is userptr, create a shadow system BO
-+ * by using the underlying userptr BO pages. Then use this shadow
-+ * BO for copy. src_offset & dst_offset are adjusted because the new BO
-+ * is only created for the window (offset, size) requested.
-+ * The BOs are stored in cma_list for deferred cleanup. This minimizes
-+ * fence waiting just to the last fence.
-+ */
-+ if (src_bo->cpuva) {
-+ err = kfd_create_cma_system_bo(dst_bo->dev, src_bo, &size,
-+ si->bo_offset, cma_write,
-+ si->p, si->mm, si->task,
-+ &si->cma_bo);
-+ src_mem = si->cma_bo->mem;
-+ src_offset = si->bo_offset & (PAGE_SIZE - 1);
-+ list_add_tail(&si->cma_bo->list, &si->cma_list);
-+ } else if (dst_bo->cpuva) {
-+ err = kfd_create_cma_system_bo(src_bo->dev, dst_bo, &size,
-+ di->bo_offset, cma_write,
-+ di->p, di->mm, di->task,
-+ &di->cma_bo);
-+ dst_mem = di->cma_bo->mem;
-+ dst_offset = di->bo_offset & (PAGE_SIZE - 1);
-+ list_add_tail(&di->cma_bo->list, &di->cma_list);
-+ } else if (src_bo->dev->kgd != dst_bo->dev->kgd) {
- pr_err("CMA %d fail. Not same dev\n", cma_write);
- err = -EINVAL;
- }
-
-+ if (err) {
-+ pr_err("Failed to create system BO %d", err);
-+ err = -EINVAL;
-+ }
-+
- err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd, src_mem,
- src_offset, dst_mem,
- dst_offset, size, f,
- copied);
--
- return err;
- }
-
-@@ -2156,6 +2365,9 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
- dma_fence_put(lfence);
- }
-
-+ kfd_free_cma_bos(&si);
-+ kfd_free_cma_bos(&di);
-+
- kfd_process_fail:
- mmput(remote_mm);
- mm_access_fail:
-diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-index 2744154..cbb65b0 100644
---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
-@@ -305,6 +305,13 @@ struct kfd_bo {
- uint64_t cpuva;
- };
-
-+struct cma_system_bo {
-+ struct kgd_mem *mem;
-+ struct sg_table *sg;
-+ struct kfd_dev *dev;
-+ struct list_head list;
-+};
-+
- /* Similar to iov_iter */
- struct cma_iter {
- /* points to current entry of range array */
-@@ -322,6 +329,12 @@ struct cma_iter {
- struct kfd_bo *cur_bo;
- /* offset w.r.t cur_bo */
- unsigned long bo_offset;
-+ /* If cur_bo is a userptr BO, then a shadow system BO is created
-+ * using its underlying pages. cma_bo holds this BO. cma_list is a
-+ * list cma_bos created in one session
-+ */
-+ struct cma_system_bo *cma_bo;
-+ struct list_head cma_list;
- };
-
- /* KGD2KFD callbacks */
---
-2.7.4
-