diff options
Diffstat (limited to 'meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch')
-rw-r--r-- | meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch | 428 |
1 files changed, 0 insertions, 428 deletions
diff --git a/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch b/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch deleted file mode 100644 index 67343863..00000000 --- a/meta-r1000/recipes-kernel/linux/linux-yocto-4.14.71/4160-drm-amdkfd-CMA-Refactor-CMA-code.patch +++ /dev/null @@ -1,428 +0,0 @@ -From 4c5de602a4b2e3a49fe96f5ab6403697209e0121 Mon Sep 17 00:00:00 2001 -From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> -Date: Thu, 22 Mar 2018 17:25:54 -0400 -Subject: [PATCH 4160/5725] drm/amdkfd: CMA: Refactor CMA code - -This is similar to process_vm_rw() functions. This refactoring is also -helpful for the special handling of userptr BOs (upcoming commits). - -This commit does not change any functionality. - -v2: Fix potential fence leak - -Change-Id: Ic8f9c6a7599d2beac54d768831618df0207f10e9 -Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> -Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> - -Conflicts: - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ---- - drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 329 +++++++++++++++++-------------- - drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++ - 2 files changed, 203 insertions(+), 143 deletions(-) - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -index fd62468..ebb862b 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c -@@ -1706,22 +1706,164 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep, - return r; - } - -+/* Update cma_iter.cur_bo with KFD BO that is assocaited with -+ * cma_iter.array.va_addr -+ */ -+static int kfd_cma_iter_update_bo(struct cma_iter *ci) -+{ -+ struct kfd_memory_range *arr = ci->array; -+ uint64_t va_end = arr->va_addr + arr->size - 1; -+ -+ mutex_lock(&ci->p->mutex); -+ ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr, -+ va_end); -+ mutex_unlock(&ci->p->mutex); -+ -+ if (!ci->cur_bo || va_end > ci->cur_bo->it.last) { -+ pr_err("CMA failed. Range out of bounds\n"); -+ return -EFAULT; -+ } -+ return 0; -+} -+ -+/* Advance iter by @size bytes. */ -+static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size) -+{ -+ int ret = 0; -+ -+ ci->offset += size; -+ if (WARN_ON(size > ci->total || ci->offset > ci->array->size)) -+ return -EFAULT; -+ ci->total -= size; -+ /* If current range is copied, move to next range if available. */ -+ if (ci->offset == ci->array->size) { -+ -+ /* End of all ranges */ -+ if (!(--ci->nr_segs)) -+ return 0; -+ -+ ci->array++; -+ ci->offset = 0; -+ ret = kfd_cma_iter_update_bo(ci); -+ if (ret) -+ return ret; -+ } -+ ci->bo_offset = (ci->array->va_addr + ci->offset) - -+ ci->cur_bo->it.start; -+ return ret; -+} -+ -+static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs, -+ struct kfd_process *p, struct cma_iter *ci) -+{ -+ int ret; -+ int nr; -+ -+ if (!arr || !segs) -+ return -EINVAL; -+ -+ memset(ci, 0, sizeof(*ci)); -+ ci->array = arr; -+ ci->nr_segs = segs; -+ ci->p = p; -+ ci->offset = 0; -+ for (nr = 0; nr < segs; nr++) -+ ci->total += arr[nr].size; -+ -+ /* Valid but size is 0. So copied will also be 0 */ -+ if (!ci->total) -+ return 0; -+ -+ ret = kfd_cma_iter_update_bo(ci); -+ if (!ret) -+ ci->bo_offset = arr->va_addr - ci->cur_bo->it.start; -+ return ret; -+} -+ -+static bool kfd_cma_iter_end(struct cma_iter *ci) -+{ -+ if (!(ci->nr_segs) || !(ci->total)) -+ return true; -+ return false; -+} -+ -+/* Copy single range from source iterator @si to destination iterator @di. -+ * @si will move to next range and @di will move by bytes copied. -+ * @return : 0 for success or -ve for failure -+ * @f: The last fence if any -+ * @copied: out: number of bytes copied -+ */ -+static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di, -+ bool cma_write, struct dma_fence **f, -+ uint64_t *copied) -+{ -+ int err = 0; -+ uint64_t copy_size, n; -+ uint64_t size = si->array->size; -+ struct kfd_bo *src_bo = si->cur_bo; -+ struct dma_fence *lfence = NULL; -+ -+ if (!src_bo || !di || !copied) -+ return -EINVAL; -+ *copied = 0; -+ if (f) -+ *f = NULL; -+ -+ while (size && !kfd_cma_iter_end(di)) { -+ struct dma_fence *fence = NULL; -+ struct kfd_bo *dst_bo = di->cur_bo; -+ -+ copy_size = min(size, (di->array->size - di->offset)); -+ -+ /* Check both BOs belong to same device */ -+ if (src_bo->dev->kgd != dst_bo->dev->kgd) { -+ pr_err("CMA fail. Not same dev\n"); -+ return -EINVAL; -+ } -+ -+ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd, -+ src_bo->mem, si->bo_offset, dst_bo->mem, di->bo_offset, -+ copy_size, &fence, &n); -+ if (err) { -+ pr_err("GPU CMA %d failed\n", err); -+ break; -+ } -+ -+ if (fence) { -+ dma_fence_put(lfence); -+ lfence = fence; -+ } -+ size -= n; -+ *copied += n; -+ err = kfd_cma_iter_advance(si, n); -+ if (err) -+ break; -+ err = kfd_cma_iter_advance(di, n); -+ if (err) -+ break; -+ } -+ -+ if (f) -+ *f = dma_fence_get(lfence); -+ dma_fence_put(lfence); -+ -+ return err; -+} -+ - static int kfd_ioctl_cross_memory_copy(struct file *filep, - struct kfd_process *local_p, void *data) - { - struct kfd_ioctl_cross_memory_copy_args *args = data; - struct kfd_memory_range *src_array, *dst_array; -- struct kfd_bo *src_bo, *dst_bo; -- struct kfd_process *remote_p, *src_p, *dst_p; -+ struct kfd_process *remote_p; - struct task_struct *remote_task; - struct mm_struct *remote_mm; - struct pid *remote_pid; -- struct dma_fence *fence = NULL, *lfence = NULL; -- uint64_t dst_va_addr; -- uint64_t copied, total_copied = 0; -- uint64_t src_offset, dst_offset, dst_va_addr_end; -+ struct dma_fence *lfence = NULL; -+ uint64_t copied = 0, total_copied = 0; -+ struct cma_iter di, si; - const char *cma_op; -- int i, j = 0, err = 0; -+ int err = 0; - - /* Check parameters */ - if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 || -@@ -1787,160 +1929,61 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep, - err = -EINVAL; - goto kfd_process_fail; - } -- -+ /* Initialise cma_iter si & @di with source & destination range. */ - if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) { -- src_p = local_p; -- dst_p = remote_p; - cma_op = "WRITE"; - pr_debug("CMA WRITE: local -> remote\n"); -+ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, -+ remote_p, &di); -+ if (err) -+ goto kfd_process_fail; -+ err = kfd_cma_iter_init(src_array, args->src_mem_array_size, -+ local_p, &si); -+ if (err) -+ goto kfd_process_fail; - } else { -- src_p = remote_p; -- dst_p = local_p; - cma_op = "READ"; - pr_debug("CMA READ: remote -> local\n"); -- } - -+ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, -+ local_p, &di); -+ if (err) -+ goto kfd_process_fail; -+ err = kfd_cma_iter_init(src_array, args->src_mem_array_size, -+ remote_p, &si); -+ if (err) -+ goto kfd_process_fail; -+ } - -- /* For each source kfd_range: -- * - Find the BO. Each range has to be within the same BO. -- * - Copy this range to single or multiple destination BOs. -- * - dst_va_addr - will point to next va address into which data will -- * be copied. -- * - dst_bo & src_bo - the current destination and source BOs -- * - src_offset & dst_offset - offset into the respective BOs from -- * data will be sourced or copied -+ /* Copy one si range at a time into di. After each call to -+ * kfd_copy_single_range() si will move to next range. di will be -+ * incremented by bytes copied - */ -- dst_va_addr = dst_array[0].va_addr; -- dst_va_addr_end = dst_va_addr + dst_array[0].size - 1; -- mutex_lock(&dst_p->mutex); -- dst_bo = kfd_process_find_bo_from_interval(dst_p, -- dst_va_addr, -- dst_va_addr_end); -- mutex_unlock(&dst_p->mutex); -- if (!dst_bo || dst_va_addr_end > dst_bo->it.last) { -- pr_err("CMA %s failed. Invalid dst range\n", cma_op); -- err = -EFAULT; -- goto kfd_process_fail; -- } -- dst_offset = dst_va_addr - dst_bo->it.start; -- -- for (i = 0; i < args->src_mem_array_size; i++) { -- uint64_t src_va_addr_end = src_array[i].va_addr + -- src_array[i].size - 1; -- uint64_t src_size_to_copy = src_array[i].size; -- -- mutex_lock(&src_p->mutex); -- src_bo = kfd_process_find_bo_from_interval(src_p, -- src_array[i].va_addr, -- src_va_addr_end); -- mutex_unlock(&src_p->mutex); -- if (!src_bo || src_va_addr_end > src_bo->it.last) { -- pr_err("CMA %s failed. Invalid src range\n", cma_op); -- err = -EFAULT; -- break; -- } -+ while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) { -+ struct dma_fence *fence = NULL; - -- src_offset = src_array[i].va_addr - src_bo->it.start; -+ err = kfd_copy_single_range(&si, &di, -+ KFD_IS_CROSS_MEMORY_WRITE(args->flags), -+ &fence, &copied); -+ total_copied += copied; - -- /* Copy src_bo to one or multiple dst_bo(s) based on size and -- * and current copy location. -- */ -- while (j < args->dst_mem_array_size) { -- uint64_t copy_size; -- int64_t space_left; -- -- /* Find the current copy_size. This will be smaller of -- * the following -- * - space left in the current dest memory range -- * - data left to copy from source range -- */ -- space_left = (dst_array[j].va_addr + dst_array[j].size) -- - dst_va_addr; -- copy_size = (src_size_to_copy < space_left) ? -- src_size_to_copy : space_left; -- -- /* Check both BOs belong to same device */ -- if (src_bo->dev->kgd != dst_bo->dev->kgd) { -- pr_err("CMA %s fail. Not same dev\n", cma_op); -- err = -EINVAL; -- break; -- } -+ if (err) -+ break; - -- /* Store prev fence. Release it when a later fence is -- * created -- */ -+ /* Release old fence if a later fence is created. If no -+ * new fence is created, then keep the preivous fence -+ */ -+ if (fence) { -+ dma_fence_put(lfence); - lfence = fence; -- fence = NULL; -- -- err = dst_bo->dev->kfd2kgd->copy_mem_to_mem( -- src_bo->dev->kgd, -- src_bo->mem, src_offset, -- dst_bo->mem, dst_offset, -- copy_size, -- &fence, &copied); -- -- if (err) { -- pr_err("GPU CMA %s failed\n", cma_op); -- break; -- } -- -- /* Later fence available. Release old fence */ -- if (fence && lfence) { -- dma_fence_put(lfence); -- lfence = NULL; -- } -- -- total_copied += copied; -- src_size_to_copy -= copied; -- space_left -= copied; -- dst_va_addr += copied; -- dst_offset += copied; -- src_offset += copied; -- if (dst_va_addr > dst_bo->it.last + 1) { -- pr_err("CMA %s fail. Mem overflow\n", cma_op); -- err = -EFAULT; -- break; -- } -- -- /* If the cur dest range is full move to next one */ -- if (space_left <= 0) { -- if (++j >= args->dst_mem_array_size) -- break; -- -- dst_va_addr = dst_array[j].va_addr; -- dst_va_addr_end = dst_va_addr + -- dst_array[j].size - 1; -- dst_bo = kfd_process_find_bo_from_interval( -- dst_p, -- dst_va_addr, -- dst_va_addr_end); -- if (!dst_bo || -- dst_va_addr_end > dst_bo->it.last) { -- pr_err("CMA %s failed. Invalid dst range\n", -- cma_op); -- err = -EFAULT; -- break; -- } -- dst_offset = dst_va_addr - dst_bo->it.start; -- } -- -- /* If the cur src range is done, move to next one */ -- if (src_size_to_copy <= 0) -- break; - } -- if (err) -- break; - } - - /* Wait for the last fence irrespective of error condition */ -- if (fence) { -- if (dma_fence_wait_timeout(fence, false, msecs_to_jiffies(1000)) -- < 0) -+ if (lfence) { -+ if (dma_fence_wait_timeout(lfence, false, -+ msecs_to_jiffies(1000)) < 0) - pr_err("CMA %s failed. BO timed out\n", cma_op); -- dma_fence_put(fence); -- } else if (lfence) { -- pr_debug("GPU copy fail. But wait for prev DMA to finish\n"); -- dma_fence_wait_timeout(lfence, true, msecs_to_jiffies(1000)); - dma_fence_put(lfence); - } - -diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -index 0a019a6..da61ae8 100644 ---- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h -@@ -303,6 +303,23 @@ struct kfd_bo { - struct kfd_ipc_obj *kfd_ipc_obj; - }; - -+/* Similar to iov_iter */ -+struct cma_iter { -+ /* points to current entry of range array */ -+ struct kfd_memory_range *array; -+ /* total number of entries in the initial array */ -+ unsigned long nr_segs; -+ /* total amount of data pointed by kfd array*/ -+ unsigned long total; -+ /* offset into the entry pointed by cma_iter.array */ -+ unsigned long offset; -+ struct kfd_process *p; -+ /* current kfd_bo associated with cma_iter.array.va_addr */ -+ struct kfd_bo *cur_bo; -+ /* offset w.r.t cur_bo */ -+ unsigned long bo_offset; -+}; -+ - /* KGD2KFD callbacks */ - void kgd2kfd_exit(void); - struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, --- -2.7.4 - |