diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch | 430 |
1 files changed, 430 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch new file mode 100644 index 00000000..aef93aa8 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch @@ -0,0 +1,430 @@ +From d4bee1f1396310283c73937b00a0b5e8997ffd59 Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Date: Thu, 22 Mar 2018 17:25:54 -0400 +Subject: [PATCH 5635/5725] drm/amdkfd: CMA: Refactor CMA code + +This is similar to process_vm_rw() functions. This refactoring is also +helpful for the special handling of userptr BOs (upcoming commits). + +This commit does not change any functionality. + +v2: Fix potential fence leak + +Change-Id: Ic8f9c6a7599d2beac54d768831618df0207f10e9 +Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> + +Conflicts: + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +--- + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 337 +++++++++++++++++-------------- + drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++ + 2 files changed, 206 insertions(+), 148 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +index 98b000b..a7f0bdc 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +@@ -1672,22 +1672,165 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep, + return r; + } + ++ ++/* Update cma_iter.cur_bo with KFD BO that is assocaited with ++ * cma_iter.array.va_addr ++ */ ++static int kfd_cma_iter_update_bo(struct cma_iter *ci) ++{ ++ struct kfd_memory_range *arr = ci->array; ++ uint64_t va_end = arr->va_addr + arr->size - 1; ++ ++ mutex_lock(&ci->p->mutex); ++ ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr, ++ va_end); ++ mutex_unlock(&ci->p->mutex); ++ ++ if (!ci->cur_bo || va_end > ci->cur_bo->it.last) { ++ pr_err("CMA failed. Range out of bounds\n"); ++ return -EFAULT; ++ } ++ return 0; ++} ++ ++/* Advance iter by @size bytes. */ ++static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size) ++{ ++ int ret = 0; ++ ++ ci->offset += size; ++ if (WARN_ON(size > ci->total || ci->offset > ci->array->size)) ++ return -EFAULT; ++ ci->total -= size; ++ /* If current range is copied, move to next range if available. */ ++ if (ci->offset == ci->array->size) { ++ ++ /* End of all ranges */ ++ if (!(--ci->nr_segs)) ++ return 0; ++ ++ ci->array++; ++ ci->offset = 0; ++ ret = kfd_cma_iter_update_bo(ci); ++ if (ret) ++ return ret; ++ } ++ ci->bo_offset = (ci->array->va_addr + ci->offset) - ++ ci->cur_bo->it.start; ++ return ret; ++} ++ ++static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs, ++ struct kfd_process *p, struct cma_iter *ci) ++{ ++ int ret; ++ int nr; ++ ++ if (!arr || !segs) ++ return -EINVAL; ++ ++ memset(ci, 0, sizeof(*ci)); ++ ci->array = arr; ++ ci->nr_segs = segs; ++ ci->p = p; ++ ci->offset = 0; ++ for (nr = 0; nr < segs; nr++) ++ ci->total += arr[nr].size; ++ ++ /* Valid but size is 0. So copied will also be 0 */ ++ if (!ci->total) ++ return 0; ++ ++ ret = kfd_cma_iter_update_bo(ci); ++ if (!ret) ++ ci->bo_offset = arr->va_addr - ci->cur_bo->it.start; ++ return ret; ++} ++ ++static bool kfd_cma_iter_end(struct cma_iter *ci) ++{ ++ if (!(ci->nr_segs) || !(ci->total)) ++ return true; ++ return false; ++} ++ ++/* Copy single range from source iterator @si to destination iterator @di. ++ * @si will move to next range and @di will move by bytes copied. ++ * @return : 0 for success or -ve for failure ++ * @f: The last fence if any ++ * @copied: out: number of bytes copied ++ */ ++static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di, ++ bool cma_write, struct dma_fence **f, ++ uint64_t *copied) ++{ ++ int err = 0; ++ uint64_t copy_size, n; ++ uint64_t size = si->array->size; ++ struct kfd_bo *src_bo = si->cur_bo; ++ struct dma_fence *lfence = NULL; ++ ++ if (!src_bo || !di || !copied) ++ return -EINVAL; ++ *copied = 0; ++ if (f) ++ *f = NULL; ++ ++ while (size && !kfd_cma_iter_end(di)) { ++ struct dma_fence *fence = NULL; ++ struct kfd_bo *dst_bo = di->cur_bo; ++ ++ copy_size = min(size, (di->array->size - di->offset)); ++ ++ /* Check both BOs belong to same device */ ++ if (src_bo->dev->kgd != dst_bo->dev->kgd) { ++ pr_err("CMA fail. Not same dev\n"); ++ return -EINVAL; ++ } ++ ++ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd, ++ src_bo->mem, si->bo_offset, dst_bo->mem, di->bo_offset, ++ copy_size, &fence, &n); ++ if (err) { ++ pr_err("GPU CMA %d failed\n", err); ++ break; ++ } ++ ++ if (fence) { ++ dma_fence_put(lfence); ++ lfence = fence; ++ } ++ size -= n; ++ *copied += n; ++ err = kfd_cma_iter_advance(si, n); ++ if (err) ++ break; ++ err = kfd_cma_iter_advance(di, n); ++ if (err) ++ break; ++ } ++ ++ if (f) ++ *f = dma_fence_get(lfence); ++ dma_fence_put(lfence); ++ ++ return err; ++} ++ + static int kfd_ioctl_cross_memory_copy(struct file *filep, + struct kfd_process *local_p, void *data) + { + struct kfd_ioctl_cross_memory_copy_args *args = data; + struct kfd_memory_range *src_array, *dst_array; +- struct kfd_bo *src_bo, *dst_bo; +- struct kfd_process *remote_p, *src_p, *dst_p; ++ struct kfd_process *remote_p; + struct task_struct *remote_task; + struct mm_struct *remote_mm; + struct pid *remote_pid; +- struct dma_fence *fence = NULL, *lfence = NULL; +- uint64_t dst_va_addr; +- uint64_t copied, total_copied = 0; +- uint64_t src_offset, dst_offset, dst_va_addr_end; ++ struct dma_fence *lfence = NULL; ++ uint64_t copied = 0, total_copied = 0; ++ struct cma_iter di, si; + const char *cma_op; +- int i, j = 0, err = 0; ++ int err = 0; + + /* Check parameters */ + if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 || +@@ -1754,159 +1897,57 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep, + goto kfd_process_fail; + } + ++ /* Initialise cma_iter si & @di with source & destination range. */ + if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) { +- src_p = local_p; +- dst_p = remote_p; + cma_op = "WRITE"; + pr_debug("CMA WRITE: local -> remote\n"); ++ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, ++ remote_p, &di); ++ if (err) ++ goto kfd_process_fail; ++ err = kfd_cma_iter_init(src_array, args->src_mem_array_size, ++ local_p, &si); ++ if (err) ++ goto kfd_process_fail; + } else { +- src_p = remote_p; +- dst_p = local_p; + cma_op = "READ"; + pr_debug("CMA READ: remote -> local\n"); +- } +- +- +- /* For each source kfd_range: +- * - Find the BO. Each range has to be within the same BO. +- * - Copy this range to single or multiple destination BOs. +- * - dst_va_addr - will point to next va address into which data will +- * be copied. +- * - dst_bo & src_bo - the current destination and source BOs +- * - src_offset & dst_offset - offset into the respective BOs from +- * data will be sourced or copied ++ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size, ++ local_p, &di); ++ if (err) ++ goto kfd_process_fail; ++ err = kfd_cma_iter_init(src_array, args->src_mem_array_size, ++ remote_p, &si); ++ if (err) ++ goto kfd_process_fail; ++ } ++ ++ /* Copy one si range at a time into di. After each call to ++ * kfd_copy_single_range() si will move to next range. di will be ++ * incremented by bytes copied + */ +- dst_va_addr = dst_array[0].va_addr; +- dst_va_addr_end = dst_va_addr + dst_array[0].size - 1; +- mutex_lock(&dst_p->mutex); +- dst_bo = kfd_process_find_bo_from_interval(dst_p, +- dst_va_addr, +- dst_va_addr_end); +- mutex_unlock(&dst_p->mutex); +- if (!dst_bo || dst_va_addr_end > dst_bo->it.last) { +- pr_err("CMA %s failed. Invalid dst range\n", cma_op); +- err = -EFAULT; +- goto kfd_process_fail; +- } +- dst_offset = dst_va_addr - dst_bo->it.start; +- +- for (i = 0; i < args->src_mem_array_size; i++) { +- uint64_t src_va_addr_end = src_array[i].va_addr + +- src_array[i].size - 1; +- uint64_t src_size_to_copy = src_array[i].size; +- +- mutex_lock(&src_p->mutex); +- src_bo = kfd_process_find_bo_from_interval(src_p, +- src_array[i].va_addr, +- src_va_addr_end); +- mutex_unlock(&src_p->mutex); +- if (!src_bo || src_va_addr_end > src_bo->it.last) { +- pr_err("CMA %s failed. Invalid src range\n", cma_op); +- err = -EFAULT; +- break; +- } +- +- src_offset = src_array[i].va_addr - src_bo->it.start; +- +- /* Copy src_bo to one or multiple dst_bo(s) based on size and +- * and current copy location. +- */ +- while (j < args->dst_mem_array_size) { +- uint64_t copy_size; +- int64_t space_left; +- +- /* Find the current copy_size. This will be smaller of +- * the following +- * - space left in the current dest memory range +- * - data left to copy from source range +- */ +- space_left = (dst_array[j].va_addr + dst_array[j].size) +- - dst_va_addr; +- copy_size = (src_size_to_copy < space_left) ? +- src_size_to_copy : space_left; +- +- /* Check both BOs belong to same device */ +- if (src_bo->dev->kgd != dst_bo->dev->kgd) { +- pr_err("CMA %s fail. Not same dev\n", cma_op); +- err = -EINVAL; +- break; +- } +- +- /* Store prev fence. Release it when a later fence is +- * created +- */ ++ while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) { ++ struct dma_fence *fence = NULL; ++ err = kfd_copy_single_range(&si, &di, ++ KFD_IS_CROSS_MEMORY_WRITE(args->flags), ++ &fence, &copied); ++ total_copied += copied; ++ if (err) ++ break; ++ /* Release old fence if a later fence is created. If no ++ * new fence is created, then keep the preivous fence ++ */ ++ if (fence) { ++ dma_fence_put(lfence); + lfence = fence; +- fence = NULL; +- +- err = dst_bo->dev->kfd2kgd->copy_mem_to_mem( +- src_bo->dev->kgd, +- src_bo->mem, src_offset, +- dst_bo->mem, dst_offset, +- copy_size, +- &fence, &copied); +- +- if (err) { +- pr_err("GPU CMA %s failed\n", cma_op); +- break; +- } +- +- /* Later fence available. Release old fence */ +- if (fence && lfence) { +- dma_fence_put(lfence); +- lfence = NULL; +- } +- +- total_copied += copied; +- src_size_to_copy -= copied; +- space_left -= copied; +- dst_va_addr += copied; +- dst_offset += copied; +- src_offset += copied; +- if (dst_va_addr > dst_bo->it.last + 1) { +- pr_err("CMA %s fail. Mem overflow\n", cma_op); +- err = -EFAULT; +- break; +- } +- +- /* If the cur dest range is full move to next one */ +- if (space_left <= 0) { +- if (++j >= args->dst_mem_array_size) +- break; +- +- dst_va_addr = dst_array[j].va_addr; +- dst_va_addr_end = dst_va_addr + +- dst_array[j].size - 1; +- dst_bo = kfd_process_find_bo_from_interval( +- dst_p, +- dst_va_addr, +- dst_va_addr_end); +- if (!dst_bo || +- dst_va_addr_end > dst_bo->it.last) { +- pr_err("CMA %s failed. Invalid dst range\n", +- cma_op); +- err = -EFAULT; +- break; +- } +- dst_offset = dst_va_addr - dst_bo->it.start; +- } +- +- /* If the cur src range is done, move to next one */ +- if (src_size_to_copy <= 0) +- break; + } +- if (err) +- break; + } + + /* Wait for the last fence irrespective of error condition */ +- if (fence) { +- if (dma_fence_wait_timeout(fence, false, msecs_to_jiffies(1000)) +- < 0) +- pr_err("CMA %s failed. BO timed out\n", cma_op); +- dma_fence_put(fence); +- } else if (lfence) { +- pr_debug("GPU copy fail. But wait for prev DMA to finish\n"); +- dma_fence_wait_timeout(lfence, true, msecs_to_jiffies(1000)); ++ if (lfence) { ++ if (dma_fence_wait_timeout(lfence, false, ++ msecs_to_jiffies(1000)) < 0) ++ pr_err("CMA %s failed. BO timed out\n", cma_op); + dma_fence_put(lfence); + } + +diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +index 97f729c..a74cfbc 100644 +--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h ++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +@@ -303,6 +303,23 @@ struct kfd_bo { + struct kfd_ipc_obj *kfd_ipc_obj; + }; + ++/* Similar to iov_iter */ ++struct cma_iter { ++ /* points to current entry of range array */ ++ struct kfd_memory_range *array; ++ /* total number of entries in the initial array */ ++ unsigned long nr_segs; ++ /* total amount of data pointed by kfd array*/ ++ unsigned long total; ++ /* offset into the entry pointed by cma_iter.array */ ++ unsigned long offset; ++ struct kfd_process *p; ++ /* current kfd_bo associated with cma_iter.array.va_addr */ ++ struct kfd_bo *cur_bo; ++ /* offset w.r.t cur_bo */ ++ unsigned long bo_offset; ++}; ++ + /* KGD2KFD callbacks */ + void kgd2kfd_exit(void); + struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, +-- +2.7.4 + |