aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch')
-rw-r--r--common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch430
1 files changed, 430 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch
new file mode 100644
index 00000000..aef93aa8
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/5635-drm-amdkfd-CMA-Refactor-CMA-code.patch
@@ -0,0 +1,430 @@
+From d4bee1f1396310283c73937b00a0b5e8997ffd59 Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Thu, 22 Mar 2018 17:25:54 -0400
+Subject: [PATCH 5635/5725] drm/amdkfd: CMA: Refactor CMA code
+
+This is similar to process_vm_rw() functions. This refactoring is also
+helpful for the special handling of userptr BOs (upcoming commits).
+
+This commit does not change any functionality.
+
+v2: Fix potential fence leak
+
+Change-Id: Ic8f9c6a7599d2beac54d768831618df0207f10e9
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
+
+Conflicts:
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 337 +++++++++++++++++--------------
+ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 17 ++
+ 2 files changed, 206 insertions(+), 148 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+index 98b000b..a7f0bdc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+@@ -1672,22 +1672,165 @@ static int kfd_ioctl_ipc_import_handle(struct file *filep,
+ return r;
+ }
+
++
++/* Update cma_iter.cur_bo with KFD BO that is associated with
++ * cma_iter.array.va_addr
++ */
++static int kfd_cma_iter_update_bo(struct cma_iter *ci)
++{
++ struct kfd_memory_range *arr = ci->array;
++ uint64_t va_end = arr->va_addr + arr->size - 1;
++
++ mutex_lock(&ci->p->mutex);
++ ci->cur_bo = kfd_process_find_bo_from_interval(ci->p, arr->va_addr,
++ va_end);
++ mutex_unlock(&ci->p->mutex);
++
++ if (!ci->cur_bo || va_end > ci->cur_bo->it.last) {
++ pr_err("CMA failed. Range out of bounds\n");
++ return -EFAULT;
++ }
++ return 0;
++}
++
++/* Advance iter by @size bytes. */
++static int kfd_cma_iter_advance(struct cma_iter *ci, unsigned long size)
++{
++ int ret = 0;
++
++ ci->offset += size;
++ if (WARN_ON(size > ci->total || ci->offset > ci->array->size))
++ return -EFAULT;
++ ci->total -= size;
++ /* If current range is copied, move to next range if available. */
++ if (ci->offset == ci->array->size) {
++
++ /* End of all ranges */
++ if (!(--ci->nr_segs))
++ return 0;
++
++ ci->array++;
++ ci->offset = 0;
++ ret = kfd_cma_iter_update_bo(ci);
++ if (ret)
++ return ret;
++ }
++ ci->bo_offset = (ci->array->va_addr + ci->offset) -
++ ci->cur_bo->it.start;
++ return ret;
++}
++
++static int kfd_cma_iter_init(struct kfd_memory_range *arr, unsigned long segs,
++ struct kfd_process *p, struct cma_iter *ci)
++{
++ int ret;
++ int nr;
++
++ if (!arr || !segs)
++ return -EINVAL;
++
++ memset(ci, 0, sizeof(*ci));
++ ci->array = arr;
++ ci->nr_segs = segs;
++ ci->p = p;
++ ci->offset = 0;
++ for (nr = 0; nr < segs; nr++)
++ ci->total += arr[nr].size;
++
++ /* Valid but size is 0. So copied will also be 0 */
++ if (!ci->total)
++ return 0;
++
++ ret = kfd_cma_iter_update_bo(ci);
++ if (!ret)
++ ci->bo_offset = arr->va_addr - ci->cur_bo->it.start;
++ return ret;
++}
++
++static bool kfd_cma_iter_end(struct cma_iter *ci)
++{
++ if (!(ci->nr_segs) || !(ci->total))
++ return true;
++ return false;
++}
++
++/* Copy single range from source iterator @si to destination iterator @di.
++ * @si will move to next range and @di will move by bytes copied.
++ * @return : 0 for success or -ve for failure
++ * @f: The last fence if any
++ * @copied: out: number of bytes copied
++ */
++static int kfd_copy_single_range(struct cma_iter *si, struct cma_iter *di,
++ bool cma_write, struct dma_fence **f,
++ uint64_t *copied)
++{
++ int err = 0;
++ uint64_t copy_size, n;
++ uint64_t size = si->array->size;
++ struct kfd_bo *src_bo = si->cur_bo;
++ struct dma_fence *lfence = NULL;
++
++ if (!src_bo || !di || !copied)
++ return -EINVAL;
++ *copied = 0;
++ if (f)
++ *f = NULL;
++
++ while (size && !kfd_cma_iter_end(di)) {
++ struct dma_fence *fence = NULL;
++ struct kfd_bo *dst_bo = di->cur_bo;
++
++ copy_size = min(size, (di->array->size - di->offset));
++
++ /* Check both BOs belong to same device */
++ if (src_bo->dev->kgd != dst_bo->dev->kgd) {
++ pr_err("CMA fail. Not same dev\n");
++ return -EINVAL;
++ }
++
++ err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(src_bo->dev->kgd,
++ src_bo->mem, si->bo_offset, dst_bo->mem, di->bo_offset,
++ copy_size, &fence, &n);
++ if (err) {
++ pr_err("GPU CMA %d failed\n", err);
++ break;
++ }
++
++ if (fence) {
++ dma_fence_put(lfence);
++ lfence = fence;
++ }
++ size -= n;
++ *copied += n;
++ err = kfd_cma_iter_advance(si, n);
++ if (err)
++ break;
++ err = kfd_cma_iter_advance(di, n);
++ if (err)
++ break;
++ }
++
++ if (f)
++ *f = dma_fence_get(lfence);
++ dma_fence_put(lfence);
++
++ return err;
++}
++
+ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ struct kfd_process *local_p, void *data)
+ {
+ struct kfd_ioctl_cross_memory_copy_args *args = data;
+ struct kfd_memory_range *src_array, *dst_array;
+- struct kfd_bo *src_bo, *dst_bo;
+- struct kfd_process *remote_p, *src_p, *dst_p;
++ struct kfd_process *remote_p;
+ struct task_struct *remote_task;
+ struct mm_struct *remote_mm;
+ struct pid *remote_pid;
+- struct dma_fence *fence = NULL, *lfence = NULL;
+- uint64_t dst_va_addr;
+- uint64_t copied, total_copied = 0;
+- uint64_t src_offset, dst_offset, dst_va_addr_end;
++ struct dma_fence *lfence = NULL;
++ uint64_t copied = 0, total_copied = 0;
++ struct cma_iter di, si;
+ const char *cma_op;
+- int i, j = 0, err = 0;
++ int err = 0;
+
+ /* Check parameters */
+ if (args->src_mem_range_array == 0 || args->dst_mem_range_array == 0 ||
+@@ -1754,159 +1897,57 @@ static int kfd_ioctl_cross_memory_copy(struct file *filep,
+ goto kfd_process_fail;
+ }
+
++	/* Initialise cma_iter @si & @di with source & destination range. */
+ if (KFD_IS_CROSS_MEMORY_WRITE(args->flags)) {
+- src_p = local_p;
+- dst_p = remote_p;
+ cma_op = "WRITE";
+ pr_debug("CMA WRITE: local -> remote\n");
++ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
++ remote_p, &di);
++ if (err)
++ goto kfd_process_fail;
++ err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
++ local_p, &si);
++ if (err)
++ goto kfd_process_fail;
+ } else {
+- src_p = remote_p;
+- dst_p = local_p;
+ cma_op = "READ";
+ pr_debug("CMA READ: remote -> local\n");
+- }
+-
+-
+- /* For each source kfd_range:
+- * - Find the BO. Each range has to be within the same BO.
+- * - Copy this range to single or multiple destination BOs.
+- * - dst_va_addr - will point to next va address into which data will
+- * be copied.
+- * - dst_bo & src_bo - the current destination and source BOs
+- * - src_offset & dst_offset - offset into the respective BOs from
+- * data will be sourced or copied
++ err = kfd_cma_iter_init(dst_array, args->dst_mem_array_size,
++ local_p, &di);
++ if (err)
++ goto kfd_process_fail;
++ err = kfd_cma_iter_init(src_array, args->src_mem_array_size,
++ remote_p, &si);
++ if (err)
++ goto kfd_process_fail;
++ }
++
++ /* Copy one si range at a time into di. After each call to
++ * kfd_copy_single_range() si will move to next range. di will be
++ * incremented by bytes copied
+ */
+- dst_va_addr = dst_array[0].va_addr;
+- dst_va_addr_end = dst_va_addr + dst_array[0].size - 1;
+- mutex_lock(&dst_p->mutex);
+- dst_bo = kfd_process_find_bo_from_interval(dst_p,
+- dst_va_addr,
+- dst_va_addr_end);
+- mutex_unlock(&dst_p->mutex);
+- if (!dst_bo || dst_va_addr_end > dst_bo->it.last) {
+- pr_err("CMA %s failed. Invalid dst range\n", cma_op);
+- err = -EFAULT;
+- goto kfd_process_fail;
+- }
+- dst_offset = dst_va_addr - dst_bo->it.start;
+-
+- for (i = 0; i < args->src_mem_array_size; i++) {
+- uint64_t src_va_addr_end = src_array[i].va_addr +
+- src_array[i].size - 1;
+- uint64_t src_size_to_copy = src_array[i].size;
+-
+- mutex_lock(&src_p->mutex);
+- src_bo = kfd_process_find_bo_from_interval(src_p,
+- src_array[i].va_addr,
+- src_va_addr_end);
+- mutex_unlock(&src_p->mutex);
+- if (!src_bo || src_va_addr_end > src_bo->it.last) {
+- pr_err("CMA %s failed. Invalid src range\n", cma_op);
+- err = -EFAULT;
+- break;
+- }
+-
+- src_offset = src_array[i].va_addr - src_bo->it.start;
+-
+- /* Copy src_bo to one or multiple dst_bo(s) based on size and
+- * and current copy location.
+- */
+- while (j < args->dst_mem_array_size) {
+- uint64_t copy_size;
+- int64_t space_left;
+-
+- /* Find the current copy_size. This will be smaller of
+- * the following
+- * - space left in the current dest memory range
+- * - data left to copy from source range
+- */
+- space_left = (dst_array[j].va_addr + dst_array[j].size)
+- - dst_va_addr;
+- copy_size = (src_size_to_copy < space_left) ?
+- src_size_to_copy : space_left;
+-
+- /* Check both BOs belong to same device */
+- if (src_bo->dev->kgd != dst_bo->dev->kgd) {
+- pr_err("CMA %s fail. Not same dev\n", cma_op);
+- err = -EINVAL;
+- break;
+- }
+-
+- /* Store prev fence. Release it when a later fence is
+- * created
+- */
++ while (!kfd_cma_iter_end(&si) && !kfd_cma_iter_end(&di)) {
++ struct dma_fence *fence = NULL;
++ err = kfd_copy_single_range(&si, &di,
++ KFD_IS_CROSS_MEMORY_WRITE(args->flags),
++ &fence, &copied);
++ total_copied += copied;
++ if (err)
++ break;
++ /* Release old fence if a later fence is created. If no
++		 * new fence is created, then keep the previous fence
++ */
++ if (fence) {
++ dma_fence_put(lfence);
+ lfence = fence;
+- fence = NULL;
+-
+- err = dst_bo->dev->kfd2kgd->copy_mem_to_mem(
+- src_bo->dev->kgd,
+- src_bo->mem, src_offset,
+- dst_bo->mem, dst_offset,
+- copy_size,
+- &fence, &copied);
+-
+- if (err) {
+- pr_err("GPU CMA %s failed\n", cma_op);
+- break;
+- }
+-
+- /* Later fence available. Release old fence */
+- if (fence && lfence) {
+- dma_fence_put(lfence);
+- lfence = NULL;
+- }
+-
+- total_copied += copied;
+- src_size_to_copy -= copied;
+- space_left -= copied;
+- dst_va_addr += copied;
+- dst_offset += copied;
+- src_offset += copied;
+- if (dst_va_addr > dst_bo->it.last + 1) {
+- pr_err("CMA %s fail. Mem overflow\n", cma_op);
+- err = -EFAULT;
+- break;
+- }
+-
+- /* If the cur dest range is full move to next one */
+- if (space_left <= 0) {
+- if (++j >= args->dst_mem_array_size)
+- break;
+-
+- dst_va_addr = dst_array[j].va_addr;
+- dst_va_addr_end = dst_va_addr +
+- dst_array[j].size - 1;
+- dst_bo = kfd_process_find_bo_from_interval(
+- dst_p,
+- dst_va_addr,
+- dst_va_addr_end);
+- if (!dst_bo ||
+- dst_va_addr_end > dst_bo->it.last) {
+- pr_err("CMA %s failed. Invalid dst range\n",
+- cma_op);
+- err = -EFAULT;
+- break;
+- }
+- dst_offset = dst_va_addr - dst_bo->it.start;
+- }
+-
+- /* If the cur src range is done, move to next one */
+- if (src_size_to_copy <= 0)
+- break;
+ }
+- if (err)
+- break;
+ }
+
+ /* Wait for the last fence irrespective of error condition */
+- if (fence) {
+- if (dma_fence_wait_timeout(fence, false, msecs_to_jiffies(1000))
+- < 0)
+- pr_err("CMA %s failed. BO timed out\n", cma_op);
+- dma_fence_put(fence);
+- } else if (lfence) {
+- pr_debug("GPU copy fail. But wait for prev DMA to finish\n");
+- dma_fence_wait_timeout(lfence, true, msecs_to_jiffies(1000));
++ if (lfence) {
++ if (dma_fence_wait_timeout(lfence, false,
++ msecs_to_jiffies(1000)) < 0)
++ pr_err("CMA %s failed. BO timed out\n", cma_op);
+ dma_fence_put(lfence);
+ }
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+index 97f729c..a74cfbc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+@@ -303,6 +303,23 @@ struct kfd_bo {
+ struct kfd_ipc_obj *kfd_ipc_obj;
+ };
+
++/* Similar to iov_iter */
++struct cma_iter {
++ /* points to current entry of range array */
++ struct kfd_memory_range *array;
++ /* total number of entries in the initial array */
++ unsigned long nr_segs;
++	/* total amount of data pointed by kfd array */
++ unsigned long total;
++ /* offset into the entry pointed by cma_iter.array */
++ unsigned long offset;
++ struct kfd_process *p;
++ /* current kfd_bo associated with cma_iter.array.va_addr */
++ struct kfd_bo *cur_bo;
++ /* offset w.r.t cur_bo */
++ unsigned long bo_offset;
++};
++
+ /* KGD2KFD callbacks */
+ void kgd2kfd_exit(void);
+ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
+--
+2.7.4
+