Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2127-drm-amdgpu-Fix-CMA.patch')
-rw-r--r-- meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2127-drm-amdgpu-Fix-CMA.patch | 214
1 file changed, 214 insertions(+), 0 deletions(-)
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2127-drm-amdgpu-Fix-CMA.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2127-drm-amdgpu-Fix-CMA.patch
new file mode 100644
index 00000000..1fea0fe1
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2127-drm-amdgpu-Fix-CMA.patch
@@ -0,0 +1,214 @@
+From 761b73343c0408924931627b20a6299e5834b9f0 Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Mon, 16 Oct 2017 11:37:50 -0400
+Subject: [PATCH 2127/4131] drm/amdgpu: Fix CMA
+
+GART size is reduced and system memory allocations no longer need to be
+mapped in the GART. Change the IPC memory copy code to go through a new
+transfer window in GART space by calling amdgpu_ttm_copy_mem_to_mem(),
+which handles the above.
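+
+A minimal sketch of the new call pattern follows (illustrative only, not
+part of the applied diff; it reuses the names from the hunks below and
+elides the buffer reservation and error paths):
+
+    struct amdgpu_copy_mem src, dst;
+    struct dma_fence *fence = NULL;
+    int r;
+
+    /* Describe both ends of the copy: backing BO, placement, offset. */
+    src.bo = &src_mem->bo->tbo;
+    src.mem = &src.bo->mem;
+    src.offset = src_offset;
+    dst.bo = &dst_mem->bo->tbo;
+    dst.mem = &dst.bo->mem;
+    dst.offset = dst_offset;
+
+    /* One call replaces the manual drm_mm node walk and the per-node
+     * amdgpu_copy_buffer() loop: it splits the copy across nodes
+     * internally and returns the fence of the last sub-copy.
+     */
+    r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size, NULL, &fence);
+
+    /* Hand one fence reference to the caller, drop the local one. */
+    if (f)
+            *f = dma_fence_get(fence);
+    dma_fence_put(fence);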
+
+BUG: KFD-355
+
+Change-Id: If142e615a08ba4ff2247352e167485620c060fe5
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 154 ++++-------------------
+ 1 file changed, 21 insertions(+), 133 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+index a013f5b..900019d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+@@ -2418,17 +2418,12 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
+ struct dma_fence **f, uint64_t *actual_size)
+ {
+ struct amdgpu_device *adev = NULL;
+- struct ttm_mem_reg *src = NULL, *dst = NULL;
+- struct ttm_buffer_object *src_ttm_bo, *dst_ttm_bo;
+- struct drm_mm_node *src_mm, *dst_mm;
+- struct amdgpu_ring *ring;
++ struct amdgpu_copy_mem src, dst;
+ struct ww_acquire_ctx ticket;
+ struct list_head list;
+ struct ttm_validate_buffer resv_list[2];
+- uint64_t src_start, dst_start;
+- uint64_t src_left, dst_left, cur_copy_size, total_copy_size = 0;
+ struct dma_fence *fence = NULL;
+- int r;
++ int i, r;
+
+ if (!kgd || !src_mem || !dst_mem)
+ return -EINVAL;
+@@ -2437,28 +2432,21 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
+ *actual_size = 0;
+
+ adev = get_amdgpu_device(kgd);
+- src_ttm_bo = &src_mem->bo->tbo;
+- dst_ttm_bo = &dst_mem->bo->tbo;
+- src = &src_ttm_bo->mem;
+- dst = &dst_ttm_bo->mem;
+- src_mm = (struct drm_mm_node *)src->mm_node;
+- dst_mm = (struct drm_mm_node *)dst->mm_node;
+-
+- ring = adev->mman.buffer_funcs_ring;
+-
+ INIT_LIST_HEAD(&list);
+
+- resv_list[0].bo = src_ttm_bo;
+- resv_list[0].shared = true;
+- resv_list[1].bo = dst_ttm_bo;
+- resv_list[1].shared = true;
++ src.bo = &src_mem->bo->tbo;
++ dst.bo = &dst_mem->bo->tbo;
++ src.mem = &src.bo->mem;
++ dst.mem = &dst.bo->mem;
++ src.offset = src_offset;
++ dst.offset = dst_offset;
+
+- list_add_tail(&resv_list[0].head, &list);
+- list_add_tail(&resv_list[1].head, &list);
++ resv_list[0].bo = src.bo;
++ resv_list[1].bo = dst.bo;
+
+- if (!ring->ready) {
+- pr_err("Trying to move memory with ring turned off.\n");
+- return -EINVAL;
++ for (i = 0; i < 2; i++) {
++ resv_list[i].shared = true;
++ list_add_tail(&resv_list[i].head, &list);
+ }
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+@@ -2467,120 +2455,20 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
+ return r;
+ }
+
+- switch (src->mem_type) {
+- case TTM_PL_TT:
+- r = amdgpu_ttm_bind(src_ttm_bo);
+- if (r) {
+- DRM_ERROR("Copy failed. Cannot bind to gart\n");
+- goto copy_fail;
+- }
+- break;
+- case TTM_PL_VRAM:
+- /* VRAM could be scattered. Find the node in which the offset
+- * belongs to
+- */
+- while (src_offset >= (src_mm->size << PAGE_SHIFT)) {
+- src_offset -= (src_mm->size << PAGE_SHIFT);
+- ++src_mm;
+- }
+- break;
+- default:
+- DRM_ERROR("Unknown placement %d\n", src->mem_type);
+- r = -EINVAL;
+- goto copy_fail;
+- }
+- src_start = src_mm->start << PAGE_SHIFT;
+- src_start += src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
+- src_start += src_offset;
+- src_left = (src_mm->size << PAGE_SHIFT) - src_offset;
+-
+- switch (dst->mem_type) {
+- case TTM_PL_TT:
+- r = amdgpu_ttm_bind(dst_ttm_bo);
+- if (r) {
+- DRM_ERROR("Copy failed. Cannot bind to gart\n");
+- goto copy_fail;
+- }
+- break;
+- case TTM_PL_VRAM:
+- while (dst_offset >= (dst_mm->size << PAGE_SHIFT)) {
+- dst_offset -= (dst_mm->size << PAGE_SHIFT);
+- ++dst_mm;
+- }
+- break;
+- default:
+- DRM_ERROR("Unknown placement %d\n", dst->mem_type);
+- r = -EINVAL;
+- goto copy_fail;
+- }
+- dst_start = dst_mm->start << PAGE_SHIFT;
+- dst_start += dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
+- dst_start += dst_offset;
+- dst_left = (dst_mm->size << PAGE_SHIFT) - dst_offset;
+-
+- do {
+- struct dma_fence *next;
+-
+- /* src_left/dst_left: amount of space left in the current node
+- * Copy minimum of (src_left, dst_left, amount of bytes left to
+- * copy)
+- */
+- cur_copy_size = min3(src_left, dst_left,
+- (size - total_copy_size));
+-
+- r = amdgpu_copy_buffer(ring, src_start, dst_start,
+- cur_copy_size, NULL, &next, false, false);
+- if (r)
+- break;
+-
+- /* Just keep the last fence */
+- dma_fence_put(fence);
+- fence = next;
+-
+- total_copy_size += cur_copy_size;
+- /* Required amount of bytes copied. Done. */
+- if (total_copy_size >= size)
+- break;
+-
+- /* If end of src or dst node is reached, move to next node */
+- src_left -= cur_copy_size;
+- if (!src_left) {
+- ++src_mm;
+- src_start = src_mm->start << PAGE_SHIFT;
+- src_start +=
+- src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
+- src_left = src_mm->size << PAGE_SHIFT;
+- } else
+- src_start += cur_copy_size;
+-
+- dst_left -= cur_copy_size;
+- if (!dst_left) {
+- ++dst_mm;
+- dst_start = dst_mm->start << PAGE_SHIFT;
+- dst_start +=
+- dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
+- dst_left = dst_mm->size << PAGE_SHIFT;
+- } else
+- dst_start += cur_copy_size;
+-
+- } while (total_copy_size < size);
+-
+- /* Failure could occur after partial copy. So fill in amount copied
+- * and fence, still fill-in
+- */
+- if (actual_size)
+- *actual_size = total_copy_size;
+-
++ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size, NULL,
++ &fence);
++ if (r)
++ pr_err("Copy buffer failed %d\n", r);
++ else
++ *actual_size = size;
+ if (fence) {
+ amdgpu_bo_fence(src_mem->bo, fence, true);
+ amdgpu_bo_fence(dst_mem->bo, fence, true);
+ }
+-
+ if (f)
+- *f = fence;
++ *f = dma_fence_get(fence);
++ dma_fence_put(fence);
+
+-copy_fail:
+ ttm_eu_backoff_reservation(&ticket, &list);
+ return r;
+ }
+-
+--
+2.7.4
+