1 files changed, 282 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch
new file mode 100644
index 00000000..c5c05313
--- /dev/null
+++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch
@@ -0,0 +1,282 @@
+From 2d7057733a990c5cf1a42eada9f776f220c150a8 Mon Sep 17 00:00:00 2001
+From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Date: Tue, 3 Oct 2017 15:41:56 -0400
+Subject: [PATCH 2008/4131] drm/amdgpu: Refactor amdgpu_move_blit
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add a more generic function, amdgpu_ttm_copy_mem_to_mem(), that supports
+arbitrary copy sizes, offsets and two BOs (source and destination).
+
+This is useful for the KFD Cross Memory Attach feature, where data needs
+to be copied between BOs that belong to different processes.
+
+v2: Add struct amdgpu_copy_mem and change the amdgpu_copy_ttm_mem_to_mem()
+function parameters to use the struct
+
+v3: Minor function name change (now amdgpu_ttm_copy_mem_to_mem())
+
+Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
+Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  12 +++
+ 2 files changed, 132 insertions(+), 49 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 5b6723c..5cd086d 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -333,97 +333,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
+ 	return addr;
+ }
+ 
+-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+-			    bool evict, bool no_wait_gpu,
+-			    struct ttm_mem_reg *new_mem,
+-			    struct ttm_mem_reg *old_mem)
++/**
++ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
++ *
++ * The function copies @size bytes from {src->mem + src->offset} to
++ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
++ * move and different for a BO to BO copy.
++ *
++ * @f: Returns the last fence if multiple jobs are submitted.
++ */
++int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
++			       struct amdgpu_copy_mem *src,
++			       struct amdgpu_copy_mem *dst,
++			       uint64_t size,
++			       struct reservation_object *resv,
++			       struct dma_fence **f)
+ {
+-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+-
+-	struct drm_mm_node *old_mm, *new_mm;
+-	uint64_t old_start, old_size, new_start, new_size;
+-	unsigned long num_pages;
++	struct drm_mm_node *src_mm, *dst_mm;
++	uint64_t src_node_start, dst_node_start, src_node_size,
++		 dst_node_size, src_page_offset, dst_page_offset;
+ 	struct dma_fence *fence = NULL;
+-	int r;
+-
+-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
++	int r = 0;
++	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
++					AMDGPU_GPU_PAGE_SIZE);
+ 
+ 	if (!ring->ready) {
+ 		DRM_ERROR("Trying to move memory with ring turned off.\n");
+ 		return -EINVAL;
+ 	}
+ 
+-	old_mm = old_mem->mm_node;
+-	old_size = old_mm->size;
+-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
++	src_mm = src->mem->mm_node;
++	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
++		src->offset -= (src_mm->size << PAGE_SHIFT);
++		++src_mm;
++	}
++	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
++					     src->offset;
++	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
++	src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ 
+-	new_mm = new_mem->mm_node;
+-	new_size = new_mm->size;
+-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
++	dst_mm = dst->mem->mm_node;
++	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
++		dst->offset -= (dst_mm->size << PAGE_SHIFT);
++		++dst_mm;
++	}
++	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
++					     dst->offset;
++	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
++	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ 
+-	num_pages = new_mem->num_pages;
+ 	mutex_lock(&adev->mman.gtt_window_lock);
+-	while (num_pages) {
+-		unsigned long cur_pages = min(min(old_size, new_size),
+-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
+-		uint64_t from = old_start, to = new_start;
++
++	while (size) {
++		unsigned long cur_size;
++		uint64_t from = src_node_start, to = dst_node_start;
+ 		struct dma_fence *next;
+ 
+-		if (old_mem->mem_type == TTM_PL_TT &&
+-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
+-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
+-					      old_start, 0, ring, &from);
++		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
++		 * begins at an offset, then adjust the size accordingly
++		 */
++		cur_size = min3(min(src_node_size, dst_node_size), size,
++				GTT_MAX_BYTES);
++		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
++		    cur_size + dst_page_offset > GTT_MAX_BYTES)
++			cur_size -= max(src_page_offset, dst_page_offset);
++
++		/* Map only what needs to be accessed. Map src to window 0 and
++		 * dst to window 1
++		 */
++		if (src->mem->mem_type == TTM_PL_TT &&
++		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
++			r = amdgpu_map_buffer(src->bo, src->mem,
++					PFN_UP(cur_size + src_page_offset),
++					src_node_start, 0, ring,
++					&from);
+ 			if (r)
+ 				goto error;
++			/* Adjust the offset because amdgpu_map_buffer returns
++			 * start of mapped page
++			 */
++			from += src_page_offset;
+ 		}
+ 
+-		if (new_mem->mem_type == TTM_PL_TT &&
+-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
+-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
+-					      new_start, 1, ring, &to);
++		if (dst->mem->mem_type == TTM_PL_TT &&
++		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
++			r = amdgpu_map_buffer(dst->bo, dst->mem,
++					PFN_UP(cur_size + dst_page_offset),
++					dst_node_start, 1, ring,
++					&to);
+ 			if (r)
+ 				goto error;
++			to += dst_page_offset;
+ 		}
+ 
+-		r = amdgpu_copy_buffer(ring, from, to,
+-				       cur_pages * PAGE_SIZE,
+-				       bo->resv, &next, false, true);
++		r = amdgpu_copy_buffer(ring, from, to, cur_size,
++				       resv, &next, false, true);
+ 		if (r)
+ 			goto error;
+ 
+ 		dma_fence_put(fence);
+ 		fence = next;
+ 
+-		num_pages -= cur_pages;
+-		if (!num_pages)
++		size -= cur_size;
++		if (!size)
+ 			break;
+ 
+-		old_size -= cur_pages;
+-		if (!old_size) {
+-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
+-			old_size = old_mm->size;
++		src_node_size -= cur_size;
++		if (!src_node_size) {
++			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
++							     src->mem);
++			src_node_size = (src_mm->size << PAGE_SHIFT);
+ 		} else {
+-			old_start += cur_pages * PAGE_SIZE;
++			src_node_start += cur_size;
++			src_page_offset = src_node_start & (PAGE_SIZE - 1);
+ 		}
+-
+-		new_size -= cur_pages;
+-		if (!new_size) {
+-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
+-			new_size = new_mm->size;
++		dst_node_size -= cur_size;
++		if (!dst_node_size) {
++			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
++							     dst->mem);
++			dst_node_size = (dst_mm->size << PAGE_SHIFT);
+ 		} else {
+-			new_start += cur_pages * PAGE_SIZE;
++			dst_node_start += cur_size;
++			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
+ 		}
+ 	}
++error:
+ 	mutex_unlock(&adev->mman.gtt_window_lock);
++	if (f)
++		*f = dma_fence_get(fence);
++	dma_fence_put(fence);
++	return r;
++}
++
++
++static int amdgpu_move_blit(struct ttm_buffer_object *bo,
++			    bool evict, bool no_wait_gpu,
++			    struct ttm_mem_reg *new_mem,
++			    struct ttm_mem_reg *old_mem)
++{
++	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
++	struct amdgpu_copy_mem src, dst;
++	struct dma_fence *fence = NULL;
++	int r;
++
++	src.bo = bo;
++	dst.bo = bo;
++	src.mem = old_mem;
++	dst.mem = new_mem;
++	src.offset = 0;
++	dst.offset = 0;
++
++	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
++				       new_mem->num_pages << PAGE_SHIFT,
++				       bo->resv, &fence);
++	if (r)
++		goto error;
+ 
+ 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
+ 	dma_fence_put(fence);
+ 	return r;
+ 
+ error:
+-	mutex_unlock(&adev->mman.gtt_window_lock);
+-
+ 	if (fence)
+ 		dma_fence_wait(fence, false);
+ 	dma_fence_put(fence);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+index e431cf7..f4692cb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+@@ -62,6 +62,12 @@ struct amdgpu_mman {
+ 	struct amd_sched_entity			entity;
+ };
+ 
++struct amdgpu_copy_mem {
++	struct ttm_buffer_object	*bo;
++	struct ttm_mem_reg		*mem;
++	unsigned long			offset;
++};
++
+ extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
+ extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
+ 
+@@ -77,6 +83,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
+ 		       struct reservation_object *resv,
+ 		       struct dma_fence **fence, bool direct_submit,
+ 		       bool vm_needs_flush);
++int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
++			       struct amdgpu_copy_mem *src,
++			       struct amdgpu_copy_mem *dst,
++			       uint64_t size,
++			       struct reservation_object *resv,
++			       struct dma_fence **f);
+ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
+ 			uint64_t src_data,
+ 			struct reservation_object *resv,
+-- 
+2.7.4
+