diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch new file mode 100644 index 00000000..c5c05313 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.14.71/2008-drm-amdgpu-Refactor-amdgpu_move_blit.patch @@ -0,0 +1,282 @@ +From 2d7057733a990c5cf1a42eada9f776f220c150a8 Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Date: Tue, 3 Oct 2017 15:41:56 -0400 +Subject: [PATCH 2008/4131] drm/amdgpu: Refactor amdgpu_move_blit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports +arbitrary copy size, offsets and two BOs (source & dest.). + +This is useful for KFD Cross Memory Attach feature where data needs to +be copied from BOs from different processes + +v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem() +function parameters to use the struct + +v3: Minor function name change + +Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159 +Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++--------- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++ + 2 files changed, 132 insertions(+), 49 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +index 5b6723c..5cd086d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +@@ -333,97 +333,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, + return addr; + } + +-static int amdgpu_move_blit(struct ttm_buffer_object *bo, +- bool evict, bool no_wait_gpu, +- struct ttm_mem_reg *new_mem, +- struct ttm_mem_reg *old_mem) ++/** ++ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy ++ * ++ * The function copies @size bytes from {src->mem + src->offset} to ++ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a ++ * move and different for a BO to BO copy. ++ * ++ * @f: Returns the last fence if multiple jobs are submitted. ++ */ ++int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, ++ struct amdgpu_copy_mem *src, ++ struct amdgpu_copy_mem *dst, ++ uint64_t size, ++ struct reservation_object *resv, ++ struct dma_fence **f) + { +- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; +- +- struct drm_mm_node *old_mm, *new_mm; +- uint64_t old_start, old_size, new_start, new_size; +- unsigned long num_pages; ++ struct drm_mm_node *src_mm, *dst_mm; ++ uint64_t src_node_start, dst_node_start, src_node_size, ++ dst_node_size, src_page_offset, dst_page_offset; + struct dma_fence *fence = NULL; +- int r; +- +- BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); ++ int r = 0; ++ const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * ++ AMDGPU_GPU_PAGE_SIZE); + + if (!ring->ready) { + DRM_ERROR("Trying to move memory with ring turned off.\n"); + return -EINVAL; + } + +- old_mm = old_mem->mm_node; +- old_size = old_mm->size; +- old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); ++ src_mm = src->mem->mm_node; ++ while (src->offset >= (src_mm->size << PAGE_SHIFT)) { ++ src->offset -= (src_mm->size << PAGE_SHIFT); ++ ++src_mm; ++ } ++ src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + ++ src->offset; ++ src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; ++ src_page_offset = src_node_start & (PAGE_SIZE - 1); + +- new_mm = new_mem->mm_node; +- new_size = new_mm->size; +- new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); ++ dst_mm = dst->mem->mm_node; ++ while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { ++ dst->offset -= (dst_mm->size << PAGE_SHIFT); ++ ++dst_mm; ++ } ++ dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + ++ dst->offset; ++ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; ++ dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + +- num_pages = new_mem->num_pages; + mutex_lock(&adev->mman.gtt_window_lock); +- while (num_pages) { +- unsigned long cur_pages = min(min(old_size, new_size), +- (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); +- uint64_t from = old_start, to = new_start; ++ ++ while (size) { ++ unsigned long cur_size; ++ uint64_t from = src_node_start, to = dst_node_start; + struct dma_fence *next; + +- if (old_mem->mem_type == TTM_PL_TT && +- !amdgpu_gtt_mgr_is_allocated(old_mem)) { +- r = amdgpu_map_buffer(bo, old_mem, cur_pages, +- old_start, 0, ring, &from); ++ /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst ++ * begins at an offset, then adjust the size accordingly ++ */ ++ cur_size = min3(min(src_node_size, dst_node_size), size, ++ GTT_MAX_BYTES); ++ if (cur_size + src_page_offset > GTT_MAX_BYTES || ++ cur_size + dst_page_offset > GTT_MAX_BYTES) ++ cur_size -= max(src_page_offset, dst_page_offset); ++ ++ /* Map only what needs to be accessed. Map src to window 0 and ++ * dst to window 1 ++ */ ++ if (src->mem->mem_type == TTM_PL_TT && ++ !amdgpu_gtt_mgr_is_allocated(src->mem)) { ++ r = amdgpu_map_buffer(src->bo, src->mem, ++ PFN_UP(cur_size + src_page_offset), ++ src_node_start, 0, ring, ++ &from); + if (r) + goto error; ++ /* Adjust the offset because amdgpu_map_buffer returns ++ * start of mapped page ++ */ ++ from += src_page_offset; + } + +- if (new_mem->mem_type == TTM_PL_TT && +- !amdgpu_gtt_mgr_is_allocated(new_mem)) { +- r = amdgpu_map_buffer(bo, new_mem, cur_pages, +- new_start, 1, ring, &to); ++ if (dst->mem->mem_type == TTM_PL_TT && ++ !amdgpu_gtt_mgr_is_allocated(dst->mem)) { ++ r = amdgpu_map_buffer(dst->bo, dst->mem, ++ PFN_UP(cur_size + dst_page_offset), ++ dst_node_start, 1, ring, ++ &to); + if (r) + goto error; ++ to += dst_page_offset; + } + +- r = amdgpu_copy_buffer(ring, from, to, +- cur_pages * PAGE_SIZE, +- bo->resv, &next, false, true); ++ r = amdgpu_copy_buffer(ring, from, to, cur_size, ++ resv, &next, false, true); + if (r) + goto error; + + dma_fence_put(fence); + fence = next; + +- num_pages -= cur_pages; +- if (!num_pages) ++ size -= cur_size; ++ if (!size) + break; + +- old_size -= cur_pages; +- if (!old_size) { +- old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); +- old_size = old_mm->size; ++ src_node_size -= cur_size; ++ if (!src_node_size) { ++ src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, ++ src->mem); ++ src_node_size = (src_mm->size << PAGE_SHIFT); + } else { +- old_start += cur_pages * PAGE_SIZE; ++ src_node_start += cur_size; ++ src_page_offset = src_node_start & (PAGE_SIZE - 1); + } +- +- new_size -= cur_pages; +- if (!new_size) { +- new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); +- new_size = new_mm->size; ++ dst_node_size -= cur_size; ++ if (!dst_node_size) { ++ dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, ++ dst->mem); ++ dst_node_size = (dst_mm->size << PAGE_SHIFT); + } else { +- new_start += cur_pages * PAGE_SIZE; ++ dst_node_start += cur_size; ++ dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + } + } ++error: + mutex_unlock(&adev->mman.gtt_window_lock); ++ if (f) ++ *f = dma_fence_get(fence); ++ dma_fence_put(fence); ++ return r; ++} ++ ++ ++static int amdgpu_move_blit(struct ttm_buffer_object *bo, ++ bool evict, bool no_wait_gpu, ++ struct ttm_mem_reg *new_mem, ++ struct ttm_mem_reg *old_mem) ++{ ++ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); ++ struct amdgpu_copy_mem src, dst; ++ struct dma_fence *fence = NULL; ++ int r; ++ ++ src.bo = bo; ++ dst.bo = bo; ++ src.mem = old_mem; ++ dst.mem = new_mem; ++ src.offset = 0; ++ dst.offset = 0; ++ ++ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, ++ new_mem->num_pages << PAGE_SHIFT, ++ bo->resv, &fence); ++ if (r) ++ goto error; + + r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); + dma_fence_put(fence); + return r; + + error: +- mutex_unlock(&adev->mman.gtt_window_lock); +- + if (fence) + dma_fence_wait(fence, false); + dma_fence_put(fence); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +index e431cf7..f4692cb 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +@@ -62,6 +62,12 @@ struct amdgpu_mman { + struct amd_sched_entity entity; + }; + ++struct amdgpu_copy_mem { ++ struct ttm_buffer_object *bo; ++ struct ttm_mem_reg *mem; ++ unsigned long offset; ++}; ++ + extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; + extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; + +@@ -77,6 +83,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, + struct reservation_object *resv, + struct dma_fence **fence, bool direct_submit, + bool vm_needs_flush); ++int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, ++ struct amdgpu_copy_mem *src, ++ struct amdgpu_copy_mem *dst, ++ uint64_t size, ++ struct reservation_object *resv, ++ struct dma_fence **f); + int amdgpu_fill_buffer(struct amdgpu_bo *bo, + uint64_t src_data, + struct reservation_object *resv, +-- +2.7.4 + |