From 2c91c9a0ea6c34250054d56758c8000b3bbc0080 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan
Date: Tue, 21 Feb 2017 19:34:33 -0500
Subject: [PATCH 1609/4131] drm/amdgpu: GTT bind before copy mem

This change is required after merging with amd-staging-4.9, since GTT
binding is now done only on demand.

Change-Id: I6ca60c1521ca53a6db0a5c30fae7b9f8b5ca069c
Signed-off-by: Harish Kasiviswanathan
---
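Reviewer note (not part of the commit message): below is a minimal
userspace sketch of the chunked-copy loop this patch introduces, under
simplified assumptions. struct node, seek_node(), min3_u64() and
copy_scattered() are made-up stand-ins for drm_mm_node, the offset
burn-down loops and min3(), not kernel APIs, and amdgpu_copy_buffer()
is replaced by a printf(). It only illustrates why each step copies
min3(src_left, dst_left, bytes remaining): a single contiguous copy
must not cross a node boundary in either the source or the destination.

#include <stdint.h>
#include <stdio.h>

struct node {                   /* stand-in for struct drm_mm_node */
        uint64_t start;         /* GPU offset of this node */
        uint64_t size;          /* bytes covered by this node */
};

static uint64_t min3_u64(uint64_t a, uint64_t b, uint64_t c)
{
        uint64_t m = a < b ? a : b;

        return m < c ? m : c;
}

/* Burn *off down until it falls inside *np; mirrors the while loops in
 * the TTM_PL_VRAM cases of the patch.
 */
static struct node *seek_node(struct node *np, uint64_t *off)
{
        while (*off >= np->size) {
                *off -= np->size;
                ++np;
        }
        return np;
}

static uint64_t copy_scattered(struct node *src, uint64_t src_off,
                               struct node *dst, uint64_t dst_off,
                               uint64_t size)
{
        uint64_t total = 0;

        src = seek_node(src, &src_off);
        dst = seek_node(dst, &dst_off);

        while (total < size) {
                uint64_t src_left = src->size - src_off;
                uint64_t dst_left = dst->size - dst_off;
                uint64_t cur = min3_u64(src_left, dst_left, size - total);

                /* Stand-in for one amdgpu_copy_buffer() submission */
                printf("copy 0x%llx bytes: 0x%llx -> 0x%llx\n",
                       (unsigned long long)cur,
                       (unsigned long long)(src->start + src_off),
                       (unsigned long long)(dst->start + dst_off));
                total += cur;

                /* Advance; step to the next node once one is exhausted */
                src_off += cur;
                if (src_off == src->size) {
                        ++src;
                        src_off = 0;
                }
                dst_off += cur;
                if (dst_off == dst->size) {
                        ++dst;
                        dst_off = 0;
                }
        }
        return total;
}

int main(void)
{
        /* Two scattered extents on each side (hypothetical offsets) */
        struct node src[] = { { 0x100000, 0x3000 }, { 0x200000, 0x5000 } };
        struct node dst[] = { { 0x400000, 0x2000 }, { 0x500000, 0x6000 } };

        /* Copy 0x6000 bytes starting 0x1000 into each scattered range;
         * this splits into three chunks, one per crossed boundary.
         */
        copy_scattered(src, 0x1000, dst, 0x1000, 0x6000);
        return 0;
}
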
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 131 ++++++++++++++++++-----
 1 file changed, 106 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 801c34e..28dae72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2279,11 +2279,15 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
 {
         struct amdgpu_device *adev = NULL;
         struct ttm_mem_reg *src = NULL, *dst = NULL;
+        struct ttm_buffer_object *src_ttm_bo, *dst_ttm_bo;
+        struct drm_mm_node *src_mm, *dst_mm;
         struct amdgpu_ring *ring;
         struct ww_acquire_ctx ticket;
         struct list_head list;
         struct amdgpu_bo_list_entry *entry;
         uint64_t src_start, dst_start;
+        uint64_t src_left, dst_left, cur_copy_size, total_copy_size = 0;
+        struct fence *fence = NULL;
         int r;
 
         if (!kgd || !src_mem || !dst_mem)
@@ -2293,8 +2297,12 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
         *actual_size = 0;
 
         adev = get_amdgpu_device(kgd);
-        src = &src_mem->bo->tbo.mem;
-        dst = &dst_mem->bo->tbo.mem;
+        src_ttm_bo = &src_mem->bo->tbo;
+        dst_ttm_bo = &dst_mem->bo->tbo;
+        src = &src_ttm_bo->mem;
+        dst = &dst_ttm_bo->mem;
+        src_mm = (struct drm_mm_node *)src->mm_node;
+        dst_mm = (struct drm_mm_node *)dst->mm_node;
 
         ring = adev->mman.buffer_funcs_ring;
 
@@ -2304,55 +2312,128 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem,
         entry = &dst_mem->bo_list_entry;
         list_add_tail(&entry->tv.head, &list);
 
+        if (!ring->ready) {
+                pr_err("Trying to move memory with ring turned off.\n");
+                return -EINVAL;
+        }
+
         r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
         if (r) {
                 pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r);
                 return r;
         }
 
-        src_start = (src->start << PAGE_SHIFT) + src_offset;
-        dst_start = (dst->start << PAGE_SHIFT) + dst_offset;
-
         switch (src->mem_type) {
-        case TTM_PL_VRAM:
-                src_start += adev->mc.vram_start;
-                break;
         case TTM_PL_TT:
-                src_start += adev->mc.gtt_start;
+                r = amdgpu_ttm_bind(src_ttm_bo, src);
+                if (r) {
+                        DRM_ERROR("Copy failed. Cannot bind to gart\n");
+                        goto copy_fail;
+                }
+                break;
+        case TTM_PL_VRAM:
+                /* VRAM could be scattered. Find the node that the
+                 * offset belongs to
+                 */
+                while (src_offset >= (src_mm->size << PAGE_SHIFT)) {
+                        src_offset -= (src_mm->size << PAGE_SHIFT);
+                        ++src_mm;
+                }
                 break;
         default:
                 DRM_ERROR("Unknown placement %d\n", src->mem_type);
                 r = -EINVAL;
                 goto copy_fail;
         }
 
+        src_start = src_mm->start << PAGE_SHIFT;
+        src_start += src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
+        src_start += src_offset;
+        src_left = (src_mm->size << PAGE_SHIFT) - src_offset;
+
         switch (dst->mem_type) {
-        case TTM_PL_VRAM:
-                dst_start += adev->mc.vram_start;
-                break;
         case TTM_PL_TT:
-                dst_start += adev->mc.gtt_start;
+                r = amdgpu_ttm_bind(dst_ttm_bo, dst);
+                if (r) {
+                        DRM_ERROR("Copy failed. Cannot bind to gart\n");
+                        goto copy_fail;
+                }
+                break;
+        case TTM_PL_VRAM:
+                while (dst_offset >= (dst_mm->size << PAGE_SHIFT)) {
+                        dst_offset -= (dst_mm->size << PAGE_SHIFT);
+                        ++dst_mm;
+                }
                 break;
         default:
                 DRM_ERROR("Unknown placement %d\n", dst->mem_type);
                 r = -EINVAL;
                 goto copy_fail;
         }
-        if (!ring->ready) {
-                pr_err("Trying to move memory with ring turned off.\n");
-                r = -EINVAL;
-                goto copy_fail;
-        }
+        dst_start = dst_mm->start << PAGE_SHIFT;
+        dst_start += dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
+        dst_start += dst_offset;
+        dst_left = (dst_mm->size << PAGE_SHIFT) - dst_offset;
 
-        r = amdgpu_copy_buffer(ring, src_start, dst_start,
-                               size, NULL, f, false);
-        if (r)
-                goto copy_fail;
+        do {
+                struct fence *next;
+
+                /* src_left/dst_left: amount of space left in the current node.
+                 * Copy the minimum of (src_left, dst_left, number of bytes
+                 * left to copy)
+                 */
+                cur_copy_size = min3(src_left, dst_left,
+                                     (size - total_copy_size));
+
+                r = amdgpu_copy_buffer(ring, src_start, dst_start,
+                                       cur_copy_size, NULL, &next, false);
+                if (r)
+                        break;
+                /* Just keep the last fence */
+                fence_put(fence);
+                fence = next;
+
+                total_copy_size += cur_copy_size;
+                /* Required number of bytes has been copied. Done. */
+                if (total_copy_size >= size)
+                        break;
+
+                /* If end of src or dst node is reached, move to next node */
+                src_left -= cur_copy_size;
+                if (!src_left) {
+                        ++src_mm;
+                        src_start = src_mm->start << PAGE_SHIFT;
+                        src_start +=
+                           src_ttm_bo->bdev->man[src->mem_type].gpu_offset;
+                        src_left = src_mm->size << PAGE_SHIFT;
+                } else
+                        src_start += cur_copy_size;
+
+                dst_left -= cur_copy_size;
+                if (!dst_left) {
+                        ++dst_mm;
+                        dst_start = dst_mm->start << PAGE_SHIFT;
+                        dst_start +=
+                           dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset;
+                        dst_left = dst_mm->size << PAGE_SHIFT;
+                } else
+                        dst_start += cur_copy_size;
+
+        } while (total_copy_size < size);
+
+        /* A failure could occur after a partial copy, so still fill in
+         * the amount copied and the fence
+         */
         if (actual_size)
-                *actual_size = size;
+                *actual_size = total_copy_size;
+
+        if (fence) {
+                amdgpu_bo_fence(src_mem->bo, fence, true);
+                amdgpu_bo_fence(dst_mem->bo, fence, true);
+        }
 
-        amdgpu_bo_fence(src_mem->bo, *f, true);
-        amdgpu_bo_fence(dst_mem->bo, *f, true);
+        if (f)
+                *f = fence;
 
 copy_fail:
         ttm_eu_backoff_reservation(&ticket, &list);
-- 
2.7.4