diff options
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.14.71/1609-drm-amdgpu-GTT-bind-before-copy-mem.patch')
-rw-r--r-- | common/recipes-kernel/linux/linux-yocto-4.14.71/1609-drm-amdgpu-GTT-bind-before-copy-mem.patch | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.14.71/1609-drm-amdgpu-GTT-bind-before-copy-mem.patch b/common/recipes-kernel/linux/linux-yocto-4.14.71/1609-drm-amdgpu-GTT-bind-before-copy-mem.patch new file mode 100644 index 00000000..99c18c33 --- /dev/null +++ b/common/recipes-kernel/linux/linux-yocto-4.14.71/1609-drm-amdgpu-GTT-bind-before-copy-mem.patch @@ -0,0 +1,204 @@ +From 2c91c9a0ea6c34250054d56758c8000b3bbc0080 Mon Sep 17 00:00:00 2001 +From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +Date: Tue, 21 Feb 2017 19:34:33 -0500 +Subject: [PATCH 1609/4131] drm/amdgpu: GTT bind before copy mem + +This change is required after merging with amd-staging-4.9, since GTT +binding is done only on demand. + +Change-Id: I6ca60c1521ca53a6db0a5c30fae7b9f8b5ca069c +Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 131 ++++++++++++++++++----- + 1 file changed, 106 insertions(+), 25 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +index 801c34e..28dae72 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +@@ -2279,11 +2279,15 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, + { + struct amdgpu_device *adev = NULL; + struct ttm_mem_reg *src = NULL, *dst = NULL; ++ struct ttm_buffer_object *src_ttm_bo, *dst_ttm_bo; ++ struct drm_mm_node *src_mm, *dst_mm; + struct amdgpu_ring *ring; + struct ww_acquire_ctx ticket; + struct list_head list; + struct amdgpu_bo_list_entry *entry; + uint64_t src_start, dst_start; ++ uint64_t src_left, dst_left, cur_copy_size, total_copy_size = 0; ++ struct fence *fence = NULL; + int r; + + if (!kgd || !src_mem || !dst_mem) +@@ -2293,8 +2297,12 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, + *actual_size = 0; + + adev = get_amdgpu_device(kgd); +- src = &src_mem->bo->tbo.mem; +- dst = &dst_mem->bo->tbo.mem; ++ src_ttm_bo = &src_mem->bo->tbo; ++ dst_ttm_bo = &dst_mem->bo->tbo; ++ src = &src_ttm_bo->mem; ++ dst = &dst_ttm_bo->mem; ++ src_mm = (struct drm_mm_node *)src->mm_node; ++ dst_mm = (struct drm_mm_node *)dst->mm_node; + + ring = adev->mman.buffer_funcs_ring; + +@@ -2304,55 +2312,128 @@ int amdgpu_amdkfd_copy_mem_to_mem(struct kgd_dev *kgd, struct kgd_mem *src_mem, + entry = &dst_mem->bo_list_entry; + list_add_tail(&entry->tv.head, &list); + ++ if (!ring->ready) { ++ pr_err("Trying to move memory with ring turned off.\n"); ++ return -EINVAL; ++ } ++ + r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + if (r) { + pr_err("Copy buffer failed. Unable to reserve bo (%d)\n", r); + return r; + } + +- src_start = (src->start << PAGE_SHIFT) + src_offset; +- dst_start = (dst->start << PAGE_SHIFT) + dst_offset; +- + switch (src->mem_type) { +- case TTM_PL_VRAM: +- src_start += adev->mc.vram_start; +- break; + case TTM_PL_TT: +- src_start += adev->mc.gtt_start; ++ r = amdgpu_ttm_bind(src_ttm_bo, src); ++ if (r) { ++ DRM_ERROR("Copy failed. Cannot bind to gart\n"); ++ goto copy_fail; ++ } ++ break; ++ case TTM_PL_VRAM: ++ /* VRAM could be scattered. Find the node in which the offset ++ * belongs to ++ */ ++ while (src_offset >= (src_mm->size << PAGE_SHIFT)) { ++ src_offset -= (src_mm->size << PAGE_SHIFT); ++ ++src_mm; ++ } + break; + default: + DRM_ERROR("Unknown placement %d\n", src->mem_type); + r = -EINVAL; + goto copy_fail; + } ++ src_start = src_mm->start << PAGE_SHIFT; ++ src_start += src_ttm_bo->bdev->man[src->mem_type].gpu_offset; ++ src_start += src_offset; ++ src_left = (src_mm->size << PAGE_SHIFT) - src_offset; ++ + switch (dst->mem_type) { +- case TTM_PL_VRAM: +- dst_start += adev->mc.vram_start; +- break; + case TTM_PL_TT: +- dst_start += adev->mc.gtt_start; ++ r = amdgpu_ttm_bind(dst_ttm_bo, dst); ++ if (r) { ++ DRM_ERROR("Copy failed. Cannot bind to gart\n"); ++ goto copy_fail; ++ } ++ break; ++ case TTM_PL_VRAM: ++ while (dst_offset >= (dst_mm->size << PAGE_SHIFT)) { ++ dst_offset -= (dst_mm->size << PAGE_SHIFT); ++ ++dst_mm; ++ } + break; + default: + DRM_ERROR("Unknown placement %d\n", dst->mem_type); + r = -EINVAL; + goto copy_fail; + } +- if (!ring->ready) { +- pr_err("Trying to move memory with ring turned off.\n"); +- r = -EINVAL; +- goto copy_fail; +- } ++ dst_start = dst_mm->start << PAGE_SHIFT; ++ dst_start += dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset; ++ dst_start += dst_offset; ++ dst_left = (dst_mm->size << PAGE_SHIFT) - dst_offset; + +- r = amdgpu_copy_buffer(ring, src_start, dst_start, +- size, NULL, f, false); +- if (r) +- goto copy_fail; ++ do { ++ struct fence *next; ++ ++ /* src_left/dst_left: amount of space left in the current node ++ * Copy minimum of (src_left, dst_left, amount of bytes left to ++ * copy) ++ */ ++ cur_copy_size = min3(src_left, dst_left, ++ (size - total_copy_size)); ++ ++ r = amdgpu_copy_buffer(ring, src_start, dst_start, ++ cur_copy_size, NULL, &next, false); ++ if (r) ++ break; + ++ /* Just keep the last fence */ ++ fence_put(fence); ++ fence = next; ++ ++ total_copy_size += cur_copy_size; ++ /* Required amount of bytes copied. Done. */ ++ if (total_copy_size >= size) ++ break; ++ ++ /* If end of src or dst node is reached, move to next node */ ++ src_left -= cur_copy_size; ++ if (!src_left) { ++ ++src_mm; ++ src_start = src_mm->start << PAGE_SHIFT; ++ src_start += ++ src_ttm_bo->bdev->man[src->mem_type].gpu_offset; ++ src_left = src_mm->size << PAGE_SHIFT; ++ } else ++ src_start += cur_copy_size; ++ ++ dst_left -= cur_copy_size; ++ if (!dst_left) { ++ ++dst_mm; ++ dst_start = dst_mm->start << PAGE_SHIFT; ++ dst_start += ++ dst_ttm_bo->bdev->man[dst->mem_type].gpu_offset; ++ dst_left = dst_mm->size << PAGE_SHIFT; ++ } else ++ dst_start += cur_copy_size; ++ ++ } while (total_copy_size < size); ++ ++ /* Failure could occur after partial copy. So fill in amount copied ++ * and fence, still fill-in ++ */ + if (actual_size) +- *actual_size = size; ++ *actual_size = total_copy_size; ++ ++ if (fence) { ++ amdgpu_bo_fence(src_mem->bo, fence, true); ++ amdgpu_bo_fence(dst_mem->bo, fence, true); ++ } + +- amdgpu_bo_fence(src_mem->bo, *f, true); +- amdgpu_bo_fence(dst_mem->bo, *f, true); ++ if (f) ++ *f = fence; + + copy_fail: + ttm_eu_backoff_reservation(&ticket, &list); +-- +2.7.4 + |