diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2986-drm-amdgpu-Fix-silent-amdgpu_bo_move-failures.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2986-drm-amdgpu-Fix-silent-amdgpu_bo_move-failures.patch | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2986-drm-amdgpu-Fix-silent-amdgpu_bo_move-failures.patch b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2986-drm-amdgpu-Fix-silent-amdgpu_bo_move-failures.patch new file mode 100644 index 00000000..0b721785 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux/linux-yocto-4.19.8/2986-drm-amdgpu-Fix-silent-amdgpu_bo_move-failures.patch @@ -0,0 +1,110 @@ +From 43d07f6ba6b19edcb8fed98769bd057e3ba0a17b Mon Sep 17 00:00:00 2001 +From: Felix Kuehling <Felix.Kuehling@amd.com> +Date: Sat, 13 Jul 2019 02:27:34 -0400 +Subject: [PATCH 2986/4256] drm/amdgpu: Fix silent amdgpu_bo_move failures +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Under memory pressure, buffer moves between RAM and VRAM can +fail when there is no GTT space available. In those cases +amdgpu_bo_move falls back to ttm_bo_move_memcpy, which seems to +succeed, although it doesn't really support non-contiguous or +invisible VRAM. This manifests as VM faults with corrupted page +table entries in KFD eviction stress tests. + +Print some helpful messages when lack of GTT space is causing buffer +moves to fail. Check that source and destination memory regions are +supported by ttm_bo_move_memcpy before taking that fallback. 
+ +Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 40 +++++++++++++++++++++++-- + 1 file changed, 37 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +index 12487f99e367..ff0ab1521cde 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +@@ -600,6 +600,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); + if (unlikely(r)) { ++ pr_err("Failed to find GTT space for blit from VRAM\n"); + return r; + } + +@@ -658,6 +659,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); + if (unlikely(r)) { ++ pr_err("Failed to find GTT space for blit to VRAM\n"); + return r; + } + +@@ -677,6 +679,30 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, + return r; + } + ++/** ++ * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy ++ * ++ * Called by amdgpu_bo_move() ++ */ ++static bool amdgpu_mem_visible(struct amdgpu_device *adev, ++ struct ttm_mem_reg *mem) ++{ ++ struct drm_mm_node *nodes = mem->mm_node; ++ ++ if (mem->mem_type == TTM_PL_SYSTEM || ++ mem->mem_type == TTM_PL_TT) ++ return true; ++ if (mem->mem_type != TTM_PL_VRAM) ++ return false; ++ ++ /* ttm_mem_reg_ioremap only supports contiguous memory */ ++ if (nodes->size != mem->num_pages) ++ return false; ++ ++ return ((nodes->start + nodes->size) << PAGE_SHIFT) ++ <= adev->gmc.visible_vram_size; ++} ++ + /** + * amdgpu_bo_move - Move a buffer object to a new memory location + * +@@ -725,8 +751,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool 
evict, + return 0; + } + +- if (!adev->mman.buffer_funcs_enabled) ++ if (!adev->mman.buffer_funcs_enabled) { ++ r = -ENODEV; + goto memcpy; ++ } + + if (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM) { +@@ -741,10 +769,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, + + if (r) { + memcpy: +- r = ttm_bo_move_memcpy(bo, ctx, new_mem); +- if (r) { ++ /* Check that all memory is CPU accessible */ ++ if (!amdgpu_mem_visible(adev, old_mem) || ++ !amdgpu_mem_visible(adev, new_mem)) { ++ pr_err("Move buffer fallback to memcpy unavailable\n"); + return r; + } ++ ++ r = ttm_bo_move_memcpy(bo, ctx, new_mem); ++ if (r) ++ return r; + } + + if (bo->type == ttm_bo_type_device && +-- +2.17.1 + |