From 7e5a547f64af66fd906f266f0e8c9bde213d025c Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 24 Apr 2015 17:37:30 +0800 Subject: [PATCH 0226/1050] drm/amdgpu: implement the allocation range (v3) Pass a ttm_placement pointer to amdgpu_bo_create_restricted add min_offset to amdgpu_bo_pin_restricted. This makes it easier to allocate memory with address restrictions. With this patch we can also enable 2-ended allocation again. v2: fix rebase conflicts v3: memset placements before using Reviewed-by: Jammy Zhou Signed-off-by: Chunming Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 176 +++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 9 +- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2 +- 8 files changed, 136 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 80f0bea..8eb5c55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -43,6 +43,7 @@ #include #include +#include #include "amd_shared.h" #include "amdgpu_family.h" @@ -542,12 +543,14 @@ struct amdgpu_bo_va { struct amdgpu_bo *bo; }; +#define AMDGPU_GEM_DOMAIN_MAX 0x3 + struct amdgpu_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ u32 initial_domain; - struct ttm_place placements[4]; + struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index f22c067..b16b925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -159,7 +159,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, &base); + r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base); if (unlikely(r != 0)) { amdgpu_bo_unreserve(new_rbo); r = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 73b7aad..c1645d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -150,7 +150,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, } - ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, NULL); + ret = amdgpu_bo_pin_restricted(rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, NULL); if (ret) { amdgpu_bo_unreserve(rbo); goto out_unref; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index dcc6af9..62cabfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -41,13 +41,13 @@ int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev, - struct ttm_mem_reg * mem) + struct ttm_mem_reg *mem) { u64 ret = 0; if (mem->start << PAGE_SHIFT < adev->mc.visible_vram_size) { ret = (u64)((mem->start << PAGE_SHIFT) + mem->size) > adev->mc.visible_vram_size ? - adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT): + adev->mc.visible_vram_size - (mem->start << PAGE_SHIFT) : mem->size; } return ret; @@ -112,82 +112,111 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) return false; } -void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain) +static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, + struct ttm_placement *placement, + struct ttm_place *placements, + u32 domain, u64 flags) { u32 c = 0, i; - rbo->placement.placement = rbo->placements; - rbo->placement.busy_placement = rbo->placements; + + placement->placement = placements; + placement->busy_placement = placements; if (domain & AMDGPU_GEM_DOMAIN_VRAM) { - if (rbo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS && - rbo->adev->mc.visible_vram_size < rbo->adev->mc.real_vram_size) { - rbo->placements[c].fpfn = - rbo->adev->mc.visible_vram_size >> PAGE_SHIFT; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_VRAM; + if (flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS && + adev->mc.visible_vram_size < adev->mc.real_vram_size) { + placements[c].fpfn = + adev->mc.visible_vram_size >> PAGE_SHIFT; + placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_VRAM; } - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_VRAM; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_VRAM; } if (domain & AMDGPU_GEM_DOMAIN_GTT) { - if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT | - TTM_PL_FLAG_UNCACHED; + if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) { + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT | + TTM_PL_FLAG_UNCACHED; } else { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; } } if (domain & AMDGPU_GEM_DOMAIN_CPU) { - if (rbo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM | - TTM_PL_FLAG_UNCACHED; + if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) { + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM | + TTM_PL_FLAG_UNCACHED; } else { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; } } if (domain & AMDGPU_GEM_DOMAIN_GDS) { - rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | - AMDGPU_PL_FLAG_GDS; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_UNCACHED | + AMDGPU_PL_FLAG_GDS; } if (domain & AMDGPU_GEM_DOMAIN_GWS) { - rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | - AMDGPU_PL_FLAG_GWS; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_UNCACHED | + AMDGPU_PL_FLAG_GWS; } if (domain & AMDGPU_GEM_DOMAIN_OA) { - rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | - AMDGPU_PL_FLAG_OA; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_FLAG_UNCACHED | + AMDGPU_PL_FLAG_OA; } if (!c) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_MASK_CACHING | - TTM_PL_FLAG_SYSTEM; + placements[c].fpfn = 0; + placements[c++].flags = TTM_PL_MASK_CACHING | + TTM_PL_FLAG_SYSTEM; } - rbo->placement.num_placement = c; - rbo->placement.num_busy_placement = c; + placement->num_placement = c; + placement->num_busy_placement = c; for (i = 0; i < c; i++) { - if ((rbo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) && - !rbo->placements[i].fpfn) - rbo->placements[i].lpfn = - rbo->adev->mc.visible_vram_size >> PAGE_SHIFT; + if ((flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && + (placements[i].flags & TTM_PL_FLAG_VRAM) && + !placements[i].fpfn) + placements[i].lpfn = + adev->mc.visible_vram_size >> PAGE_SHIFT; else - rbo->placements[i].lpfn = 0; + placements[i].lpfn = 0; } } -int amdgpu_bo_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, - struct sg_table *sg, struct amdgpu_bo **bo_ptr) +void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain) +{ + amdgpu_ttm_placement_init(rbo->adev, &rbo->placement, + rbo->placements, domain, rbo->flags); +} + +static void amdgpu_fill_placement_to_bo(struct amdgpu_bo *bo, + struct ttm_placement *placement) +{ + BUG_ON(placement->num_placement > (AMDGPU_GEM_DOMAIN_MAX + 1)); + + memcpy(bo->placements, placement->placement, + placement->num_placement * sizeof(struct ttm_place)); + bo->placement.num_placement = placement->num_placement; + bo->placement.num_busy_placement = placement->num_busy_placement; + bo->placement.placement = bo->placements; + bo->placement.busy_placement = bo->placements; +} + +int amdgpu_bo_create_restricted(struct amdgpu_device *adev, + unsigned long size, int byte_align, + bool kernel, u32 domain, u64 flags, + struct sg_table *sg, + struct ttm_placement *placement, + struct amdgpu_bo **bo_ptr) { struct amdgpu_bo *bo; enum ttm_bo_type type; @@ -241,7 +270,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_OA); bo->flags = flags; - amdgpu_ttm_placement_from_domain(bo, domain); + amdgpu_fill_placement_to_bo(bo, placement); /* Kernel allocation are uninterruptible */ down_read(&adev->pm.mclk_lock); r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type, @@ -258,6 +287,27 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return 0; } +int amdgpu_bo_create(struct amdgpu_device *adev, + unsigned long size, int byte_align, + bool kernel, u32 domain, u64 flags, + struct sg_table *sg, struct amdgpu_bo **bo_ptr) +{ + struct ttm_placement placement = {0}; + struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + + memset(&placements, 0, + (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); + + amdgpu_ttm_placement_init(adev, &placement, + placements, domain, flags); + + return amdgpu_bo_create_restricted(adev, size, byte_align, + kernel, domain, flags, + sg, + &placement, + bo_ptr); +} + int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { bool is_iomem; @@ -313,14 +363,19 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo) *bo = NULL; } -int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset, +int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, + u64 min_offset, u64 max_offset, u64 *gpu_addr) { int r, i; + unsigned fpfn, lpfn; if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) return -EPERM; + if (WARN_ON_ONCE(min_offset > max_offset)) + return -EINVAL; + if (bo->pin_count) { bo->pin_count++; if (gpu_addr) @@ -328,7 +383,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset, if (max_offset != 0) { u64 domain_start; - if (domain == AMDGPU_GEM_DOMAIN_VRAM) domain_start = bo->adev->mc.vram_start; else @@ -343,13 +397,21 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset, for (i = 0; i < bo->placement.num_placement; i++) { /* force to pin into visible video ram */ if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && - !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) && - (!max_offset || max_offset > bo->adev->mc.visible_vram_size)) - bo->placements[i].lpfn = - bo->adev->mc.visible_vram_size >> PAGE_SHIFT; - else - bo->placements[i].lpfn = max_offset >> PAGE_SHIFT; - + !(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) && + (!max_offset || max_offset > bo->adev->mc.visible_vram_size)) { + if (WARN_ON_ONCE(min_offset > + bo->adev->mc.visible_vram_size)) + return -EINVAL; + fpfn = min_offset >> PAGE_SHIFT; + lpfn = bo->adev->mc.visible_vram_size >> PAGE_SHIFT; + } else { + fpfn = min_offset >> PAGE_SHIFT; + lpfn = max_offset >> PAGE_SHIFT; + } + if (fpfn > bo->placements[i].fpfn) + bo->placements[i].fpfn = fpfn; + if (lpfn && lpfn < bo->placements[i].lpfn) + bo->placements[i].lpfn = lpfn; bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } @@ -370,7 +432,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 max_offset, int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) { - return amdgpu_bo_pin_restricted(bo, domain, 0, gpu_addr); + return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr); } int amdgpu_bo_unpin(struct amdgpu_bo *bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index b1e0a03..675bdc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -130,13 +130,20 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bool kernel, u32 domain, u64 flags, struct sg_table *sg, struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create_restricted(struct amdgpu_device *adev, + unsigned long size, int byte_align, + bool kernel, u32 domain, u64 flags, + struct sg_table *sg, + struct ttm_placement *placement, + struct amdgpu_bo **bo_ptr); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo); void amdgpu_bo_unref(struct amdgpu_bo **bo); int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr); int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, - u64 max_offset, u64 *gpu_addr); + u64 min_offset, u64 max_offset, + u64 *gpu_addr); int amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); void amdgpu_bo_force_delete(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index da9a4b9..926c8e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2553,7 +2553,7 @@ static int dce_v10_0_crtc_cursor_set(struct drm_crtc *crtc, if (unlikely(ret != 0)) goto fail; ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, - 0, &gpu_addr); + 0, 0, &gpu_addr); amdgpu_bo_unreserve(robj); if (ret) goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index edd9d17..bc60fd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2552,7 +2552,7 @@ static int dce_v11_0_crtc_cursor_set(struct drm_crtc *crtc, if (unlikely(ret != 0)) goto fail; ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, - 0, &gpu_addr); + 0, 0, &gpu_addr); amdgpu_bo_unreserve(robj); if (ret) goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 1d291f1d..9e8b9f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2496,7 +2496,7 @@ static int dce_v8_0_crtc_cursor_set(struct drm_crtc *crtc, if (unlikely(ret != 0)) goto fail; ret = amdgpu_bo_pin_restricted(robj, AMDGPU_GEM_DOMAIN_VRAM, - 0, &gpu_addr); + 0, 0, &gpu_addr); amdgpu_bo_unreserve(robj); if (ret) goto fail; -- 1.9.1