diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0149-drm-amdgpu-hybrid-add-direct-gma-dgma-support.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0149-drm-amdgpu-hybrid-add-direct-gma-dgma-support.patch | 822 |
1 files changed, 822 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0149-drm-amdgpu-hybrid-add-direct-gma-dgma-support.patch b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0149-drm-amdgpu-hybrid-add-direct-gma-dgma-support.patch new file mode 100644 index 00000000..319372eb --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0149-drm-amdgpu-hybrid-add-direct-gma-dgma-support.patch @@ -0,0 +1,822 @@ +From 3f3413fc666ffc49963084d48f0451d3dcb61326 Mon Sep 17 00:00:00 2001 +From: Junwei Zhang <Jerry.Zhang@amd.com> +Date: Tue, 21 Aug 2018 17:35:56 +0800 +Subject: [PATCH 0149/2940] drm/amdgpu: [hybrid] add direct gma(dgma) support + +v2: rebase on linux 4.18 and cleanup + +Signed-off-by: Flora Cui <Flora.Cui@amd.com> +Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> +Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com> (v2) +Signed-off-by: Kalyan Alle <kalyan.alle@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 15 +++ + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 + + drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 93 +++++++++++++ + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 7 +- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 22 ++- + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 6 + + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 149 ++++++++++++++++++++- + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 + + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 70 ++++++---- + include/uapi/drm/amdgpu_drm.h | 17 +++ + 11 files changed, 358 insertions(+), 31 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +index 4ebb6ea00bc5..a15aebb028d7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h +@@ -115,6 +115,7 @@ extern int amdgpu_dc; + extern int amdgpu_sched_jobs; + extern int amdgpu_sched_hw_submission; + extern int amdgpu_no_evict; ++extern int amdgpu_direct_gma_size; + extern uint amdgpu_pcie_gen_cap; + 
extern uint amdgpu_pcie_lane_cap; + extern uint amdgpu_cg_mask; +@@ -666,6 +667,9 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, + int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + ++int amdgpu_gem_dgma_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp); ++ + /* VRAM scratch page for HDP bug, default vram page */ + struct amdgpu_vram_scratch { + struct amdgpu_bo *robj; +@@ -801,6 +805,14 @@ enum amd_hw_ip_block_type { + + #define HWIP_MAX_INSTANCE 6 + ++struct amdgpu_direct_gma { ++ /* reserved in visible vram*/ ++ struct amdgpu_bo *dgma_bo; ++ atomic64_t vram_usage; ++ /* reserved in gart */ ++ atomic64_t gart_usage; ++}; ++ + struct amd_powerplay { + void *pp_handle; + const struct amd_pm_funcs *pp_funcs; +@@ -853,6 +865,9 @@ struct amdgpu_device { + uint32_t bios_scratch_reg_offset; + uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; + ++ /* Direct GMA */ ++ struct amdgpu_direct_gma direct_gma; ++ + /* Register/doorbell mmio */ + resource_size_t rmmio_base; + resource_size_t rmmio_size; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index ca2ee7a67029..ebc279e252c1 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -1057,6 +1057,7 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) + } + + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); ++ amdgpu_direct_gma_size = clamp(amdgpu_direct_gma_size, 0, 96); /* also reject negative sizes */ + } + + /** +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +index 2f7a76a5d4d6..a139c99d39b3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +@@ -110,6 +110,7 @@ int amdgpu_dc = -1; + int amdgpu_sched_jobs = 32; + int amdgpu_sched_hw_submission = 2; + int amdgpu_no_evict = 0; ++int amdgpu_direct_gma_size = 0; + uint 
amdgpu_pcie_gen_cap = 0; + uint amdgpu_pcie_lane_cap = 0; + uint amdgpu_cg_mask = 0xffffffff; +@@ -366,6 +367,10 @@ module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); + + MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))"); + module_param_named(no_evict, amdgpu_no_evict, int, 0444); ++ ++MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)"); ++module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444); ++ + /** + * DOC: pcie_gen_cap (uint) + * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h. +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +index 57bde9b6b60d..0b6f728800d9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +@@ -35,8 +35,15 @@ + void amdgpu_gem_object_free(struct drm_gem_object *gobj) + { + struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); ++ struct amdgpu_device *adev = amdgpu_ttm_adev(robj->tbo.bdev); + + if (robj) { ++ if (robj->tbo.mem.mem_type == AMDGPU_PL_DGMA) ++ atomic64_sub(amdgpu_bo_size(robj), ++ &adev->direct_gma.vram_usage); ++ else if (robj->tbo.mem.mem_type == AMDGPU_PL_DGMA_IMPORT) ++ atomic64_sub(amdgpu_bo_size(robj), ++ &adev->direct_gma.gart_usage); + amdgpu_mn_unregister(robj); + amdgpu_bo_unref(&robj); + } +@@ -50,6 +57,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, + { + struct amdgpu_bo *bo; + struct amdgpu_bo_param bp; ++ unsigned long max_size; + int r; + + memset(&bp, 0, sizeof(bp)); +@@ -59,6 +67,23 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, + alignment = PAGE_SIZE; + } + ++ if ((initial_domain & AMDGPU_GEM_DOMAIN_DGMA) || ++ (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT)) { ++ flags |= AMDGPU_GEM_CREATE_NO_EVICT; ++ max_size = (unsigned long)amdgpu_direct_gma_size << 20; ++ ++ if (initial_domain & 
AMDGPU_GEM_DOMAIN_DGMA) ++ max_size -= atomic64_read(&adev->direct_gma.vram_usage); ++ else if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) ++ max_size -= atomic64_read(&adev->direct_gma.gart_usage); ++ ++ if (size > max_size) { ++ DRM_DEBUG("Allocation size %ldMb bigger than %ldMb limit\n", ++ size >> 20, max_size >> 20); ++ return -ENOMEM; ++ } ++ } ++ + bp.size = size; + bp.byte_align = alignment; + bp.type = type; +@@ -86,6 +111,11 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, + } + *obj = &bo->gem_base; + ++ if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA) ++ atomic64_add(size, &adev->direct_gma.vram_usage); ++ else if (initial_domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) ++ atomic64_add(size, &adev->direct_gma.gart_usage); ++ + return 0; + } + +@@ -370,6 +400,63 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, + return r; + } + ++int amdgpu_gem_dgma_ioctl(struct drm_device *dev, void *data, ++ struct drm_file *filp) ++{ ++ struct amdgpu_device *adev = dev->dev_private; ++ struct drm_amdgpu_gem_dgma *args = data; ++ struct drm_gem_object *gobj; ++ struct amdgpu_bo *abo; ++ dma_addr_t *dma_addr; ++ uint32_t handle; ++ int i, r = 0; ++ ++ switch (args->op) { ++ case AMDGPU_GEM_DGMA_IMPORT: ++ /* create a gem object to contain this object in */ ++ r = amdgpu_gem_object_create(adev, args->size, 0, ++ AMDGPU_GEM_DOMAIN_DGMA_IMPORT, 0, ++ 0, NULL, &gobj); ++ if (r) ++ return r; ++ ++ abo = gem_to_amdgpu_bo(gobj); ++ dma_addr = kmalloc_array(abo->tbo.num_pages, sizeof(dma_addr_t), GFP_KERNEL); ++ r = dma_addr ? 0 : -ENOMEM; /* report the failed allocation instead of returning 0 */ ++ if (unlikely(r)) ++ goto release_object; ++ for (i = 0; i < abo->tbo.num_pages; i++) ++ dma_addr[i] = args->addr + i * PAGE_SIZE; ++ abo->tbo.mem.bus.base = args->addr; ++ abo->tbo.mem.bus.offset = 0; ++ abo->tbo.mem.bus.addr = (void *)dma_addr; ++ ++ r = drm_gem_handle_create(filp, gobj, &handle); ++ args->handle = handle; ++ break; ++ case AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR: ++ gobj = 
drm_gem_object_lookup(filp, args->handle); ++ if (gobj == NULL) ++ return -ENOENT; ++ ++ abo = gem_to_amdgpu_bo(gobj); ++ if (abo->tbo.mem.mem_type != AMDGPU_PL_DGMA) { ++ r = -EINVAL; ++ goto release_object; ++ } ++ args->addr = amdgpu_bo_gpu_offset(abo); ++ args->addr -= adev->gmc.vram_start; ++ args->addr += adev->gmc.aper_base; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++release_object: ++ drm_gem_object_unreference_unlocked(gobj); ++ return r; ++} ++ + int amdgpu_mode_dumb_mmap(struct drm_file *filp, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p) +@@ -805,6 +892,12 @@ static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) + case AMDGPU_GEM_DOMAIN_VRAM: + placement = "VRAM"; + break; ++ case AMDGPU_GEM_DOMAIN_DGMA: ++ placement = "DGMA"; ++ break; ++ case AMDGPU_GEM_DOMAIN_DGMA_IMPORT: ++ placement = "DGMA_IMPORT"; ++ break; + case AMDGPU_GEM_DOMAIN_GTT: + placement = " GTT"; + break; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +index 6b482bc52b77..57ca12599e20 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +@@ -752,6 +752,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file + memset(&cap, 0, sizeof(cap)); + if (amdgpu_no_evict) + cap.flag |= AMDGPU_CAPABILITY_PIN_MEM_FLAG; ++ if (amdgpu_direct_gma_size) { ++ cap.flag |= AMDGPU_CAPABILITY_DIRECT_GMA_FLAG; ++ cap.direct_gma_size = amdgpu_direct_gma_size; ++ } + return copy_to_user(out, &cap, + min((size_t)size, sizeof(cap))) ? 
-EFAULT : 0; + } +@@ -1125,7 +1129,8 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), +- DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_display_freesync_ioctl, DRM_MASTER) ++ DRM_IOCTL_DEF_DRV(AMDGPU_FREESYNC, amdgpu_display_freesync_ioctl, DRM_MASTER), ++ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_DGMA, amdgpu_gem_dgma_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + }; + const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +index e62c9601bb60..25380ceb6063 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +@@ -89,6 +89,8 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) + struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + ++ if (bo->tbo.mem.mem_type == AMDGPU_PL_DGMA_IMPORT) ++ kfree(tbo->mem.bus.addr); + if (bo->pin_count > 0) + amdgpu_bo_subtract_pin_size(bo); + +@@ -143,6 +145,22 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) + u64 flags = abo->flags; + u32 c = 0, i; + ++ if ((domain & AMDGPU_GEM_DOMAIN_DGMA) && amdgpu_direct_gma_size) { ++ places[c].fpfn = 0; ++ places[c].lpfn = 0; ++ places[c].flags = TTM_PL_FLAG_UNCACHED | ++ AMDGPU_PL_FLAG_DGMA | TTM_PL_FLAG_NO_EVICT; ++ c++; ++ } ++ ++ if ((domain & AMDGPU_GEM_DOMAIN_DGMA_IMPORT) && amdgpu_direct_gma_size) { ++ places[c].fpfn = 0; ++ places[c].lpfn = 0; ++ places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | ++ AMDGPU_PL_FLAG_DGMA_IMPORT | TTM_PL_FLAG_NO_EVICT; ++ c++; ++ } ++ + if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + +@@ -534,7 +552,8 
@@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, + if (bp->type == ttm_bo_type_device) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + +- if ((bp->flags & AMDGPU_GEM_CREATE_NO_EVICT) && amdgpu_no_evict) { ++ if (((bp->flags & AMDGPU_GEM_CREATE_NO_EVICT) && amdgpu_no_evict) || ++ bp->domain & (AMDGPU_GEM_DOMAIN_DGMA | AMDGPU_GEM_DOMAIN_DGMA_IMPORT)) { + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + return r; +@@ -1378,6 +1397,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) + WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); + WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && + !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); ++ WARN_ON_ONCE(bo->tbo.mem.mem_type == AMDGPU_PL_DGMA_IMPORT); + + return bo->tbo.offset; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +index 18945dd6982d..ab987fda411c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +@@ -133,6 +133,10 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) + return AMDGPU_GEM_DOMAIN_GWS; + case AMDGPU_PL_OA: + return AMDGPU_GEM_DOMAIN_OA; ++ case AMDGPU_PL_DGMA: ++ return AMDGPU_GEM_DOMAIN_DGMA; ++ case AMDGPU_PL_DGMA_IMPORT: ++ return AMDGPU_GEM_DOMAIN_DGMA_IMPORT; + default: + break; + } +@@ -201,6 +205,8 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) + { + switch (bo->tbo.mem.mem_type) { + case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem); ++ case AMDGPU_PL_DGMA: ++ case AMDGPU_PL_DGMA_IMPORT: + case TTM_PL_VRAM: return true; + default: return false; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +index a060aa7ebe17..cdb4a6279cbc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +@@ -213,6 +213,23 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, + 
man->available_caching = TTM_PL_FLAG_UNCACHED; + man->default_caching = TTM_PL_FLAG_UNCACHED; + break; ++ case AMDGPU_PL_DGMA: ++ /* reserved visible VRAM for direct GMA */ ++ man->func = &ttm_bo_manager_func; ++ man->gpu_offset = amdgpu_bo_gpu_offset(adev->direct_gma.dgma_bo); ++ man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; ++ man->available_caching = TTM_PL_FLAG_UNCACHED; ++ man->default_caching = TTM_PL_FLAG_UNCACHED; ++ break; ++ case AMDGPU_PL_DGMA_IMPORT: ++ /* reserved GTT space for direct GMA */ ++ man->func = &ttm_bo_manager_func; ++ /* meaningless for this domain */ ++ man->gpu_offset = AMDGPU_BO_INVALID_OFFSET; ++ man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; ++ man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; ++ man->default_caching = TTM_PL_FLAG_WC; ++ break; + default: + DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); + return -EINVAL; +@@ -258,6 +275,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, + abo = ttm_to_amdgpu_bo(bo); + switch (bo->mem.mem_type) { + case TTM_PL_VRAM: ++ case AMDGPU_PL_DGMA: + if (!adev->mman.buffer_funcs_enabled) { + /* Move to system memory */ + amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); +@@ -282,6 +300,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, + } + break; + case TTM_PL_TT: ++ case AMDGPU_PL_DGMA_IMPORT: + default: + amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } +@@ -664,6 +683,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, + if (WARN_ON_ONCE(abo->pin_count > 0)) + return -EINVAL; + ++ if (old_mem->mem_type == AMDGPU_PL_DGMA || ++ old_mem->mem_type == AMDGPU_PL_DGMA_IMPORT) ++ return -EINVAL; /* mem_type is a placement id (AMDGPU_PL_*), not a GEM domain bit */ ++ + adev = amdgpu_ttm_adev(bo->bdev); + + if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) { +@@ -725,7 +748,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ + struct ttm_mem_type_manager *man = 
&bdev->man[mem->mem_type]; + struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); + struct drm_mm_node *mm_node = mem->mm_node; ++ struct ttm_mem_reg backup; + ++ backup = *mem; + mem->bus.addr = NULL; + mem->bus.offset = 0; + mem->bus.size = mem->num_pages << PAGE_SHIFT; +@@ -740,7 +765,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ + case TTM_PL_TT: + break; + case TTM_PL_VRAM: +- mem->bus.offset = mem->start << PAGE_SHIFT; ++ case AMDGPU_PL_DGMA: ++ mem->bus.offset = (mem->start << PAGE_SHIFT) + man->gpu_offset - ++ adev->gmc.vram_start; + /* check if it's visible */ + if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) + return -EINVAL; +@@ -756,6 +783,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ + mem->bus.base = adev->gmc.aper_base; + mem->bus.is_iomem = true; + break; ++ case AMDGPU_PL_DGMA_IMPORT: ++ mem->bus.addr = backup.bus.addr; ++ mem->bus.offset = backup.bus.offset; ++ mem->bus.base = backup.bus.base; ++ mem->bus.is_iomem = true; ++ break; + default: + return -EINVAL; + } +@@ -772,6 +805,11 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, + struct drm_mm_node *mm; + unsigned long offset = (page_offset << PAGE_SHIFT); + ++ if (bo->mem.mem_type == AMDGPU_PL_DGMA || ++ bo->mem.mem_type == AMDGPU_PL_DGMA_IMPORT) ++ return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT) ++ + page_offset; ++ + mm = amdgpu_find_mm_node(&bo->mem, &offset); + return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + + (offset >> PAGE_SHIFT); +@@ -1450,6 +1488,9 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, + flags |= AMDGPU_PTE_SNOOPED; + } + ++ if (mem && mem->mem_type == AMDGPU_PL_DGMA_IMPORT) ++ flags |= AMDGPU_PTE_SYSTEM; ++ + flags |= adev->gart.gart_pte_flags; + flags |= AMDGPU_PTE_READABLE; + +@@ -1702,6 +1743,91 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) + 
adev->fw_vram_usage.reserved_bo = NULL; + return r; + } ++ ++static int amdgpu_direct_gma_init(struct amdgpu_device *adev) ++{ ++ struct amdgpu_bo *abo; ++ struct amdgpu_bo_param bp; ++ unsigned long size; ++ int r; ++ ++ if (amdgpu_direct_gma_size == 0) ++ return 0; ++ ++ size = (unsigned long)amdgpu_direct_gma_size << 20; ++ ++ memset(&bp, 0, sizeof(bp)); ++ bp.size = size; ++ bp.byte_align = PAGE_SIZE; ++ bp.domain = AMDGPU_GEM_DOMAIN_VRAM; ++ bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | ++ AMDGPU_GEM_CREATE_TOP_DOWN; ++ bp.type = ttm_bo_type_kernel; ++ bp.resv = NULL; ++ ++ /* reserve in visible vram */ ++ r = amdgpu_bo_create(adev, &bp, &abo); ++ if (unlikely(r)) ++ goto error_out; ++ ++ r = amdgpu_bo_reserve(abo, false); ++ if (unlikely(r)) ++ goto error_free; ++ ++ r = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM); ++ amdgpu_bo_unreserve(abo); ++ if (unlikely(r)) ++ goto error_free; ++ ++ adev->direct_gma.dgma_bo = abo; ++ ++ /* reserve in gtt */ ++ atomic64_add(size,&adev->gart_pin_size); ++ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_DGMA, size >> PAGE_SHIFT); ++ if (unlikely(r)) ++ goto error_put_node; ++ ++ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_DGMA_IMPORT, size >> PAGE_SHIFT); ++ if (unlikely(r)) ++ goto error_release_mm; ++ ++ DRM_INFO("%dMB VRAM/GTT reserved for Direct GMA\n", amdgpu_direct_gma_size); ++ return 0; ++ ++error_release_mm: ++ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_DGMA); ++ ++error_put_node: ++ atomic64_sub(size,&adev->gart_pin_size); ++error_free: ++ amdgpu_bo_unref(&abo); ++ ++error_out: ++ amdgpu_direct_gma_size = 0; ++ memset(&adev->direct_gma, 0, sizeof(adev->direct_gma)); ++ DRM_ERROR("Fail to enable Direct GMA\n"); ++ return r; ++} ++ ++static void amdgpu_direct_gma_fini(struct amdgpu_device *adev) ++{ ++ int r; ++ ++ if (amdgpu_direct_gma_size == 0) ++ return; ++ ++ ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_DGMA); ++ ttm_bo_clean_mm(&adev->mman.bdev, 
AMDGPU_PL_DGMA_IMPORT); ++ ++ r = amdgpu_bo_reserve(adev->direct_gma.dgma_bo, false); ++ if (r == 0) { ++ amdgpu_bo_unpin(adev->direct_gma.dgma_bo); ++ amdgpu_bo_unreserve(adev->direct_gma.dgma_bo); ++ } ++ amdgpu_bo_unref(&adev->direct_gma.dgma_bo); ++ atomic64_sub((u64)amdgpu_direct_gma_size << 20,&adev->gart_pin_size); ++} ++ + /** + * amdgpu_ttm_init - Init the memory management (ttm) as well as various + * gtt/vram related fields. +@@ -1795,6 +1921,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) + } else + gtt_size = (uint64_t)amdgpu_gtt_size << 20; + ++ /* reserve for DGMA import domain */ ++ gtt_size -= (uint64_t)amdgpu_direct_gma_size << 20; ++ + /* Initialize GTT memory pool */ + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_TT, gtt_size >> PAGE_SHIFT); + if (r) { +@@ -1804,6 +1933,8 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) + DRM_INFO("amdgpu: %uM of GTT memory ready.\n", + (unsigned)(gtt_size / (1024 * 1024))); + ++ amdgpu_direct_gma_init(adev); ++ + /* Initialize various on-chip memory pools */ + adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; + adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; +@@ -1876,6 +2007,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; + ++ amdgpu_direct_gma_fini(adev); + ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); + ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); + if (adev->gds.mem.total_size) +@@ -2197,6 +2329,8 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) + + static int ttm_pl_vram = TTM_PL_VRAM; + static int ttm_pl_tt = TTM_PL_TT; ++static int ttm_pl_dgma = AMDGPU_PL_DGMA; ++static int ttm_pl_dgma_import = AMDGPU_PL_DGMA_IMPORT; + + static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { + {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, +@@ -2207,6 +2341,11 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { + 
#endif + }; + ++static const struct drm_info_list amdgpu_ttm_dgma_debugfs_list[] = { ++ {"amdgpu_dgma_mm", amdgpu_mm_dump_table, 0, &ttm_pl_dgma}, ++ {"amdgpu_dgma_import_mm", amdgpu_mm_dump_table, 0, &ttm_pl_dgma_import} ++}; ++ + /** + * amdgpu_ttm_vram_read - Linear read access to VRAM + * +@@ -2486,6 +2625,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) + { + #if defined(CONFIG_DEBUG_FS) + unsigned count; ++ int r; + + struct drm_minor *minor = adev->ddev->primary; + struct dentry *ent, *root = minor->debugfs_root; +@@ -2512,6 +2652,13 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) + --count; + #endif + ++ if (amdgpu_direct_gma_size) { ++ r = amdgpu_debugfs_add_files(adev, amdgpu_ttm_dgma_debugfs_list, ++ ARRAY_SIZE(amdgpu_ttm_dgma_debugfs_list)); ++ if (unlikely(r)) ++ return r; ++ } ++ + return amdgpu_debugfs_add_files(adev, amdgpu_ttm_debugfs_list, count); + #else + return 0; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +index 8b3cc6687769..66251b6f807d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +@@ -30,10 +30,14 @@ + #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) + #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) + #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) ++#define AMDGPU_PL_DGMA (TTM_PL_PRIV + 3) ++#define AMDGPU_PL_DGMA_IMPORT (TTM_PL_PRIV + 4) + + #define AMDGPU_PL_FLAG_GDS (TTM_PL_FLAG_PRIV << 0) + #define AMDGPU_PL_FLAG_GWS (TTM_PL_FLAG_PRIV << 1) + #define AMDGPU_PL_FLAG_OA (TTM_PL_FLAG_PRIV << 2) ++#define AMDGPU_PL_FLAG_DGMA (TTM_PL_FLAG_PRIV << 3) ++#define AMDGPU_PL_FLAG_DGMA_IMPORT (TTM_PL_FLAG_PRIV << 4) + + #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 + #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 11e5cc51fa34..82c2af1e13d9 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ 
-1527,9 +1527,10 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + struct amdgpu_bo_va_mapping *mapping, + uint64_t vram_base_offset, + uint64_t flags, +- struct drm_mm_node *nodes, ++ struct ttm_mem_reg *mem, + struct dma_fence **fence) + { ++ struct drm_mm_node *nodes = mem ? mem->mm_node : NULL; + unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; + uint64_t pfn, start = mapping->start; + int r; +@@ -1568,41 +1569,52 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + dma_addr_t *dma_addr = NULL; + uint64_t max_entries; + uint64_t addr, last; ++ uint64_t count; + + if (nodes) { + addr = nodes->start << PAGE_SHIFT; + max_entries = (nodes->size - pfn) * +- AMDGPU_GPU_PAGES_IN_CPU_PAGE; +- } else { +- addr = 0; +- max_entries = S64_MAX; +- } +- +- if (pages_addr) { +- uint64_t count; +- +- max_entries = min(max_entries, 16ull * 1024ull); ++ (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); ++ switch (mem->mem_type) { ++ case TTM_PL_TT: ++ max_entries = min(max_entries, 16ull * 1024ull); + for (count = 1; +- count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; +- ++count) { ++ count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; ++ ++count){ + uint64_t idx = pfn + count; +- +- if (pages_addr[idx] != +- (pages_addr[idx - 1] + PAGE_SIZE)) ++ if (pages_addr[idx] != ++ (pages_addr[idx - 1] + PAGE_SIZE)) + break; +- } +- +- if (count < min_linear_pages) { +- addr = pfn << PAGE_SHIFT; ++ } ++ if (count < min_linear_pages) { ++ addr = pfn << PAGE_SHIFT; ++ dma_addr = pages_addr; ++ } else { ++ addr = pages_addr[pfn]; ++ max_entries = count; ++ } ++ break; ++ case AMDGPU_PL_DGMA_IMPORT: ++ addr = 0; ++ max_entries = min(max_entries, 16ull * 1024ull); + dma_addr = pages_addr; +- } else { +- addr = pages_addr[pfn]; +- max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE; ++ break; ++ case AMDGPU_PL_DGMA: ++ addr += vram_base_offset + ++ adev->mman.bdev.man[mem->mem_type].gpu_offset - ++ adev->mman.bdev.man[TTM_PL_VRAM].gpu_offset; ++ addr 
+= pfn << PAGE_SHIFT; ++ break; ++ case TTM_PL_VRAM: ++ addr += vram_base_offset; ++ addr += pfn << PAGE_SHIFT; ++ break; ++ default: ++ break; + } +- +- } else if (flags & AMDGPU_PTE_VALID) { +- addr += vram_base_offset; +- addr += pfn << PAGE_SHIFT; ++ } else { ++ addr = 0; ++ max_entries = S64_MAX; + } + + last = min((uint64_t)mapping->last, start + max_entries - 1); +@@ -1664,6 +1676,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + if (mem->mem_type == TTM_PL_TT) { + ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); + pages_addr = ttm->dma_address; ++ } else if (mem->mem_type == AMDGPU_PL_DGMA_IMPORT) { ++ pages_addr = (dma_addr_t *)bo_va->base.bo->tbo.mem.bus.addr; + } + exclusive = reservation_object_get_excl(bo->tbo.resv); + } +@@ -1695,7 +1709,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, + list_for_each_entry(mapping, &bo_va->invalids, list) { + r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, + mapping, vram_base_offset, flags, +- nodes, last_update); ++ mem, last_update); + if (r) + return r; + } +diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h +index baf299d3282d..bdc897d50894 100644 +--- a/include/uapi/drm/amdgpu_drm.h ++++ b/include/uapi/drm/amdgpu_drm.h +@@ -56,6 +56,7 @@ extern "C" { + #define DRM_AMDGPU_SCHED 0x15 + /* not upstream */ + #define DRM_AMDGPU_FREESYNC 0x5d ++#define DRM_AMDGPU_GEM_DGMA 0x5c + + #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) + #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) +@@ -75,6 +76,8 @@ extern "C" { + #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) + #define DRM_IOCTL_AMDGPU_FREESYNC DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FREESYNC, struct drm_amdgpu_freesync) + ++#define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct 
drm_amdgpu_gem_dgma) ++ + /** + * DOC: memory domains + * +@@ -104,6 +107,8 @@ extern "C" { + #define AMDGPU_GEM_DOMAIN_GDS 0x8 + #define AMDGPU_GEM_DOMAIN_GWS 0x10 + #define AMDGPU_GEM_DOMAIN_OA 0x20 ++#define AMDGPU_GEM_DOMAIN_DGMA 0x40 ++#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT 0x80 + #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ + AMDGPU_GEM_DOMAIN_GTT | \ + AMDGPU_GEM_DOMAIN_VRAM | \ +@@ -313,6 +318,15 @@ struct drm_amdgpu_gem_userptr { + __u32 handle; + }; + ++#define AMDGPU_GEM_DGMA_IMPORT 0 ++#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR 1 ++struct drm_amdgpu_gem_dgma { ++ __u64 addr; ++ __u64 size; ++ __u32 op; ++ __u32 handle; ++}; ++ + /* SI-CI-VI: */ + /* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ + #define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 +@@ -735,6 +749,8 @@ struct drm_amdgpu_cs_chunk_data { + #define AMDGPU_INFO_VIRTUAL_RANGE 0x51 + /* query pin memory capability */ + #define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0) ++/* query direct gma capability */ ++#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1) + + #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 + #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff +@@ -1023,6 +1039,7 @@ struct drm_amdgpu_virtual_range { + + struct drm_amdgpu_capability { + __u32 flag; ++ __u32 direct_gma_size; + }; + + /* +-- +2.17.1 + |