From 5844c053f2f504b17a59b51d220a169a3fa2e846 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 13 Feb 2017 16:01:52 +0800 Subject: [PATCH 0785/4131] drm/amdgpu/hybrid: add SSG support Depends on DirectGMA and CONFIG_ZONE_DEVICE/PFN_MAP. A user process can mmap a DirectGMA amdgpu_bo and use it as the buffer for direct-I/O file reads and writes, which means data can be transferred directly between visible VRAM and the disk controller without passing through DRAM. This implementation is inspired by DAX-GUP: https://lwn.net/Articles/667148/ Change-Id: I7205f3391d67677277b4b5cbcb26fd81b66e15fb Signed-off-by: Qiang Yu Acked-by: Chunming Zhou Signed-off-by: Kalyan Alle Conflicts: drivers/gpu/drm/ttm/ttm_bo_vm.c --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 18 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 79 ++++++++++++++++++++++++++++++ include/drm/ttm/ttm_bo_api.h | 1 + 5 files changed, 105 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9db84c8..80aca7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,7 @@ extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern int amdgpu_no_evict; extern int amdgpu_direct_gma_size; +extern int amdgpu_ssg_enabled; extern unsigned amdgpu_pcie_gen_cap; extern unsigned amdgpu_pcie_lane_cap; extern unsigned amdgpu_cg_mask; @@ -1417,6 +1419,19 @@ struct amdgpu_direct_gma { atomic64_t gart_usage; }; +#if defined(CONFIG_ZONE_DEVICE) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) || defined(OS_NAME_RHEL_7_3) || defined(OS_NAME_SLE)) +#define CONFIG_ENABLE_SSG +#endif + +struct amdgpu_ssg { + bool enabled; +#ifdef CONFIG_ENABLE_SSG + struct percpu_ref ref; + struct completion cmp; +#endif +}; + #define AMDGPU_RESET_MAGIC_NUM 64 struct amdgpu_device { struct
device *dev; @@ -1463,6 +1478,9 @@ struct amdgpu_device { uint32_t bios_scratch_reg_offset; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; + /* SSG */ + struct amdgpu_ssg ssg; + /* Register/doorbell mmio */ resource_size_t rmmio_base; resource_size_t rmmio_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index da98307..e190da1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -107,6 +107,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; int amdgpu_no_evict = 0; int amdgpu_direct_gma_size = 0; +int amdgpu_ssg_enabled = 0; unsigned amdgpu_pcie_gen_cap = 0; unsigned amdgpu_pcie_lane_cap = 0; unsigned amdgpu_cg_mask = 0xffffffff; @@ -225,6 +226,9 @@ module_param_named(no_evict, amdgpu_no_evict, int, 0444); MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)"); module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444); +MODULE_PARM_DESC(ssg, "SSG support (1 = enable, 0 = disable (default))"); +module_param_named(ssg, amdgpu_ssg_enabled, int, 0444); + MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))"); module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cb5fce1..8339fd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -433,6 +433,9 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, else amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0); + if (domain & AMDGPU_GEM_DOMAIN_DGMA && adev->ssg.enabled) + bo->tbo.ssg_can_map = true; + if (kernel) bo->tbo.priority = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c841795..e150097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1327,6 +1327,83 @@ 
static void amdgpu_direct_gma_fini(struct amdgpu_device *adev) adev->gart_pin_size -= (u64)amdgpu_direct_gma_size << 20; } +#ifdef CONFIG_ENABLE_SSG +#include + +static struct amdgpu_ssg *to_amdgpu_ssg(struct percpu_ref *ref) +{ + return container_of(ref, struct amdgpu_ssg, ref); +} + +static void amdgpu_ssg_percpu_release(struct percpu_ref *ref) +{ + struct amdgpu_ssg *ssg = to_amdgpu_ssg(ref); + + complete(&ssg->cmp); +} + +static int amdgpu_ssg_init(struct amdgpu_device *adev) +{ + struct resource res = { 0 }; /* no indeterminate fields handed to devm_memremap_pages() */ + void *addr; + int rc; + + adev->ssg.enabled = false; + + if (!amdgpu_ssg_enabled) + return 0; + + if (amdgpu_direct_gma_size == 0) { + DRM_INFO("SSG: not enabled due to DirectGMA is disabled\n"); + return 0; + } + + init_completion(&adev->ssg.cmp); + + res.start = adev->mc.aper_base + + (amdgpu_bo_gpu_offset(adev->direct_gma.dgma_bo) - + adev->mc.vram_start); + res.end = res.start + amdgpu_bo_size(adev->direct_gma.dgma_bo) - 1; /* struct resource end is inclusive */ + res.name = "DirectGMA"; + + rc = percpu_ref_init(&adev->ssg.ref, amdgpu_ssg_percpu_release, + 0, GFP_KERNEL); + if (rc) + return rc; + + addr = devm_memremap_pages(adev->dev, &res, &adev->ssg.ref, NULL); + if (IS_ERR(addr)) { + percpu_ref_exit(&adev->ssg.ref); + return PTR_ERR(addr); + } + + adev->ssg.enabled = true; + DRM_INFO("SSG: remap %llx-%llx to %p\n", res.start, res.end, addr); + return 0; +} + +static void amdgpu_ssg_fini(struct amdgpu_device *adev) +{ + if (!adev->ssg.enabled) + return; + + percpu_ref_kill(&adev->ssg.ref); + wait_for_completion(&adev->ssg.cmp); + percpu_ref_exit(&adev->ssg.ref); +} +#else +static int amdgpu_ssg_init(struct amdgpu_device *adev) +{ + adev->ssg.enabled = false; + return 0; +} + +static void amdgpu_ssg_fini(struct amdgpu_device *adev) +{ + +} +#endif + int amdgpu_ttm_init(struct amdgpu_device *adev) { uint64_t gtt_size; @@ -1388,6 +1465,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) (unsigned)(gtt_size / (1024 * 1024))); amdgpu_direct_gma_init(adev); + amdgpu_ssg_init(adev);
adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; @@ -1451,6 +1529,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) } amdgpu_bo_unref(&adev->stolen_vga_memory); } + amdgpu_ssg_fini(adev); amdgpu_direct_gma_fini(adev); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index fa07be1..a29c078 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -175,6 +175,7 @@ struct ttm_buffer_object { void (*destroy) (struct ttm_buffer_object *); unsigned long num_pages; size_t acc_size; + bool ssg_can_map; /** * Members not needing protection. -- 2.7.4