aboutsummaryrefslogtreecommitdiffstats
path: root/common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch
diff options
context:
space:
mode:
Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch')
-rw-r--r-- common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch 248
1 files changed, 248 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch
new file mode 100644
index 00000000..cc9d2102
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/0150-drm-amdgpu-hybrid-add-SSG-support.patch
@@ -0,0 +1,248 @@
+From 438481335e9bb397ef0f2329656df87619b90826 Mon Sep 17 00:00:00 2001
+From: Junwei Zhang <Jerry.Zhang@amd.com>
+Date: Wed, 22 Aug 2018 11:01:03 +0800
+Subject: [PATCH 0150/2940] drm/amdgpu: [hybrid] add SSG support
+
+Depends on DirectGMA and CONFIG_ZONE_DEVICE/PFN_MAP. A user process
+can mmap a DirectGMA amdgpu_bo and use it as the buffer for file
+direct-I/O reads/writes, so data can be transferred directly between
+visible VRAM and the disk controller without passing through DRAM.
+
+This implementation is inspired by DAX-GUP:
+https://lwn.net/Articles/667148/
+
+v2: rebase on 4.18
+
+Signed-off-by: Qiang Yu <Qiang.Yu@amd.com>
+Acked-by: Chunming Zhou <david1.zhou@amd.com>
+Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com> (v2)
+Signed-off-by: Kalyan Alle <kalyan.alle@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h | 13 ++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 +
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 79 ++++++++++++++++++++++
+ include/drm/ttm/ttm_bo_api.h | 1 +
+ include/uapi/drm/amdgpu_drm.h | 2 +
+ 7 files changed, 104 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+index a15aebb028d7..debab710f5be 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -125,6 +125,7 @@ extern char *amdgpu_disable_cu;
+ extern char *amdgpu_virtual_display;
+ extern uint amdgpu_pp_feature_mask;
+ extern int amdgpu_vram_page_split;
++extern int amdgpu_ssg_enabled;
+ extern int amdgpu_ngg;
+ extern int amdgpu_prim_buf_per_se;
+ extern int amdgpu_pos_buf_per_se;
+@@ -813,6 +814,16 @@ struct amdgpu_direct_gma {
+ atomic64_t gart_usage;
+ };
+
++#define CONFIG_ENABLE_SSG
++
++struct amdgpu_ssg {
++ bool enabled; /* set by amdgpu_ssg_init() once the remap succeeded */
++#ifdef CONFIG_ENABLE_SSG
++ struct percpu_ref ref; /* passed to devm_memremap_pages() at init */
++ struct completion cmp; /* completed by the ref's release callback */
++#endif
++};
++
+ struct amd_powerplay {
+ void *pp_handle;
+ const struct amd_pm_funcs *pp_funcs;
+@@ -867,6 +878,8 @@ struct amdgpu_device {
+
+ /* Direct GMA */
+ struct amdgpu_direct_gma direct_gma;
++ /* SSG */
++ struct amdgpu_ssg ssg;
+
+ /* Register/doorbell mmio */
+ resource_size_t rmmio_base;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index a139c99d39b3..1799bc410d39 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -111,6 +111,7 @@ int amdgpu_sched_jobs = 32;
+ int amdgpu_sched_hw_submission = 2;
+ int amdgpu_no_evict = 0;
+ int amdgpu_direct_gma_size = 0;
++int amdgpu_ssg_enabled = 0;
+ uint amdgpu_pcie_gen_cap = 0;
+ uint amdgpu_pcie_lane_cap = 0;
+ uint amdgpu_cg_mask = 0xffffffff;
+@@ -371,6 +372,9 @@ module_param_named(no_evict, amdgpu_no_evict, int, 0444);
+ MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)");
+ module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444);
+
++MODULE_PARM_DESC(ssg, "SSG support (1 = enable, 0 = disable (default))");
++module_param_named(ssg, amdgpu_ssg_enabled, int, 0444);
++
+ /**
+ * DOC: pcie_gen_cap (uint)
+ * Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 57ca12599e20..03d5254af382 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -756,6 +756,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
+ cap.flag |= AMDGPU_CAPABILITY_DIRECT_GMA_FLAG;
+ cap.direct_gma_size = amdgpu_direct_gma_size;
+ }
++ if (adev->ssg.enabled)
++ cap.flag |= AMDGPU_CAPABILITY_SSG_FLAG;
+ return copy_to_user(out, &cap,
+ min((size_t)size, sizeof(cap))) ? -EFAULT : 0;
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 25380ceb6063..3f8d3ca60388 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -529,6 +529,9 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+ else
+ amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
+
++ if (bp->domain & AMDGPU_GEM_DOMAIN_DGMA && adev->ssg.enabled)
++ bo->tbo.ssg_can_map = true;
++
+ if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
+ bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
+ struct dma_fence *fence;
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index cdb4a6279cbc..67bb65cda2e7 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -1828,6 +1828,83 @@ static void amdgpu_direct_gma_fini(struct amdgpu_device *adev)
+ atomic64_sub((u64)amdgpu_direct_gma_size << 20,&adev->gart_pin_size);
+ }
+
++#ifdef CONFIG_ENABLE_SSG
++#include <linux/memremap.h>
++
++static struct amdgpu_ssg *to_amdgpu_ssg(struct percpu_ref *ref)
++{
++ return container_of(ref, struct amdgpu_ssg, ref); /* ref is embedded in amdgpu_ssg */
++}
++
++static void amdgpu_ssg_percpu_release(struct percpu_ref *ref)
++{
++ struct amdgpu_ssg *ssg = to_amdgpu_ssg(ref);
++ /* Signal amdgpu_ssg_fini(), which waits on cmp after killing the ref. */
++ complete(&ssg->cmp);
++}
++
++static int amdgpu_ssg_init(struct amdgpu_device *adev)
++{
++ struct resource res = { 0 }; /* only start/end/name are filled in below */
++ void *addr;
++ int rc;
++ /* Returns 0 when SSG is set up or left disabled, else the failing call's error. */
++ adev->ssg.enabled = false;
++
++ if (!amdgpu_ssg_enabled)
++ return 0;
++ /* SSG remaps the DirectGMA BO, so it is skipped when DirectGMA is off. */
++ if (amdgpu_direct_gma_size == 0) {
++ DRM_INFO("SSG: not enabled due to DirectGMA is disabled\n");
++ return 0;
++ }
++
++ init_completion(&adev->ssg.cmp);
++ /* Range = aperture base + the DirectGMA BO's offset from the start of VRAM. */
++ res.start = adev->gmc.aper_base +
++ (amdgpu_bo_gpu_offset(adev->direct_gma.dgma_bo) -
++ adev->gmc.vram_start);
++ res.end = res.start + amdgpu_bo_size(adev->direct_gma.dgma_bo) - 1;
++ res.name = "DirectGMA";
++
++ rc = percpu_ref_init(&adev->ssg.ref, amdgpu_ssg_percpu_release,
++ 0, GFP_KERNEL);
++ if (rc)
++ return rc;
++
++ addr = devm_memremap_pages(adev->dev, &res, &adev->ssg.ref, NULL);
++ if (IS_ERR(addr)) {
++ percpu_ref_exit(&adev->ssg.ref);
++ return PTR_ERR(addr);
++ }
++ adev->ssg.enabled = true;
++ /* resource_size_t width is config-dependent; cast to match %llx. */
++ DRM_INFO("SSG: remap %llx-%llx to %p\n", (u64)res.start, (u64)res.end, addr);
++ return 0;
++}
++
++static void amdgpu_ssg_fini(struct amdgpu_device *adev)
++{
++ if (!adev->ssg.enabled)
++ return;
++ /* Kill the ref, wait for its release callback to signal cmp, then free it. */
++ percpu_ref_kill(&adev->ssg.ref);
++ wait_for_completion(&adev->ssg.cmp);
++ percpu_ref_exit(&adev->ssg.ref);
++}
++#else
++static int amdgpu_ssg_init(struct amdgpu_device *adev)
++{
++ adev->ssg.enabled = false; /* SSG compiled out: always report disabled */
++ return 0;
++}
++
++static void amdgpu_ssg_fini(struct amdgpu_device *adev)
++{
++ /* Nothing to tear down when CONFIG_ENABLE_SSG is not defined. */
++}
++#endif
++
+ /**
+ * amdgpu_ttm_init - Init the memory management (ttm) as well as various
+ * gtt/vram related fields.
+@@ -1934,6 +2011,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
+ (unsigned)(gtt_size / (1024 * 1024)));
+
+ amdgpu_direct_gma_init(adev);
++ amdgpu_ssg_init(adev);
+
+ /* Initialize various on-chip memory pools */
+ adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT;
+@@ -2007,6 +2085,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
+
++ amdgpu_ssg_fini(adev);
+ amdgpu_direct_gma_fini(adev);
+ ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
+ ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
+diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
+index a01ba2032f0e..84916abe1667 100644
+--- a/include/drm/ttm/ttm_bo_api.h
++++ b/include/drm/ttm/ttm_bo_api.h
+@@ -176,6 +176,7 @@ struct ttm_buffer_object {
+ void (*destroy) (struct ttm_buffer_object *);
+ unsigned long num_pages;
+ size_t acc_size;
++ bool ssg_can_map;
+
+ /**
+ * Members not needing protection.
+diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
+index bdc897d50894..72822f593f57 100644
+--- a/include/uapi/drm/amdgpu_drm.h
++++ b/include/uapi/drm/amdgpu_drm.h
+@@ -751,6 +751,8 @@ struct drm_amdgpu_cs_chunk_data {
+ #define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0)
+ /* query direct gma capability */
+ #define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1)
++/* query ssg capability */
++#define AMDGPU_CAPABILITY_SSG_FLAG (1 << 2)
+
+ #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
+ #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff
+--
+2.17.1
+