about summary refs log tree commit diff stats
path: root/meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch')
-rw-r--r-- meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch 237
1 files changed, 0 insertions, 237 deletions
diff --git a/meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch b/meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch
deleted file mode 100644
index 6498c071..00000000
--- a/meta-baldeagle/recipes-kernel/linux/linux-yocto/0018-yocto-poky-dora-10.0.0-amd-drm-radeon-add-large-PTE-support-for-NI-SI-and-CIK-v.patch
+++ /dev/null
@@ -1,237 +0,0 @@
-From a2bc39a6394bb8e11060df3da33d603a66ccf9f6 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
-Date: Fri, 25 Oct 2013 18:07:55 +0200
-Subject: [PATCH 18/44] drm/radeon: add large PTE support for NI, SI and CIK
- v3
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This patch implements support for VRAM page table entry compression.
-PTE construction is enhanced to identify physically contiguous page
-ranges and mark them in the PTE fragment field. L1 TLB and L2 cache
-support is enabled for 64KB (SI/CIK) and 256KB (NI) PTE fragments,
-significantly improving TLB utilization for VRAM allocations.
-
-Linear store bandwidth is improved from 60GB/s to 125GB/s on Pitcairn.
-Unigine Heaven 3.0 sees an average improvement from 24.7 to 27.7 FPS
-on default settings at 1920x1200 resolution with vsync disabled.
-
-The main comment in radeon_gart.c gives a technical description.
-
-v2 (chk): rebased and simplified.
-v3 (chk): add missing hw setup
-
-Signed-off-by: Jay Cornwall <jay@jcornwall.me>
-Signed-off-by: Christian König <christian.koenig@amd.com>
----
- drivers/gpu/drm/radeon/cik.c | 4 +-
- drivers/gpu/drm/radeon/ni.c | 2 +
- drivers/gpu/drm/radeon/radeon.h | 5 ++
- drivers/gpu/drm/radeon/radeon_gart.c | 91 +++++++++++++++++++++++++++++++---
- drivers/gpu/drm/radeon/si.c | 5 +-
- 5 files changed, 98 insertions(+), 9 deletions(-)
-
-diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
-index d7e86ef3..27fa479 100644
---- a/drivers/gpu/drm/radeon/cik.c
-+++ b/drivers/gpu/drm/radeon/cik.c
-@@ -4522,6 +4522,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
- WREG32(MC_VM_MX_L1_TLB_CNTL,
- (0xA << 7) |
- ENABLE_L1_TLB |
-+ ENABLE_L1_FRAGMENT_PROCESSING |
- SYSTEM_ACCESS_MODE_NOT_IN_SYS |
- ENABLE_ADVANCED_DRIVER_MODEL |
- SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
-@@ -4534,7 +4535,8 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
- CONTEXT1_IDENTITY_ACCESS_MODE(1));
- WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
- WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-- L2_CACHE_BIGK_FRAGMENT_SIZE(6));
-+ BANK_SELECT(4) |
-+ L2_CACHE_BIGK_FRAGMENT_SIZE(4));
- /* setup context0 */
- WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
- WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
-diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
-index f59a9e9..9291982 100644
---- a/drivers/gpu/drm/radeon/ni.c
-+++ b/drivers/gpu/drm/radeon/ni.c
-@@ -1227,12 +1227,14 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
- SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
- /* Setup L2 cache */
- WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
-+ ENABLE_L2_FRAGMENT_PROCESSING |
- ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
- ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
- EFFECTIVE_L2_QUEUE_SIZE(7) |
- CONTEXT1_IDENTITY_ACCESS_MODE(1));
- WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
- WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-+ BANK_SELECT(6) |
- L2_CACHE_BIGK_FRAGMENT_SIZE(6));
- /* setup context0 */
- WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
-index b987f01..06b3375 100644
---- a/drivers/gpu/drm/radeon/radeon.h
-+++ b/drivers/gpu/drm/radeon/radeon.h
-@@ -846,6 +846,11 @@ struct radeon_mec {
- #define R600_PTE_READABLE (1 << 5)
- #define R600_PTE_WRITEABLE (1 << 6)
-
-+/* PTE (Page Table Entry) fragment field for different page sizes */
-+#define R600_PTE_FRAG_4KB (0 << 7)
-+#define R600_PTE_FRAG_64KB (4 << 7)
-+#define R600_PTE_FRAG_256KB (6 << 7)
-+
- struct radeon_vm {
- struct list_head list;
- struct list_head va;
-diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
-index 9ceabdf..f960ce6 100644
---- a/drivers/gpu/drm/radeon/radeon_gart.c
-+++ b/drivers/gpu/drm/radeon/radeon_gart.c
-@@ -1021,6 +1021,84 @@ retry:
- }
-
- /**
-+ * radeon_vm_frag_ptes - add fragment information to PTEs
-+ *
-+ * @rdev: radeon_device pointer
-+ * @ib: IB for the update
-+ * @pe_start: first PTE to handle
-+ * @pe_end: last PTE to handle
-+ * @addr: addr those PTEs should point to
-+ * @flags: hw mapping flags
-+ *
-+ * Global and local mutex must be locked!
-+ */
-+static void radeon_vm_frag_ptes(struct radeon_device *rdev,
-+ struct radeon_ib *ib,
-+ uint64_t pe_start, uint64_t pe_end,
-+ uint64_t addr, uint32_t flags)
-+{
-+ /**
-+ * The MC L1 TLB supports variable sized pages, based on a fragment
-+ * field in the PTE. When this field is set to a non-zero value, page
-+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
-+ * flags are considered valid for all PTEs within the fragment range
-+ * and corresponding mappings are assumed to be physically contiguous.
-+ *
-+ * The L1 TLB can store a single PTE for the whole fragment,
-+ * significantly increasing the space available for translation
-+ * caching. This leads to large improvements in throughput when the
-+ * TLB is under pressure.
-+ *
-+ * The L2 cache distributes small and large fragments into two
-+ * asymmetric partitions. The large fragment cache is significantly
-+ * larger. Thus, we try to use large fragments wherever possible.
-+ * Userspace can support this by aligning virtual base address and
-+ * allocation size to the fragment size.
-+ */
-+
-+ /* NI is optimized for 256KB fragments, SI and newer for 64KB */
-+ uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
-+ R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
-+ uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;
-+
-+ uint64_t frag_start = ALIGN(pe_start, frag_align);
-+ uint64_t frag_end = pe_end & ~(frag_align - 1);
-+
-+ unsigned count;
-+
-+ /* system pages are not contiguous */
-+ if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
-+ (frag_start >= frag_end)) {
-+
-+ count = (pe_end - pe_start) / 8;
-+ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
-+ RADEON_GPU_PAGE_SIZE, flags);
-+ return;
-+ }
-+
-+ /* handle the 4K area at the beginning */
-+ if (pe_start != frag_start) {
-+ count = (frag_start - pe_start) / 8;
-+ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
-+ RADEON_GPU_PAGE_SIZE, flags);
-+ addr += RADEON_GPU_PAGE_SIZE * count;
-+ }
-+
-+ /* handle the area in the middle */
-+ count = (frag_end - frag_start) / 8;
-+ radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
-+ RADEON_GPU_PAGE_SIZE, flags | frag_flags);
-+
-+ /* handle the 4K area at the end */
-+ if (frag_end != pe_end) {
-+ addr += RADEON_GPU_PAGE_SIZE * count;
-+ count = (pe_end - frag_end) / 8;
-+ radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
-+ RADEON_GPU_PAGE_SIZE, flags);
-+ }
-+}
-+
-+/**
- * radeon_vm_update_ptes - make sure that page tables are valid
- *
- * @rdev: radeon_device pointer
-@@ -1066,10 +1144,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
- if ((last_pte + 8 * count) != pte) {
-
- if (count) {
-- radeon_asic_vm_set_page(rdev, ib, last_pte,
-- last_dst, count,
-- RADEON_GPU_PAGE_SIZE,
-- flags);
-+ radeon_vm_frag_ptes(rdev, ib, last_pte,
-+ last_pte + 8 * count,
-+ last_dst, flags);
- }
-
- count = nptes;
-@@ -1084,9 +1161,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
- }
-
- if (count) {
-- radeon_asic_vm_set_page(rdev, ib, last_pte,
-- last_dst, count,
-- RADEON_GPU_PAGE_SIZE, flags);
-+ radeon_vm_frag_ptes(rdev, ib, last_pte,
-+ last_pte + 8 * count,
-+ last_dst, flags);
- }
- }
-
-diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
-index 0334f3e..d9173ef 100644
---- a/drivers/gpu/drm/radeon/si.c
-+++ b/drivers/gpu/drm/radeon/si.c
-@@ -3928,18 +3928,21 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
- WREG32(MC_VM_MX_L1_TLB_CNTL,
- (0xA << 7) |
- ENABLE_L1_TLB |
-+ ENABLE_L1_FRAGMENT_PROCESSING |
- SYSTEM_ACCESS_MODE_NOT_IN_SYS |
- ENABLE_ADVANCED_DRIVER_MODEL |
- SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
- /* Setup L2 cache */
- WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
-+ ENABLE_L2_FRAGMENT_PROCESSING |
- ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
- ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
- EFFECTIVE_L2_QUEUE_SIZE(7) |
- CONTEXT1_IDENTITY_ACCESS_MODE(1));
- WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
- WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
-- L2_CACHE_BIGK_FRAGMENT_SIZE(0));
-+ BANK_SELECT(4) |
-+ L2_CACHE_BIGK_FRAGMENT_SIZE(4));
- /* setup context0 */
- WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
- WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
---
-1.7.9.5
-