diff options
Diffstat (limited to 'meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0508-Revert-drm-amdgpu-meld-together-VM-fragment-and-huge.patch')
-rw-r--r-- | meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0508-Revert-drm-amdgpu-meld-together-VM-fragment-and-huge.patch | 341 |
1 files changed, 341 insertions, 0 deletions
diff --git a/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0508-Revert-drm-amdgpu-meld-together-VM-fragment-and-huge.patch b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0508-Revert-drm-amdgpu-meld-together-VM-fragment-and-huge.patch new file mode 100644 index 00000000..7188b178 --- /dev/null +++ b/meta-amd-bsp/recipes-kernel/linux-4.19/linux-yocto-4.19.8/0508-Revert-drm-amdgpu-meld-together-VM-fragment-and-huge.patch @@ -0,0 +1,341 @@ +From 9cd17f014fe62dec34b4f06ae6f51efda5177bac Mon Sep 17 00:00:00 2001 +From: Prike Liang <Prike.Liang@amd.com> +Date: Thu, 11 Oct 2018 15:38:37 +0800 +Subject: [PATCH 0508/2940] Revert "drm/amdgpu: meld together VM fragment and + huge page handling" + +This reverts commit 6ef5680cb4eefd8d92d9ea0dbafc83953ca8c968. + +Change-Id: If43203fec093d05fb998cca557676a83553a9a52 +Signed-off-by: Prike Liang <Prike.Liang@amd.com> +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 267 +++++++++++-------------- + 1 file changed, 120 insertions(+), 147 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +index 635bd17cd709..d945db767fa0 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +@@ -1483,76 +1483,46 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev, + } + + /** +- * amdgpu_vm_update_huge - figure out parameters for PTE updates ++ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * +- * Make sure to set the right flags for the PTEs at the desired level. ++ * @p: see amdgpu_pte_update_params definition ++ * @entry: vm_pt entry to check ++ * @parent: parent entry ++ * @nptes: number of PTEs updated with this operation ++ * @dst: destination address where the PTEs should point to ++ * @flags: access flags fro the PTEs ++ * ++ * Check if we can update the PD with a huge page. + */ +-static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params, +- struct amdgpu_bo *bo, unsigned level, +- uint64_t pe, uint64_t addr, +- unsigned count, uint32_t incr, +- uint64_t flags) +- ++static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, ++ struct amdgpu_vm_pt *entry, ++ struct amdgpu_vm_pt *parent, ++ unsigned nptes, uint64_t dst, ++ uint64_t flags) + { +- if (level != AMDGPU_VM_PTB) { ++ uint64_t pde; ++ ++ /* In the case of a mixed PT the PDE must point to it*/ ++ if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && ++ nptes == AMDGPU_VM_PTE_COUNT(p->adev)) { ++ /* Set the huge page flag to stop scanning at this PDE */ + flags |= AMDGPU_PDE_PTE; +- amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + } + +- amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); +-} +- +-/** +- * amdgpu_vm_fragment - get fragment for PTEs +- * +- * @params: see amdgpu_pte_update_params definition +- * @start: first PTE to handle +- * @end: last PTE to handle +- * @flags: hw mapping flags +- * @frag: resulting fragment size +- * @frag_end: end of this fragment +- * +- * Returns the first possible fragment for the start and end address. +- */ +-static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, +- uint64_t start, uint64_t end, uint64_t flags, +- unsigned int *frag, uint64_t *frag_end) +-{ +- /** +- * The MC L1 TLB supports variable sized pages, based on a fragment +- * field in the PTE. When this field is set to a non-zero value, page +- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE +- * flags are considered valid for all PTEs within the fragment range +- * and corresponding mappings are assumed to be physically contiguous. +- * +- * The L1 TLB can store a single PTE for the whole fragment, +- * significantly increasing the space available for translation +- * caching. This leads to large improvements in throughput when the +- * TLB is under pressure. +- * +- * The L2 TLB distributes small and large fragments into two +- * asymmetric partitions. The large fragment cache is significantly +- * larger. Thus, we try to use large fragments wherever possible. +- * Userspace can support this by aligning virtual base address and +- * allocation size to the fragment size. +- */ +- unsigned max_frag = params->adev->vm_manager.fragment_size; +- +- /* system pages are non continuously */ +- if (params->src || !(flags & AMDGPU_PTE_VALID)) { +- *frag = 0; +- *frag_end = end; ++ if (!(flags & AMDGPU_PDE_PTE)) { ++ if (entry->huge) { ++ /* Add the entry to the relocated list to update it. */ ++ entry->huge = false; ++ amdgpu_vm_bo_relocated(&entry->base); ++ } + return; + } + +- /* This intentionally wraps around if no bit is set */ +- *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); +- if (*frag >= max_frag) { +- *frag = max_frag; +- *frag_end = end & ~((1ULL << max_frag) - 1); +- } else { +- *frag_end = start + (1 << *frag); +- } ++ entry->huge = true; ++ amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); ++ ++ pde = (entry - parent->entries) * 8; ++ amdgpu_vm_update_func(p, parent->base.bo, pde, dst, 1, 0, flags); + } + + /** +@@ -1570,105 +1540,108 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, + * 0 for success, -EINVAL for failure. + */ + static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +- uint64_t start, uint64_t end, +- uint64_t dst, uint64_t flags) ++ uint64_t start, uint64_t end, ++ uint64_t dst, uint64_t flags) + { + struct amdgpu_device *adev = params->adev; ++ const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; + struct amdgpu_vm_pt_cursor cursor; +- uint64_t frag_start = start, frag_end; +- unsigned int frag; + +- /* figure out the initial fragment */ +- amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); +- +- /* walk over the address space and update the PTs */ +- amdgpu_vm_pt_start(adev, params->vm, start, &cursor); +- while (cursor.pfn < end) { ++ /* walk over the address space and update the page tables */ ++ for_each_amdgpu_vm_pt_leaf(adev, params->vm, start, end - 1, cursor) { + struct amdgpu_bo *pt = cursor.entry->base.bo; +- unsigned shift, parent_shift, num_entries; +- uint64_t incr, entry_end, pe_start; ++ uint64_t pe_start; ++ unsigned nptes; + +- if (!pt) ++ if (!pt || cursor.level != AMDGPU_VM_PTB) + return -ENOENT; + +- /* The root level can't be a huge page */ +- if (cursor.level == adev->vm_manager.root_level) { +- if (!amdgpu_vm_pt_descendant(adev, &cursor)) +- return -ENOENT; +- continue; +- } ++ if ((cursor.pfn & ~mask) == (end & ~mask)) ++ nptes = end - cursor.pfn; ++ else ++ nptes = AMDGPU_VM_PTE_COUNT(adev) - (cursor.pfn & mask); + +- /* First check if the entry is already handled */ +- if (cursor.pfn < frag_start) { +- cursor.entry->huge = true; +- amdgpu_vm_pt_next(adev, &cursor); ++ amdgpu_vm_handle_huge_pages(params, cursor.entry, cursor.parent, ++ nptes, dst, flags); ++ /* We don't need to update PTEs for huge pages */ ++ if (cursor.entry->huge) { ++ dst += nptes * AMDGPU_GPU_PAGE_SIZE; + continue; + } + +- /* If it isn't already handled it can't be a huge page */ +- if (cursor.entry->huge) { +- /* Add the entry to the relocated list to update it. */ +- cursor.entry->huge = false; +- amdgpu_vm_bo_relocated(&cursor.entry->base); +- } ++ pe_start = (cursor.pfn & mask) * 8; ++ amdgpu_vm_update_func(params, pt, pe_start, dst, nptes, ++ AMDGPU_GPU_PAGE_SIZE, flags); ++ dst += nptes * AMDGPU_GPU_PAGE_SIZE; ++ } + +- shift = amdgpu_vm_level_shift(adev, cursor.level); +- parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); +- if (adev->asic_type < CHIP_VEGA10) { +- /* No huge page support before GMC v9 */ +- if (cursor.level != AMDGPU_VM_PTB) { +- if (!amdgpu_vm_pt_descendant(adev, &cursor)) +- return -ENOENT; +- continue; +- } +- } else if (frag < shift) { +- /* We can't use this level when the fragment size is +- * smaller than the address shift. Go to the next +- * child entry and try again. +- */ +- if (!amdgpu_vm_pt_descendant(adev, &cursor)) +- return -ENOENT; +- continue; +- } else if (frag >= parent_shift) { +- /* If the fragment size is even larger than the parent +- * shift we should go up one level and check it again. +- */ +- if (!amdgpu_vm_pt_ancestor(&cursor)) +- return -ENOENT; +- continue; ++ return 0; ++} ++ ++/* ++ * amdgpu_vm_frag_ptes - add fragment information to PTEs ++ * ++ * @params: see amdgpu_pte_update_params definition ++ * @vm: requested vm ++ * @start: first PTE to handle ++ * @end: last PTE to handle ++ * @dst: addr those PTEs should point to ++ * @flags: hw mapping flags ++ * ++ * Returns: ++ * 0 for success, -EINVAL for failure. ++ */ ++static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, ++ uint64_t start, uint64_t end, ++ uint64_t dst, uint64_t flags) ++{ ++ /** ++ * The MC L1 TLB supports variable sized pages, based on a fragment ++ * field in the PTE. When this field is set to a non-zero value, page ++ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE ++ * flags are considered valid for all PTEs within the fragment range ++ * and corresponding mappings are assumed to be physically contiguous. ++ * ++ * The L1 TLB can store a single PTE for the whole fragment, ++ * significantly increasing the space available for translation ++ * caching. This leads to large improvements in throughput when the ++ * TLB is under pressure. ++ * ++ * The L2 TLB distributes small and large fragments into two ++ * asymmetric partitions. The large fragment cache is significantly ++ * larger. Thus, we try to use large fragments wherever possible. ++ * Userspace can support this by aligning virtual base address and ++ * allocation size to the fragment size. ++ */ ++ unsigned max_frag = params->adev->vm_manager.fragment_size; ++ int r; ++ ++ /* system pages are non continuously */ ++ if (params->src || !(flags & AMDGPU_PTE_VALID)) ++ return amdgpu_vm_update_ptes(params, start, end, dst, flags); ++ ++ while (start != end) { ++ uint64_t frag_flags, frag_end; ++ unsigned frag; ++ ++ /* This intentionally wraps around if no bit is set */ ++ frag = min((unsigned)ffs(start) - 1, ++ (unsigned)fls64(end - start) - 1); ++ if (frag >= max_frag) { ++ frag_flags = AMDGPU_PTE_FRAG(max_frag); ++ frag_end = end & ~((1ULL << max_frag) - 1); ++ } else { ++ frag_flags = AMDGPU_PTE_FRAG(frag); ++ frag_end = start + (1 << frag); + } + +- /* Looks good so far, calculate parameters for the update */ +- incr = AMDGPU_GPU_PAGE_SIZE << shift; +- num_entries = amdgpu_vm_num_entries(adev, cursor.level); +- pe_start = ((cursor.pfn >> shift) & (num_entries - 1)) * 8; +- entry_end = num_entries << shift; +- entry_end += cursor.pfn & ~(entry_end - 1); +- entry_end = min(entry_end, end); +- +- do { +- uint64_t upd_end = min(entry_end, frag_end); +- unsigned nptes = (upd_end - frag_start) >> shift; +- +- amdgpu_vm_update_huge(params, pt, cursor.level, +- pe_start, dst, nptes, incr, +- flags | AMDGPU_PTE_FRAG(frag)); +- +- pe_start += nptes * 8; +- dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift; +- +- frag_start = upd_end; +- if (frag_start >= frag_end) { +- /* figure out the next fragment */ +- amdgpu_vm_fragment(params, frag_start, end, +- flags, &frag, &frag_end); +- if (frag < shift) +- break; +- } +- } while (frag_start < entry_end); ++ r = amdgpu_vm_update_ptes(params, start, frag_end, dst, ++ flags | frag_flags); ++ if (r) ++ return r; + +- if (frag >= shift) +- amdgpu_vm_pt_next(adev, &cursor); ++ dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; ++ start = frag_end; + } + + return 0; +@@ -1730,8 +1703,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + + params.func = amdgpu_vm_cpu_set_ptes; + params.pages_addr = pages_addr; +- return amdgpu_vm_update_ptes(¶ms, start, last + 1, +- addr, flags); ++ return amdgpu_vm_frag_ptes(¶ms, start, last + 1, ++ addr, flags); + } + + ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); +@@ -1810,7 +1783,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + if (r) + goto error_free; + +- r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); ++ r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); + if (r) + goto error_free; + +-- +2.17.1 + |