Diffstat (limited to 'common/recipes-kernel/linux/linux-yocto-4.19.8/1623-drm-amdgpu-support-userptr-cross-VMAs-case-with-HMM.patch')
-rw-r--r--  common/recipes-kernel/linux/linux-yocto-4.19.8/1623-drm-amdgpu-support-userptr-cross-VMAs-case-with-HMM.patch  |  217
1 file changed, 217 insertions, 0 deletions
diff --git a/common/recipes-kernel/linux/linux-yocto-4.19.8/1623-drm-amdgpu-support-userptr-cross-VMAs-case-with-HMM.patch b/common/recipes-kernel/linux/linux-yocto-4.19.8/1623-drm-amdgpu-support-userptr-cross-VMAs-case-with-HMM.patch
new file mode 100644
index 00000000..f308981c
--- /dev/null
+++ b/common/recipes-kernel/linux/linux-yocto-4.19.8/1623-drm-amdgpu-support-userptr-cross-VMAs-case-with-HMM.patch
@@ -0,0 +1,217 @@
+From d3b6337326b7a89c305e9729db659787cbdd03fc Mon Sep 17 00:00:00 2001
+From: Philip Yang <Philip.Yang@amd.com>
+Date: Mon, 4 Mar 2019 14:41:03 -0500
+Subject: [PATCH 1623/2940] drm/amdgpu: support userptr cross VMAs case with
+ HMM
+
+A userptr may cross two VMAs if a forked child process (one that does
+not call exec after fork) mallocs a buffer, frees it, and then mallocs
+a larger buffer: the kernel creates the new VMA adjacent to the old
+VMA that was cloned from the parent process, so some pages of the
+userptr land in the first VMA and the rest in the second VMA.
+
+HMM expects a range to span only one VMA, so loop over all VMAs in the
+address range and create multiple ranges to handle this case. See
+is_mergeable_anon_vma() in mm/mmap.c for details.
+
+Change-Id: I0ca8c77e28deabccc139906f9ffee04b7e383314
+Signed-off-by: Philip Yang <Philip.Yang@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
+ 1 file changed, 91 insertions(+), 35 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+index 3ec146b6b9c8..4248b7ae64fa 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+@@ -846,7 +846,8 @@ struct amdgpu_ttm_tt {
+ struct task_struct *usertask;
+ uint32_t userflags;
+ #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+- struct hmm_range range;
++ struct hmm_range *ranges;
++ int nr_ranges;
+ #endif
+ };
+
+@@ -858,62 +859,108 @@ struct amdgpu_ttm_tt {
+ * once afterwards to stop HMM tracking
+ */
+ #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
++
+/* Support userptr pages crossing at most 16 VMAs */
++#define MAX_NR_VMAS (16)
++
+ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
+ {
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct mm_struct *mm = gtt->usertask->mm;
+- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+- struct hmm_range *range = &gtt->range;
+- int r = 0, i;
++ unsigned long start = gtt->userptr;
++ unsigned long end = start + ttm->num_pages * PAGE_SIZE;
++ struct hmm_range *ranges;
++ struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS];
++ uint64_t *pfns, f;
++ int r = 0, i, nr_pages;
+
+ if (!mm) /* Happens during process shutdown */
+ return -ESRCH;
+
+- amdgpu_hmm_init_range(range);
+-
+ down_read(&mm->mmap_sem);
+
+- range->vma = find_vma(mm, gtt->userptr);
+- if (!range_in_vma(range->vma, gtt->userptr, end))
+- r = -EFAULT;
+- else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+- range->vma->vm_file)
++ /* user pages may cross multiple VMAs */
++ gtt->nr_ranges = 0;
++ do {
++ unsigned long vm_start;
++
++ if (gtt->nr_ranges >= MAX_NR_VMAS) {
++ DRM_ERROR("Too many VMAs in userptr range\n");
++ r = -EFAULT;
++ goto out;
++ }
++
++ vm_start = vma ? vma->vm_end : start;
++ vma = find_vma(mm, vm_start);
++ if (unlikely(!vma || vm_start < vma->vm_start)) {
++ r = -EFAULT;
++ goto out;
++ }
++ vmas[gtt->nr_ranges++] = vma;
++ } while (end > vma->vm_end);
++
++ DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n",
++ start, gtt->nr_ranges, ttm->num_pages);
++
++ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
++ vmas[0]->vm_file)) {
+ r = -EPERM;
+- if (r)
+ goto out;
++ }
+
+- range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+- GFP_KERNEL);
+- if (range->pfns == NULL) {
++ ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL);
++ if (unlikely(!ranges)) {
+ r = -ENOMEM;
+ goto out;
+ }
+- range->start = gtt->userptr;
+- range->end = end;
+
+- range->pfns[0] = range->flags[HMM_PFN_VALID];
+- range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+- 0 : range->flags[HMM_PFN_WRITE];
+- for (i = 1; i < ttm->num_pages; i++)
+- range->pfns[i] = range->pfns[0];
++ pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
++ if (unlikely(!pfns)) {
++ r = -ENOMEM;
++ goto out_free_ranges;
++ }
++
++ for (i = 0; i < gtt->nr_ranges; i++)
++ amdgpu_hmm_init_range(&ranges[i]);
++
++ f = ranges[0].flags[HMM_PFN_VALID];
++ f |= amdgpu_ttm_tt_is_readonly(ttm) ?
++ 0 : ranges[0].flags[HMM_PFN_WRITE];
++ memset64(pfns, f, ttm->num_pages);
++
++ for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) {
++ ranges[i].vma = vmas[i];
++ ranges[i].start = max(start, vmas[i]->vm_start);
++ ranges[i].end = min(end, vmas[i]->vm_end);
++ ranges[i].pfns = pfns + nr_pages;
++ nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
++
++ r = hmm_vma_fault(&ranges[i], true);
++ if (unlikely(r))
++ break;
++ }
++ if (unlikely(r)) {
++ while (i--)
++ hmm_vma_range_done(&ranges[i]);
+
+- /* This may trigger page table update */
+- r = hmm_vma_fault(range, true);
+- if (r)
+ goto out_free_pfns;
++ }
+
+ up_read(&mm->mmap_sem);
+
+ for (i = 0; i < ttm->num_pages; i++)
+- pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
++ pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]);
++ gtt->ranges = ranges;
+
+ return 0;
+
+ out_free_pfns:
+- kvfree(range->pfns);
+- range->pfns = NULL;
++ kvfree(pfns);
++out_free_ranges:
++ kvfree(ranges);
+ out:
+ up_read(&mm->mmap_sem);
++
+ return r;
+ }
+
+@@ -927,15 +974,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+ {
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ bool r = false;
++ int i;
+
+ if (!gtt || !gtt->userptr)
+ return false;
+
+- WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
+- if (gtt->range.pfns) {
+- r = hmm_vma_range_done(&gtt->range);
+- kvfree(gtt->range.pfns);
+- gtt->range.pfns = NULL;
++ DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n",
++ gtt->userptr, gtt->nr_ranges, ttm->num_pages);
++
++ WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns,
++ "No user pages to check\n");
++
++ if (gtt->ranges) {
++ for (i = 0; i < gtt->nr_ranges; i++)
++ r |= hmm_vma_range_done(&gtt->ranges[i]);
++ kvfree(gtt->ranges[0].pfns);
++ kvfree(gtt->ranges);
++ gtt->ranges = NULL;
+ }
+
+ return r;
+@@ -1019,8 +1074,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
+ sg_free_table(ttm->sg);
+
+ #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
+- if (gtt->range.pfns &&
+- ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
++ if (gtt->ranges &&
++ ttm->pages[0] == hmm_pfn_to_page(&gtt->ranges[0],
++ gtt->ranges[0].pfns[0]))
+ WARN_ONCE(1, "Missing get_user_page_done\n");
+ #endif
+ }
+--
+2.17.1
+
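The cross-VMA scenario described in the commit message can be reproduced from
userspace. The following is a minimal sketch, not part of the patch: it pins
allocations to the brk heap, forks, and in the child frees and reallocates a
larger buffer. The grown heap typically cannot merge with the VMA cloned from
the parent (is_mergeable_anon_vma() refuses VMAs with non-singular anon_vma
chains after fork), so the new allocation may span two adjacent VMAs. The
buffer sizes and the mallopt() tuning are illustrative assumptions.

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	size_t small = 1 << 20;	/* 1 MiB; sizes are illustrative */
	char *buf;

	/* Keep allocations on the brk heap so VMA adjacency matters. */
	mallopt(M_MMAP_THRESHOLD, 64 << 20);

	buf = malloc(small);
	memset(buf, 1, small);	/* fault the pages in before fork */

	if (fork() == 0) {
		/* Child: the heap VMA was cloned from the parent (no exec). */
		free(buf);

		char *big = malloc(4 * small);

		/* Extending the brk cannot merge with the cloned VMA (see
		 * is_mergeable_anon_vma in mm/mmap.c), so this buffer may
		 * span two adjacent VMAs; registering it as a userptr BO
		 * would exercise the multi-range path added by this patch. */
		memset(big, 2, 4 * small);

		/* Inspect the mappings to see the adjacent anonymous VMAs. */
		char cmd[64];
		snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
		system(cmd);

		free(big);
		_exit(0);
	}
	wait(NULL);
	free(buf);
	return 0;
}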
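The per-VMA setup loop in amdgpu_ttm_tt_get_user_pages() clamps the userptr
interval to each VMA and carves the shared pfns array into consecutive
slices. Below is a minimal userspace sketch of that arithmetic with
simplified stand-in types; fake_vma and fake_range are illustrative, not the
kernel structures.

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Simplified stand-ins for vm_area_struct and hmm_range. */
struct fake_vma { unsigned long vm_start, vm_end; };
struct fake_range { unsigned long start, end, pfn_offset; };

/*
 * Mirror of the clamping in the patch: each range covers the intersection
 * of [start, end) with one VMA, and its pfns slice begins where the
 * previous range's slice ended. Returns the total page count.
 */
static int split_ranges(unsigned long start, unsigned long end,
			const struct fake_vma *vmas, int nr_vmas,
			struct fake_range *ranges)
{
	unsigned long nr_pages = 0;
	int i;

	for (i = 0; i < nr_vmas; i++) {
		ranges[i].start = start > vmas[i].vm_start ?
				  start : vmas[i].vm_start;
		ranges[i].end = end < vmas[i].vm_end ?
				end : vmas[i].vm_end;
		ranges[i].pfn_offset = nr_pages;
		nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE;
	}
	return (int)nr_pages;
}

int main(void)
{
	/* Two adjacent VMAs; the userptr starts partway into the first. */
	struct fake_vma vmas[2] = {
		{ 0x100000, 0x104000 },	/* 4 pages */
		{ 0x104000, 0x10c000 },	/* 8 pages */
	};
	struct fake_range ranges[2];
	int n = split_ranges(0x102000, 0x10a000, vmas, 2, ranges);

	assert(n == 8);			/* 2 pages + 6 pages */
	assert(ranges[0].start == 0x102000 && ranges[0].end == 0x104000);
	assert(ranges[1].pfn_offset == 2);
	printf("total pages %d\n", n);
	return 0;
}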