From 20b395a08bdc56f0bb9e34d3840718583159352b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 23 Feb 2016 12:36:59 +0100 Subject: [PATCH 0363/1110] drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That avoids lock inversion between the BO reservation lock and the anon_vma lock. v2: * Changed amdgpu_bo_list_entry.user_pages to an array of pointers * Lock mmap_sem only for get_user_pages * Added invalidation of unbound userpointer BOs * Fixed memory leak and page reference leak v3 (chk): * Revert locking mmap_sem only for_get user_pages * Revert adding invalidation of unbound userpointer BOs * Sanitize and fix error handling v4 (chk): * Init userpages pointer everywhere. * Fix error handling when get_user_pages() fails. * Add invalidation of unbound userpointer BOs again. v5 (chk): * Add maximum number of tries. v6 (chk): * Fix error handling when we run out of tries. Signed-off-by: Christian König Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling (v4) Acked-by: Alex Deucher Signed-off-by: Kalyan Alle --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 118 ++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 23 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 53 +++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 + 6 files changed, 176 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0bc033e..66aef04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -432,6 +432,8 @@ struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; uint32_t priority; + struct page **user_pages; + int user_invalidated; }; struct amdgpu_bo_va_mapping { @@ -2329,11 +2331,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, struct amdgpu_ring **out_ring); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); +int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 82f8caf..17a2f83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -201,6 +201,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, list_add_tail(&list->array[i].tv.head, &bucket[priority]); + list->array[i].user_pages = NULL; } /* Connect the sorted buckets in the output list. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a92a30a..62027c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -152,6 +152,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; + p->uf_entry.user_pages = NULL; drm_gem_object_unreference_unlocked(gobj); return 0; @@ -344,6 +345,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = lobj->robj; + bool binding_userptr = false; struct mm_struct *usermm; uint32_t domain; @@ -351,6 +353,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; + /* Check if we have user pages and nobody bound the BO already */ + if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { + size_t size = sizeof(struct page *); + + size *= bo->tbo.ttm->num_pages; + memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); + binding_userptr = true; + } + if (bo->pin_count) continue; @@ -382,6 +393,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, } return r; } + + if (binding_userptr) { + drm_free_large(lobj->user_pages); + lobj->user_pages = NULL; + } } return 0; } @@ -389,8 +405,10 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) { struct amdgpu_cs_buckets buckets; + struct amdgpu_bo_list_entry *e; struct list_head duplicates; bool need_mmap_lock = false; + unsigned i, tries = 10; int i, r; @@ -413,9 +431,81 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) if (need_mmap_lock) down_read(¤t->mm->mmap_sem); - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) - goto error_reserve; + while (1) { + struct list_head need_pages; + unsigned i; + + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) + goto error_free_pages; + + /* Without a BO list we don't have userptr BOs */ + if (!p->bo_list) + break; + + INIT_LIST_HEAD(&need_pages); + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + + e = &p->bo_list->array[i]; + + if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, + &e->user_invalidated) && e->user_pages) { + + /* We acquired a page array, but somebody + * invalidated it. Free it an try again + */ + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + e->user_pages = NULL; + } + + if (e->robj->tbo.ttm->state != tt_bound && + !e->user_pages) { + list_del(&e->tv.head); + list_add(&e->tv.head, &need_pages); + + amdgpu_bo_unreserve(e->robj); + } + } + + if (list_empty(&need_pages)) + break; + + /* Unreserve everything again. */ + ttm_eu_backoff_reservation(&p->ticket, &p->validated); + + /* We tried to often, just abort */ + if (!--tries) { + r = -EDEADLK; + goto error_free_pages; + } + + /* Fill the page arrays for all useptrs. */ + list_for_each_entry(e, &need_pages, tv.head) { + struct ttm_tt *ttm = e->robj->tbo.ttm; + + e->user_pages = drm_calloc_large(ttm->num_pages, + sizeof(struct page*)); + if (!e->user_pages) { + r = -ENOMEM; + goto error_free_pages; + } + + r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); + if (r) { + drm_free_large(e->user_pages); + e->user_pages = NULL; + goto error_free_pages; + } + } + + /* And try again. */ + list_splice(&need_pages, &p->validated); + } amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); @@ -445,9 +535,25 @@ error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); -error_reserve: - if (need_mmap_lock) - up_read(¤t->mm->mmap_sem); +error_free_pages: + + if (need_mmap_lock) + up_read(¤t->mm->mmap_sem); + + if (p->bo_list) { + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + e = &p->bo_list->array[i]; + + if (!e->user_pages) + continue; + + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + } + } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6270a20..cbacf72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) { down_read(¤t->mm->mmap_sem); + + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + bo->tbo.ttm->pages); + if (r) + goto unlock_mmap_sem; + r = amdgpu_bo_reserve(bo, true); - if (r) { - up_read(¤t->mm->mmap_sem); - goto release_object; - } + if (r) + goto free_pages; amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); amdgpu_bo_unreserve(bo); - up_read(¤t->mm->mmap_sem); if (r) - goto release_object; + goto free_pages; + + up_read(¤t->mm->mmap_sem); } r = drm_gem_handle_create(filp, gobj, &handle); @@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, args->handle = handle; return 0; +free_pages: + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false); + +unlock_mmap_sem: + up_read(¤t->mm->mmap_sem); + release_object: drm_gem_object_unreference_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 051cd39..6bbd395 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -508,22 +508,18 @@ struct amdgpu_ttm_tt { uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; + atomic_t mmu_invalidations; }; -/* prepare the sg table with the user pages */ -static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned pinned = 0, nents; - int r; - int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); - enum dma_data_direction direction = write ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + unsigned pinned = 0; + int r; if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* check that we only pin down anonymous memory + /* check that we only use anonymous memory to prevent problems with writeback */ unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; @@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **pages = ttm->pages + pinned; + struct page **p = pages + pinned; struct amdgpu_ttm_gup_task_list guptask; guptask.task = current; @@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) spin_unlock(>t->guptasklock); r = get_user_pages(current, current->mm, userptr, num_pages, - write, 0, pages, NULL); + write, 0, p, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); @@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) } while (pinned < ttm->num_pages); + return 0; + +release_pages: + release_pages(pages, pinned, 0); + return r; +} + +/* prepare the sg table with the user pages */ +static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) +{ + struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev); + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned nents; + int r; + + int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, ttm->num_pages << PAGE_SHIFT, GFP_KERNEL); @@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) release_sg: kfree(ttm->sg); - -release_pages: - release_pages(ttm->pages, pinned, 0); return r; } @@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->userflags = flags; spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); + atomic_set(>t->mmu_invalidations, 0); return 0; } @@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, } spin_unlock(>t->guptasklock); + atomic_inc(>t->mmu_invalidations); + return true; } +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int prev_invalidated = *last_invalidated; + + *last_invalidated = atomic_read(>t->mmu_invalidations); + return prev_invalidated != *last_invalidated; +} + bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f139cea..99afc64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -94,6 +94,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, entry->priority = 0; entry->tv.bo = &vm->page_directory->tbo; entry->tv.shared = true; + entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -1191,6 +1192,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; + entry->user_pages = NULL; vm->page_tables[pt_idx].addr = 0; } -- 2.7.4