diff options
Diffstat (limited to 'drivers/vhost')
-rw-r--r-- | drivers/vhost/vdpa.c | 122 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 12 |
2 files changed, 80 insertions, 54 deletions
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index a54b60d6623f..e172c2efc663 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -527,6 +527,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, r = iommu_map(v->domain, iova, pa, size, perm_to_iommu_flags(perm)); + if (r) + vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1); + return r; } @@ -552,21 +555,19 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, struct vhost_dev *dev = &v->vdev; struct vhost_iotlb *iotlb = dev->iotlb; struct page **page_list; - unsigned long list_size = PAGE_SIZE / sizeof(struct page *); + struct vm_area_struct **vmas; unsigned int gup_flags = FOLL_LONGTERM; - unsigned long npages, cur_base, map_pfn, last_pfn = 0; - unsigned long locked, lock_limit, pinned, i; + unsigned long map_pfn, last_pfn = 0; + unsigned long npages, lock_limit; + unsigned long i, nmap = 0; u64 iova = msg->iova; + long pinned; int ret = 0; if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; - page_list = (struct page **) __get_free_page(GFP_KERNEL); - if (!page_list) - return -ENOMEM; - if (msg->perm & VHOST_ACCESS_WO) gup_flags |= FOLL_WRITE; @@ -574,61 +575,86 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, if (!npages) return -EINVAL; + page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); + vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!page_list || !vmas) { + ret = -ENOMEM; + goto free; + } + mmap_read_lock(dev->mm); - locked = atomic64_add_return(npages, &dev->mm->pinned_vm); lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (locked > lock_limit) { + if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) { ret = -ENOMEM; - goto out; + goto unlock; } - cur_base = msg->uaddr & PAGE_MASK; - iova &= PAGE_MASK; + pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, + page_list, vmas); + if (npages != pinned) { + if (pinned < 0) { + ret = pinned; + } else { + unpin_user_pages(page_list, pinned); + ret = -ENOMEM; + } + goto unlock; + } - while (npages) { - pinned = min_t(unsigned long, npages, list_size); - ret = pin_user_pages(cur_base, pinned, - gup_flags, page_list, NULL); - if (ret != pinned) - goto out; - - if (!last_pfn) - map_pfn = page_to_pfn(page_list[0]); - - for (i = 0; i < ret; i++) { - unsigned long this_pfn = page_to_pfn(page_list[i]); - u64 csize; - - if (last_pfn && (this_pfn != last_pfn + 1)) { - /* Pin a contiguous chunk of memory */ - csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; - if (vhost_vdpa_map(v, iova, csize, - map_pfn << PAGE_SHIFT, - msg->perm)) - goto out; - map_pfn = this_pfn; - iova += csize; + iova &= PAGE_MASK; + map_pfn = page_to_pfn(page_list[0]); + + /* One more iteration to avoid extra vdpa_map() call out of loop. */ + for (i = 0; i <= npages; i++) { + unsigned long this_pfn; + u64 csize; + + /* The last chunk may have no valid PFN next to it */ + this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; + + if (last_pfn && (this_pfn == -1UL || + this_pfn != last_pfn + 1)) { + /* Pin a contiguous chunk of memory */ + csize = last_pfn - map_pfn + 1; + ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT, + map_pfn << PAGE_SHIFT, + msg->perm); + if (ret) { + /* + * Unpin the rest chunks of memory on the + * flight with no corresponding vdpa_map() + * calls having been made yet. On the other + * hand, vdpa_unmap() in the failure path + * is in charge of accounting the number of + * pinned pages for its own. + * This asymmetrical pattern of accounting + * is for efficiency to pin all pages at + * once, while there is no other callsite + * of vdpa_map() than here above. + */ + unpin_user_pages(&page_list[nmap], + npages - nmap); + goto out; } - - last_pfn = this_pfn; + atomic64_add(csize, &dev->mm->pinned_vm); + nmap += csize; + iova += csize << PAGE_SHIFT; + map_pfn = this_pfn; } - - cur_base += ret << PAGE_SHIFT; - npages -= ret; + last_pfn = this_pfn; } - /* Pin the rest chunk */ - ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, - map_pfn << PAGE_SHIFT, msg->perm); + WARN_ON(nmap != npages); out: - if (ret) { + if (ret) vhost_vdpa_unmap(v, msg->iova, msg->size); - atomic64_sub(npages, &dev->mm->pinned_vm); - } +unlock: mmap_read_unlock(dev->mm); - free_page((unsigned long)page_list); +free: + kvfree(vmas); + kvfree(page_list); return ret; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index d7b8df3edffc..5f0030e08688 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1283,6 +1283,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, vring_used_t __user *used) { + /* If an IOTLB device is present, the vring addresses are + * GIOVAs. Access validation occurs at prefetch time. */ + if (vq->iotlb) + return true; + return access_ok(desc, vhost_get_desc_size(vq, num)) && access_ok(avail, vhost_get_avail_size(vq, num)) && access_ok(used, vhost_get_used_size(vq, num)); @@ -1376,10 +1381,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq) if (!vq_log_access_ok(vq, vq->log_base)) return false; - /* Access validation occurs at prefetch time with IOTLB */ - if (vq->iotlb) - return true; - return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); @@ -1511,8 +1512,7 @@ static long vhost_vring_set_addr(struct vhost_dev *d, /* Also validate log access for used ring if enabled. */ if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && !log_access_ok(vq->log_base, a.log_guest_addr, - sizeof *vq->used + - vq->num * sizeof *vq->used->ring)) + vhost_get_used_size(vq, vq->num))) return -EINVAL; } |