aboutsummaryrefslogtreecommitdiffstats
path: root/lib/scatterlist.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/scatterlist.c')
-rw-r--r--lib/scatterlist.c555
1 files changed, 477 insertions, 78 deletions
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index eacb82468437..68b45c82c37a 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -9,6 +9,8 @@
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
/**
* sg_next - return the next scatterlist entry in a list
@@ -38,7 +40,7 @@ EXPORT_SYMBOL(sg_next);
* @sg: The scatterlist
*
* Description:
- * Allows to know how many entries are in sg, taking into acount
+ * Allows to know how many entries are in sg, taking into account
* chaining as well
*
**/
@@ -59,7 +61,7 @@ EXPORT_SYMBOL(sg_nents);
*
* Description:
* Determines the number of entries in sg that are required to meet
- * the supplied length, taking into acount chaining as well
+ * the supplied length, taking into account chaining as well
*
* Returns:
* the number of sg entries needed, negative error on failure
@@ -179,8 +181,10 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
* __sg_free_table - Free a previously mapped sg table
* @table: The sg table header to use
* @max_ents: The maximum number of entries per single scatterlist
- * @skip_first_chunk: don't free the (preallocated) first scatterlist chunk
+ * @nents_first_chunk: Number of entries int the (preallocated) first
+ * scatterlist chunk, 0 means no such preallocated first chunk
* @free_fn: Free function
+ * @num_ents: Number of entries in the table
*
* Description:
* Free an sg table previously allocated and setup with
@@ -189,16 +193,18 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents)
*
**/
void __sg_free_table(struct sg_table *table, unsigned int max_ents,
- bool skip_first_chunk, sg_free_fn *free_fn)
+ unsigned int nents_first_chunk, sg_free_fn *free_fn,
+ unsigned int num_ents)
{
struct scatterlist *sgl, *next;
+ unsigned curr_max_ents = nents_first_chunk ?: max_ents;
if (unlikely(!table->sgl))
return;
sgl = table->sgl;
- while (table->orig_nents) {
- unsigned int alloc_size = table->orig_nents;
+ while (num_ents) {
+ unsigned int alloc_size = num_ents;
unsigned int sg_size;
/*
@@ -207,21 +213,22 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
* sg_size is then one less than alloc size, since the last
* element is the chain pointer.
*/
- if (alloc_size > max_ents) {
- next = sg_chain_ptr(&sgl[max_ents - 1]);
- alloc_size = max_ents;
+ if (alloc_size > curr_max_ents) {
+ next = sg_chain_ptr(&sgl[curr_max_ents - 1]);
+ alloc_size = curr_max_ents;
sg_size = alloc_size - 1;
} else {
sg_size = alloc_size;
next = NULL;
}
- table->orig_nents -= sg_size;
- if (skip_first_chunk)
- skip_first_chunk = false;
+ num_ents -= sg_size;
+ if (nents_first_chunk)
+ nents_first_chunk = 0;
else
free_fn(sgl, alloc_size);
sgl = next;
+ curr_max_ents = max_ents;
}
table->sgl = NULL;
@@ -229,13 +236,27 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents,
EXPORT_SYMBOL(__sg_free_table);
/**
+ * sg_free_append_table - Free a previously allocated append sg table.
+ * @table: The mapped sg append table header
+ *
+ **/
+void sg_free_append_table(struct sg_append_table *table)
+{
+ __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
+ table->total_nents);
+}
+EXPORT_SYMBOL(sg_free_append_table);
+
+
+/**
* sg_free_table - Free a previously allocated sg table
* @table: The mapped sg table header
*
**/
void sg_free_table(struct sg_table *table)
{
- __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree,
+ table->orig_nents);
}
EXPORT_SYMBOL(sg_free_table);
@@ -244,6 +265,9 @@ EXPORT_SYMBOL(sg_free_table);
* @table: The sg table header to use
* @nents: Number of entries in sg list
* @max_ents: The maximum number of entries the allocator returns per call
+ * @first_chunk: first SGL if preallocated (may be %NULL)
+ * @nents_first_chunk: Number of entries in the (preallocated) first
+ * scatterlist chunk, 0 means no such preallocated chunk provided by user
* @gfp_mask: GFP allocation mask
* @alloc_fn: Allocator to use
*
@@ -260,10 +284,13 @@ EXPORT_SYMBOL(sg_free_table);
**/
int __sg_alloc_table(struct sg_table *table, unsigned int nents,
unsigned int max_ents, struct scatterlist *first_chunk,
- gfp_t gfp_mask, sg_alloc_fn *alloc_fn)
+ unsigned int nents_first_chunk, gfp_t gfp_mask,
+ sg_alloc_fn *alloc_fn)
{
struct scatterlist *sg, *prv;
unsigned int left;
+ unsigned curr_max_ents = nents_first_chunk ?: max_ents;
+ unsigned prv_max_ents;
memset(table, 0, sizeof(*table));
@@ -279,8 +306,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
do {
unsigned int sg_size, alloc_size = left;
- if (alloc_size > max_ents) {
- alloc_size = max_ents;
+ if (alloc_size > curr_max_ents) {
+ alloc_size = curr_max_ents;
sg_size = alloc_size - 1;
} else
sg_size = alloc_size;
@@ -303,7 +330,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
if (prv)
table->nents = ++table->orig_nents;
- return -ENOMEM;
+ return -ENOMEM;
}
sg_init_table(sg, alloc_size);
@@ -314,7 +341,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
* If this is not the first mapping, chain previous part.
*/
if (prv)
- sg_chain(prv, max_ents, sg);
+ sg_chain(prv, prv_max_ents, sg);
else
table->sgl = sg;
@@ -325,6 +352,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
sg_mark_end(&sg[sg_size - 1]);
prv = sg;
+ prv_max_ents = curr_max_ents;
+ curr_max_ents = max_ents;
} while (left);
return 0;
@@ -347,66 +376,141 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
int ret;
ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
- NULL, gfp_mask, sg_kmalloc);
+ NULL, 0, gfp_mask, sg_kmalloc);
if (unlikely(ret))
- __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
-
+ sg_free_table(table);
return ret;
}
EXPORT_SYMBOL(sg_alloc_table);
+static struct scatterlist *get_next_sg(struct sg_append_table *table,
+ struct scatterlist *cur,
+ unsigned long needed_sges,
+ gfp_t gfp_mask)
+{
+ struct scatterlist *new_sg, *next_sg;
+ unsigned int alloc_size;
+
+ if (cur) {
+ next_sg = sg_next(cur);
+ /* Check if last entry should be keeped for chainning */
+ if (!sg_is_last(next_sg) || needed_sges == 1)
+ return next_sg;
+ }
+
+ alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+ new_sg = sg_kmalloc(alloc_size, gfp_mask);
+ if (!new_sg)
+ return ERR_PTR(-ENOMEM);
+ sg_init_table(new_sg, alloc_size);
+ if (cur) {
+ table->total_nents += alloc_size - 1;
+ __sg_chain(next_sg, new_sg);
+ } else {
+ table->sgt.sgl = new_sg;
+ table->total_nents = alloc_size;
+ }
+ return new_sg;
+}
+
+static bool pages_are_mergeable(struct page *a, struct page *b)
+{
+ if (page_to_pfn(a) != page_to_pfn(b) + 1)
+ return false;
+ if (!zone_device_pages_have_same_pgmap(a, b))
+ return false;
+ return true;
+}
+
/**
- * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
- * an array of pages
- * @sgt: The sg table header to use
- * @pages: Pointer to an array of page pointers
- * @n_pages: Number of pages in the pages array
+ * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
+ * table from an array of pages
+ * @sgt_append: The sg append table to use
+ * @pages: Pointer to an array of page pointers
+ * @n_pages: Number of pages in the pages array
* @offset: Offset from start of the first page to the start of a buffer
* @size: Number of valid bytes in the buffer (after offset)
- * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @max_segment: Maximum size of a scatterlist element in bytes
+ * @left_pages: Left pages caller have to set after this call
* @gfp_mask: GFP allocation mask
*
- * Description:
- * Allocate and initialize an sg table from a list of pages. Contiguous
- * ranges of the pages are squashed into a single scatterlist node up to the
- * maximum size specified in @max_segment. An user may provide an offset at a
- * start and a size of valid data in a buffer specified by the page array.
- * The returned sg table is released by sg_free_table.
+ * Description:
+ * In the first call it allocate and initialize an sg table from a list of
+ * pages, else reuse the scatterlist from sgt_append. Contiguous ranges of
+ * the pages are squashed into a single scatterlist entry up to the maximum
+ * size specified in @max_segment. A user may provide an offset at a start
+ * and a size of valid data in a buffer specified by the page array. The
+ * returned sg table is released by sg_free_append_table
*
* Returns:
* 0 on success, negative error on failure
+ *
+ * Notes:
+ * If this function returns non-0 (eg failure), the caller must call
+ * sg_free_append_table() to cleanup any leftover allocations.
+ *
+ * In the fist call, sgt_append must by initialized.
*/
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask)
+int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ unsigned int left_pages, gfp_t gfp_mask)
{
- unsigned int chunks, cur_page, seg_len, i;
- int ret;
- struct scatterlist *s;
+ unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+ unsigned int added_nents = 0;
+ struct scatterlist *s = sgt_append->prv;
+ struct page *last_pg;
- if (WARN_ON(!max_segment || offset_in_page(max_segment)))
+ /*
+ * The algorithm below requires max_segment to be aligned to PAGE_SIZE
+ * otherwise it can overshoot.
+ */
+ max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
+ if (WARN_ON(max_segment < PAGE_SIZE))
return -EINVAL;
+ if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
+ return -EOPNOTSUPP;
+
+ if (sgt_append->prv) {
+ unsigned long next_pfn = (page_to_phys(sg_page(sgt_append->prv)) +
+ sgt_append->prv->offset + sgt_append->prv->length) / PAGE_SIZE;
+
+ if (WARN_ON(offset))
+ return -EINVAL;
+
+ /* Merge contiguous pages into the last SG */
+ prv_len = sgt_append->prv->length;
+ if (page_to_pfn(pages[0]) == next_pfn) {
+ last_pg = pfn_to_page(next_pfn - 1);
+ while (n_pages && pages_are_mergeable(pages[0], last_pg)) {
+ if (sgt_append->prv->length + PAGE_SIZE > max_segment)
+ break;
+ sgt_append->prv->length += PAGE_SIZE;
+ last_pg = pages[0];
+ pages++;
+ n_pages--;
+ }
+ if (!n_pages)
+ goto out;
+ }
+ }
+
/* compute number of contiguous chunks */
chunks = 1;
seg_len = 0;
for (i = 1; i < n_pages; i++) {
seg_len += PAGE_SIZE;
if (seg_len >= max_segment ||
- page_to_pfn(pages[i]) != page_to_pfn(pages[i - 1]) + 1) {
+ !pages_are_mergeable(pages[i], pages[i - 1])) {
chunks++;
seg_len = 0;
}
}
- ret = sg_alloc_table(sgt, chunks, gfp_mask);
- if (unlikely(ret))
- return ret;
-
/* merging chunks and putting them into the scatterlist */
cur_page = 0;
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+ for (i = 0; i < chunks; i++) {
unsigned int j, chunk_size;
/* look for the end of the current chunk */
@@ -414,51 +518,82 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
for (j = cur_page + 1; j < n_pages; j++) {
seg_len += PAGE_SIZE;
if (seg_len >= max_segment ||
- page_to_pfn(pages[j]) !=
- page_to_pfn(pages[j - 1]) + 1)
+ !pages_are_mergeable(pages[j], pages[j - 1]))
break;
}
+ /* Pass how many chunks might be left */
+ s = get_next_sg(sgt_append, s, chunks - i + left_pages,
+ gfp_mask);
+ if (IS_ERR(s)) {
+ /*
+ * Adjust entry length to be as before function was
+ * called.
+ */
+ if (sgt_append->prv)
+ sgt_append->prv->length = prv_len;
+ return PTR_ERR(s);
+ }
chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
sg_set_page(s, pages[cur_page],
min_t(unsigned long, size, chunk_size), offset);
+ added_nents++;
size -= chunk_size;
offset = 0;
cur_page = j;
}
-
+ sgt_append->sgt.nents += added_nents;
+ sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
+ sgt_append->prv = s;
+out:
+ if (!left_pages)
+ sg_mark_end(s);
return 0;
}
-EXPORT_SYMBOL(__sg_alloc_table_from_pages);
+EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
/**
- * sg_alloc_table_from_pages - Allocate and initialize an sg table from
- * an array of pages
+ * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
+ * an array of pages and given maximum
+ * segment.
* @sgt: The sg table header to use
* @pages: Pointer to an array of page pointers
* @n_pages: Number of pages in the pages array
* @offset: Offset from start of the first page to the start of a buffer
* @size: Number of valid bytes in the buffer (after offset)
+ * @max_segment: Maximum size of a scatterlist element in bytes
* @gfp_mask: GFP allocation mask
*
* Description:
* Allocate and initialize an sg table from a list of pages. Contiguous
- * ranges of the pages are squashed into a single scatterlist node. A user
- * may provide an offset at a start and a size of valid data in a buffer
- * specified by the page array. The returned sg table is released by
- * sg_free_table.
+ * ranges of the pages are squashed into a single scatterlist node up to the
+ * maximum size specified in @max_segment. A user may provide an offset at a
+ * start and a size of valid data in a buffer specified by the page array.
*
- * Returns:
+ * The returned sg table is released by sg_free_table.
+ *
+ * Returns:
* 0 on success, negative error on failure
*/
-int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, gfp_t gfp_mask)
+int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
+ unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ gfp_t gfp_mask)
{
- return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
- SCATTERLIST_MAX_SEGMENT, gfp_mask);
+ struct sg_append_table append = {};
+ int err;
+
+ err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
+ size, max_segment, 0, gfp_mask);
+ if (err) {
+ sg_free_append_table(&append);
+ return err;
+ }
+ memcpy(sgt, &append.sgt, sizeof(*sgt));
+ WARN_ON(append.total_nents != sgt->orig_nents);
+ return 0;
}
-EXPORT_SYMBOL(sg_alloc_table_from_pages);
+EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
#ifdef CONFIG_SGL_ALLOC
@@ -494,7 +629,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
nalloc++;
}
sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
- (gfp & ~GFP_DMA) | __GFP_ZERO);
+ gfp & ~GFP_DMA);
if (!sgl)
return NULL;
@@ -504,7 +639,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
elem_len = min_t(u64, length, PAGE_SIZE << order);
page = alloc_pages(gfp, order);
if (!page) {
- sgl_free(sgl);
+ sgl_free_order(sgl, order);
return NULL;
}
@@ -654,6 +789,7 @@ EXPORT_SYMBOL(__sg_page_iter_dma_next);
* @miter: sg mapping iter to be started
* @sgl: sg list to iterate over
* @nents: number of sg entries
+ * @flags: sg iterator flags
*
* Description:
* Starts mapping iterator @miter.
@@ -706,8 +842,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter)
* stops @miter.
*
* Context:
- * Don't care if @miter is stopped, or not proceeded yet.
- * Otherwise, preemption disabled if the SG_MITER_ATOMIC is set.
+ * Don't care.
*
* Returns:
* true if @miter contains the valid mapping. false if end of sg
@@ -743,8 +878,7 @@ EXPORT_SYMBOL(sg_miter_skip);
* @miter->addr and @miter->length point to the current mapping.
*
* Context:
- * Preemption disabled if SG_MITER_ATOMIC. Preemption must stay disabled
- * till @miter is stopped. May sleep if !SG_MITER_ATOMIC.
+ * May sleep if !SG_MITER_ATOMIC.
*
* Returns:
* true if @miter contains the next mapping. false if end of sg
@@ -784,8 +918,7 @@ EXPORT_SYMBOL(sg_miter_next);
* need to be released during iteration.
*
* Context:
- * Preemption disabled if the SG_MITER_ATOMIC is set. Don't care
- * otherwise.
+ * Don't care otherwise.
*/
void sg_miter_stop(struct sg_mapping_iter *miter)
{
@@ -796,12 +929,11 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
miter->__offset += miter->consumed;
miter->__remaining -= miter->consumed;
- if ((miter->__flags & SG_MITER_TO_SG) &&
- !PageSlab(miter->page))
- flush_kernel_dcache_page(miter->page);
+ if (miter->__flags & SG_MITER_TO_SG)
+ flush_dcache_page(miter->page);
if (miter->__flags & SG_MITER_ATOMIC) {
- WARN_ON_ONCE(preemptible());
+ WARN_ON_ONCE(!pagefault_disabled());
kunmap_atomic(miter->addr);
} else
kunmap(miter->page);
@@ -822,7 +954,7 @@ EXPORT_SYMBOL(sg_miter_stop);
* @buflen: The number of bytes to copy
* @skip: Number of bytes to skip before copying
* @to_buffer: transfer direction (true == from an sg list to a
- * buffer, false == from a buffer to an sg list
+ * buffer, false == from a buffer to an sg list)
*
* Returns the number of copied bytes.
*
@@ -842,7 +974,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
sg_miter_start(&miter, sgl, nents, sg_flags);
if (!sg_miter_skip(&miter, skip))
- return false;
+ return 0;
while ((offset < buflen) && sg_miter_next(&miter)) {
unsigned int len;
@@ -967,3 +1099,270 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);
+
+/*
+ * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
+ * iterators, and add them to the scatterlist.
+ */
+static ssize_t extract_user_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ iov_iter_extraction_t extraction_flags)
+{
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ struct page **pages;
+ unsigned int npages;
+ ssize_t ret = 0, res;
+ size_t len, off;
+
+ /* We decant the page list into the tail of the scatterlist */
+ pages = (void *)sgtable->sgl +
+ array_size(sg_max, sizeof(struct scatterlist));
+ pages -= sg_max;
+
+ do {
+ res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
+ extraction_flags, &off);
+ if (res < 0)
+ goto failed;
+
+ len = res;
+ maxsize -= len;
+ ret += len;
+ npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
+ sg_max -= npages;
+
+ for (; npages > 0; npages--) {
+ struct page *page = *pages;
+ size_t seg = min_t(size_t, PAGE_SIZE - off, len);
+
+ *pages++ = NULL;
+ sg_set_page(sg, page, seg, off);
+ sgtable->nents++;
+ sg++;
+ len -= seg;
+ off = 0;
+ }
+ } while (maxsize > 0 && sg_max > 0);
+
+ return ret;
+
+failed:
+ while (sgtable->nents > sgtable->orig_nents)
+ unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+ return res;
+}
+
+/*
+ * Extract up to sg_max pages from a BVEC-type iterator and add them to the
+ * scatterlist. The pages are not pinned.
+ */
+static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ iov_iter_extraction_t extraction_flags)
+{
+ const struct bio_vec *bv = iter->bvec;
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ unsigned long start = iter->iov_offset;
+ unsigned int i;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ size_t off, len;
+
+ len = bv[i].bv_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ len = min_t(size_t, maxsize, len - start);
+ off = bv[i].bv_offset + start;
+
+ sg_set_page(sg, bv[i].bv_page, len, off);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ ret += len;
+ maxsize -= len;
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ start = 0;
+ }
+
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ return ret;
+}
+
+/*
+ * Extract up to sg_max pages from a KVEC-type iterator and add them to the
+ * scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or
+ * static buffers. The pages are not pinned.
+ */
+static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ iov_iter_extraction_t extraction_flags)
+{
+ const struct kvec *kv = iter->kvec;
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ unsigned long start = iter->iov_offset;
+ unsigned int i;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ struct page *page;
+ unsigned long kaddr;
+ size_t off, len, seg;
+
+ len = kv[i].iov_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ kaddr = (unsigned long)kv[i].iov_base + start;
+ off = kaddr & ~PAGE_MASK;
+ len = min_t(size_t, maxsize, len - start);
+ kaddr &= PAGE_MASK;
+
+ maxsize -= len;
+ ret += len;
+ do {
+ seg = min_t(size_t, len, PAGE_SIZE - off);
+ if (is_vmalloc_or_module_addr((void *)kaddr))
+ page = vmalloc_to_page((void *)kaddr);
+ else
+ page = virt_to_page((void *)kaddr);
+
+ sg_set_page(sg, page, len, off);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ len -= seg;
+ kaddr += PAGE_SIZE;
+ off = 0;
+ } while (len > 0 && sg_max > 0);
+
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ start = 0;
+ }
+
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ return ret;
+}
+
+/*
+ * Extract up to sg_max folios from an XARRAY-type iterator and add them to
+ * the scatterlist. The pages are not pinned.
+ */
+static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ iov_iter_extraction_t extraction_flags)
+{
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ struct xarray *xa = iter->xarray;
+ struct folio *folio;
+ loff_t start = iter->xarray_start + iter->iov_offset;
+ pgoff_t index = start / PAGE_SIZE;
+ ssize_t ret = 0;
+ size_t offset, len;
+ XA_STATE(xas, xa, index);
+
+ rcu_read_lock();
+
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (xas_retry(&xas, folio))
+ continue;
+ if (WARN_ON(xa_is_value(folio)))
+ break;
+ if (WARN_ON(folio_test_hugetlb(folio)))
+ break;
+
+ offset = offset_in_folio(folio, start);
+ len = min_t(size_t, maxsize, folio_size(folio) - offset);
+
+ sg_set_page(sg, folio_page(folio, 0), len, offset);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ maxsize -= len;
+ ret += len;
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ return ret;
+}
+
+/**
+ * extract_iter_to_sg - Extract pages from an iterator and add to an sglist
+ * @iter: The iterator to extract from
+ * @maxsize: The amount of iterator to copy
+ * @sgtable: The scatterlist table to fill in
+ * @sg_max: Maximum number of elements in @sgtable that may be filled
+ * @extraction_flags: Flags to qualify the request
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * add them to a scatterlist that refers to all of those bits, to a maximum
+ * addition of @sg_max elements.
+ *
+ * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
+ * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
+ * and DISCARD-type are not supported.
+ *
+ * No end mark is placed on the scatterlist; that's left to the caller.
+ *
+ * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
+ * be allowed on the pages extracted.
+ *
+ * If successful, @sgtable->nents is updated to include the number of elements
+ * added and the number of bytes added is returned. @sgtable->orig_nents is
+ * left unaltered.
+ *
+ * The iov_iter_extract_mode() function should be used to query how cleanup
+ * should be performed.
+ */
+ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
+ struct sg_table *sgtable, unsigned int sg_max,
+ iov_iter_extraction_t extraction_flags)
+{
+ if (maxsize == 0)
+ return 0;
+
+ switch (iov_iter_type(iter)) {
+ case ITER_UBUF:
+ case ITER_IOVEC:
+ return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
+ extraction_flags);
+ case ITER_BVEC:
+ return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
+ extraction_flags);
+ case ITER_KVEC:
+ return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
+ extraction_flags);
+ case ITER_XARRAY:
+ return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
+ extraction_flags);
+ default:
+ pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(extract_iter_to_sg);