aboutsummaryrefslogtreecommitdiffstats
path: root/block/bio.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/bio.c')
-rw-r--r--block/bio.c331
1 files changed, 199 insertions, 132 deletions
diff --git a/block/bio.c b/block/bio.c
index 4db1008309ed..67bba12d273b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2001 Jens Axboe <axboe@kernel.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
- *
*/
#include <linux/mm.h>
#include <linux/swap.h>
@@ -647,26 +634,68 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
}
EXPORT_SYMBOL(bio_clone_fast);
+static inline bool page_is_mergeable(const struct bio_vec *bv,
+ struct page *page, unsigned int len, unsigned int off,
+ bool *same_page)
+{
+ phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
+ bv->bv_offset + bv->bv_len - 1;
+ phys_addr_t page_addr = page_to_phys(page);
+
+ if (vec_end_addr + 1 != page_addr + off)
+ return false;
+ if (xen_domain() && !xen_biovec_phys_mergeable(bv, page))
+ return false;
+
+ *same_page = ((vec_end_addr & PAGE_MASK) == page_addr);
+ if (!*same_page && pfn_to_page(PFN_DOWN(vec_end_addr)) + 1 != page)
+ return false;
+ return true;
+}
+
+/*
+ * Check if the @page can be added to the current segment(@bv), and make
+ * sure to call it only if page_is_mergeable(@bv, @page) is true
+ */
+static bool can_add_page_to_seg(struct request_queue *q,
+ struct bio_vec *bv, struct page *page, unsigned len,
+ unsigned offset)
+{
+ unsigned long mask = queue_segment_boundary(q);
+ phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
+ phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
+
+ if ((addr1 | mask) != (addr2 | mask))
+ return false;
+
+ if (bv->bv_len + len > queue_max_segment_size(q))
+ return false;
+
+ return true;
+}
+
/**
- * bio_add_pc_page - attempt to add page to bio
+ * __bio_add_pc_page - attempt to add page to passthrough bio
* @q: the target queue
* @bio: destination bio
* @page: page to add
* @len: vec entry length
* @offset: vec entry offset
+ * @put_same_page: put the page if it is same with last added page
*
* Attempt to add a page to the bio_vec maplist. This can fail for a
* number of reasons, such as the bio being full or target block device
* limitations. The target block device must allow bio's up to PAGE_SIZE,
* so it is always possible to add a single page to an empty bio.
*
- * This should only be used by REQ_PC bios.
+ * This should only be used by passthrough bios.
*/
-int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
- *page, unsigned int len, unsigned int offset)
+static int __bio_add_pc_page(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned int len, unsigned int offset,
+ bool put_same_page)
{
- int retried_segments = 0;
struct bio_vec *bvec;
+ bool same_page = false;
/*
* cloned bio must not modify vec list
@@ -677,18 +706,14 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
return 0;
- /*
- * For filesystems with a blocksize smaller than the pagesize
- * we will often be called with the same page as last time and
- * a consecutive offset. Optimize this special case.
- */
if (bio->bi_vcnt > 0) {
- struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
+ bvec = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (page == prev->bv_page &&
- offset == prev->bv_offset + prev->bv_len) {
- prev->bv_len += len;
- bio->bi_iter.bi_size += len;
+ if (page == bvec->bv_page &&
+ offset == bvec->bv_offset + bvec->bv_len) {
+ if (put_same_page)
+ put_page(page);
+ bvec->bv_len += len;
goto done;
}
@@ -696,72 +721,59 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
- if (bvec_gap_to_prev(q, prev, offset))
+ if (bvec_gap_to_prev(q, bvec, offset))
return 0;
+
+ if (page_is_mergeable(bvec, page, len, offset, &same_page) &&
+ can_add_page_to_seg(q, bvec, page, len, offset)) {
+ bvec->bv_len += len;
+ goto done;
+ }
}
- if (bio_full(bio))
+ if (bio_full(bio, len))
+ return 0;
+
+ if (bio->bi_phys_segments >= queue_max_segments(q))
return 0;
- /*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
bvec = &bio->bi_io_vec[bio->bi_vcnt];
bvec->bv_page = page;
bvec->bv_len = len;
bvec->bv_offset = offset;
bio->bi_vcnt++;
- bio->bi_phys_segments++;
- bio->bi_iter.bi_size += len;
-
- /*
- * Perform a recount if the number of segments is greater
- * than queue_max_segments(q).
- */
-
- while (bio->bi_phys_segments > queue_max_segments(q)) {
-
- if (retried_segments)
- goto failed;
-
- retried_segments = 1;
- blk_recount_segments(q, bio);
- }
-
- /* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec))
- bio_clear_flag(bio, BIO_SEG_VALID);
-
done:
+ bio->bi_iter.bi_size += len;
+ bio->bi_phys_segments = bio->bi_vcnt;
+ bio_set_flag(bio, BIO_SEG_VALID);
return len;
+}
- failed:
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- bio->bi_vcnt--;
- bio->bi_iter.bi_size -= len;
- blk_recount_segments(q, bio);
- return 0;
+int bio_add_pc_page(struct request_queue *q, struct bio *bio,
+ struct page *page, unsigned int len, unsigned int offset)
+{
+ return __bio_add_pc_page(q, bio, page, len, offset, false);
}
EXPORT_SYMBOL(bio_add_pc_page);
/**
* __bio_try_merge_page - try appending data to an existing bvec.
* @bio: destination bio
- * @page: page to add
+ * @page: start page to add
* @len: length of the data to add
- * @off: offset of the data in @page
+ * @off: offset of the data relative to @page
+ * @same_page: return if the segment has been merged inside the same page
*
* Try to add the data at @page + @off to the last bvec of @bio. This is a
* a useful optimisation for file systems with a block size smaller than the
* page size.
*
+ * Warn if (@len, @off) crosses pages in case that @same_page is true.
+ *
* Return %true on success or %false on failure.
*/
bool __bio_try_merge_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int off)
+ unsigned int len, unsigned int off, bool *same_page)
{
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
@@ -769,7 +781,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
+ if (page_is_mergeable(bv, page, len, off, same_page)) {
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
@@ -780,11 +792,11 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL_GPL(__bio_try_merge_page);
/**
- * __bio_add_page - add page to a bio in a new segment
+ * __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio
- * @page: page to add
- * @len: length of the data to add
- * @off: offset of the data in @page
+ * @page: start page to add
+ * @len: length of the data to add, may cross pages
+ * @off: offset of the data relative to @page, may cross pages
*
* Add the data at @page + @off to @bio as a new bvec. The caller must ensure
* that @bio has space for another bvec.
@@ -795,7 +807,7 @@ void __bio_add_page(struct bio *bio, struct page *page,
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
- WARN_ON_ONCE(bio_full(bio));
+ WARN_ON_ONCE(bio_full(bio, len));
bv->bv_page = page;
bv->bv_offset = off;
@@ -807,20 +819,22 @@ void __bio_add_page(struct bio *bio, struct page *page,
EXPORT_SYMBOL_GPL(__bio_add_page);
/**
- * bio_add_page - attempt to add page to bio
+ * bio_add_page - attempt to add page(s) to bio
* @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
+ * @page: start page to add
+ * @len: vec entry length, may cross pages
+ * @offset: vec entry offset relative to @page, may cross pages
*
- * Attempt to add a page to the bio_vec maplist. This will only fail
+ * Attempt to add page(s) to the bio_vec maplist. This will only fail
* if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
*/
int bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int offset)
{
- if (!__bio_try_merge_page(bio, page, len, offset)) {
- if (bio_full(bio))
+ bool same_page = false;
+
+ if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+ if (bio_full(bio, len))
return 0;
__bio_add_page(bio, page, len, offset);
}
@@ -828,6 +842,42 @@ int bio_add_page(struct bio *bio, struct page *page,
}
EXPORT_SYMBOL(bio_add_page);
+static void bio_get_pages(struct bio *bio)
+{
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+
+ bio_for_each_segment_all(bvec, bio, iter_all)
+ get_page(bvec->bv_page);
+}
+
+static void bio_release_pages(struct bio *bio)
+{
+ struct bvec_iter_all iter_all;
+ struct bio_vec *bvec;
+
+ bio_for_each_segment_all(bvec, bio, iter_all)
+ put_page(bvec->bv_page);
+}
+
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+{
+ const struct bio_vec *bv = iter->bvec;
+ unsigned int len;
+ size_t size;
+
+ if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
+ return -EINVAL;
+
+ len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
+ size = bio_add_page(bio, bv->bv_page, len,
+ bv->bv_offset + iter->iov_offset);
+ if (unlikely(size != len))
+ return -EINVAL;
+ iov_iter_advance(iter, size);
+ return 0;
+}
+
#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *))
/**
@@ -846,6 +896,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
+ bool same_page = false;
ssize_t size, left;
unsigned len, i;
size_t offset;
@@ -866,8 +917,15 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
struct page *page = pages[i];
len = min_t(size_t, PAGE_SIZE - offset, left);
- if (WARN_ON_ONCE(bio_add_page(bio, page, len, offset) != len))
- return -EINVAL;
+
+ if (__bio_try_merge_page(bio, page, len, offset, &same_page)) {
+ if (same_page)
+ put_page(page);
+ } else {
+ if (WARN_ON_ONCE(bio_full(bio, len)))
+ return -EINVAL;
+ __bio_add_page(bio, page, len, offset);
+ }
offset = 0;
}
@@ -876,30 +934,46 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
}
/**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
* @bio: bio to add pages to
- * @iter: iov iterator describing the region to be mapped
+ * @iter: iov iterator describing the region to be added
+ *
+ * This takes either an iterator pointing to user memory, or one pointing to
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
+ * map them into the kernel. On IO completion, the caller should put those
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
*
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
* The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller.
- * If MM encounters an error pinning the requested pages, it stops.
- * Error is returned only if 0 pages could be pinned.
+ * fit into the bio, or are requested in *iter, whatever is smaller. If
+ * MM encounters an error pinning the requested pages, it stops. Error
+ * is returned only if 0 pages could be pinned.
*/
int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
{
- unsigned short orig_vcnt = bio->bi_vcnt;
+ const bool is_bvec = iov_iter_is_bvec(iter);
+ int ret;
- do {
- int ret = __bio_iov_iter_get_pages(bio, iter);
+ if (WARN_ON_ONCE(bio->bi_vcnt))
+ return -EINVAL;
- if (unlikely(ret))
- return bio->bi_vcnt > orig_vcnt ? 0 : ret;
+ do {
+ if (is_bvec)
+ ret = __bio_iov_bvec_add_pages(bio, iter);
+ else
+ ret = __bio_iov_iter_get_pages(bio, iter);
+ } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0));
- } while (iov_iter_count(iter) && !bio_full(bio));
+ if (iov_iter_bvec_no_ref(iter))
+ bio_set_flag(bio, BIO_NO_PAGE_REF);
+ else if (is_bvec)
+ bio_get_pages(bio);
- return 0;
+ return bio->bi_vcnt ? 0 : ret;
}
static void submit_bio_wait_endio(struct bio *bio)
@@ -1070,10 +1144,10 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
*/
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
- int i;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
ssize_t ret;
ret = copy_page_from_iter(bvec->bv_page,
@@ -1101,10 +1175,10 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
*/
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
- int i;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
ssize_t ret;
ret = copy_page_to_iter(bvec->bv_page,
@@ -1125,9 +1199,9 @@ static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
void bio_free_pages(struct bio *bio)
{
struct bio_vec *bvec;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i)
+ bio_for_each_segment_all(bvec, bio, iter_all)
__free_page(bvec->bv_page);
}
EXPORT_SYMBOL(bio_free_pages);
@@ -1238,8 +1312,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
}
}
- if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
+ if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+ if (!map_data)
+ __free_page(page);
break;
+ }
len -= bytes;
offset = 0;
@@ -1295,6 +1372,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
struct bio *bio;
int ret;
struct bio_vec *bvec;
+ struct bvec_iter_all iter_all;
if (!iov_iter_count(iter))
return ERR_PTR(-EINVAL);
@@ -1324,21 +1402,14 @@ struct bio *bio_map_user_iov(struct request_queue *q,
for (j = 0; j < npages; j++) {
struct page *page = pages[j];
unsigned int n = PAGE_SIZE - offs;
- unsigned short prev_bi_vcnt = bio->bi_vcnt;
if (n > bytes)
n = bytes;
- if (!bio_add_pc_page(q, bio, page, n, offs))
+ if (!__bio_add_pc_page(q, bio, page, n, offs,
+ true))
break;
- /*
- * check if vector was merged with previous
- * drop page reference if needed
- */
- if (bio->bi_vcnt == prev_bi_vcnt)
- put_page(page);
-
added += n;
bytes -= n;
offs = 0;
@@ -1368,7 +1439,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
return bio;
out_unmap:
- bio_for_each_segment_all(bvec, bio, j) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
put_page(bvec->bv_page);
}
bio_put(bio);
@@ -1378,12 +1449,12 @@ struct bio *bio_map_user_iov(struct request_queue *q,
static void __bio_unmap_user(struct bio *bio)
{
struct bio_vec *bvec;
- int i;
+ struct bvec_iter_all iter_all;
/*
* make sure we dirty pages we wrote to
*/
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
if (bio_data_dir(bio) == READ)
set_page_dirty_lock(bvec->bv_page);
@@ -1474,9 +1545,9 @@ static void bio_copy_kern_endio_read(struct bio *bio)
{
char *p = bio->bi_private;
struct bio_vec *bvec;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
p += bvec->bv_len;
}
@@ -1584,23 +1655,14 @@ cleanup:
void bio_set_pages_dirty(struct bio *bio)
{
struct bio_vec *bvec;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
if (!PageCompound(bvec->bv_page))
set_page_dirty_lock(bvec->bv_page);
}
}
-static void bio_release_pages(struct bio *bio)
-{
- struct bio_vec *bvec;
- int i;
-
- bio_for_each_segment_all(bvec, bio, i)
- put_page(bvec->bv_page);
-}
-
/*
* bio_check_pages_dirty() will check that all the BIO's pages are still dirty.
* If they are, then fine. If, however, some pages are clean then they must
@@ -1634,7 +1696,8 @@ static void bio_dirty_fn(struct work_struct *work)
next = bio->bi_private;
bio_set_pages_dirty(bio);
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
}
}
@@ -1643,14 +1706,15 @@ void bio_check_pages_dirty(struct bio *bio)
{
struct bio_vec *bvec;
unsigned long flags;
- int i;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bvec, bio, i) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
goto defer;
}
- bio_release_pages(bio);
+ if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+ bio_release_pages(bio);
bio_put(bio);
return;
defer:
@@ -2132,6 +2196,9 @@ static int __init init_bio(void)
bio_slab_nr = 0;
bio_slabs = kcalloc(bio_slab_max, sizeof(struct bio_slab),
GFP_KERNEL);
+
+ BUILD_BUG_ON(BIO_FLAG_LAST > BVEC_POOL_OFFSET);
+
if (!bio_slabs)
panic("bio: can't allocate bios\n");