Diffstat (limited to 'fs/btrfs/inode.c'):
 fs/btrfs/inode.c (-rw-r--r--) | 119
 1 file changed, 58 insertions(+), 61 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a196bb134d9..e985e820724e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -629,7 +629,21 @@ cont:
btrfs_free_reserved_data_space_noquota(inode,
start,
end - start + 1);
- goto free_pages_out;
+
+ /*
+ * Ensure we only free the compressed pages if we have
+ * them allocated, as we can still reach here with
+ * inode_need_compress() == false.
+ */
+ if (pages) {
+ for (i = 0; i < nr_pages; i++) {
+ WARN_ON(pages[i]->mapping);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+ }
+
+ return;
}
}
@@ -708,13 +722,6 @@ cleanup_and_bail_uncompressed:
*num_added += 1;
return;
-
-free_pages_out:
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
- }
- kfree(pages);
}
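
For context on the two hunks above: the shared free_pages_out label is replaced by an inline cleanup that only touches the compressed pages when they were actually allocated, since this path can now be reached with inode_need_compress() == false and pages == NULL. A minimal userspace sketch of that guarded cleanup pattern follows; the helper name release_compressed_pages and the trimmed-down struct page are illustrative stand-ins, not btrfs interfaces.

#include <stdio.h>
#include <stdlib.h>

struct page { void *mapping; };

/*
 * Free the compressed pages only if they were allocated: callers can
 * reach the inline-extent path with pages == NULL when compression was
 * skipped, which is exactly the case the NULL check guards against.
 */
static void release_compressed_pages(struct page **pages, unsigned long nr_pages)
{
	unsigned long i;

	if (!pages)
		return;
	for (i = 0; i < nr_pages; i++) {
		if (pages[i]->mapping)
			fprintf(stderr, "page %lu still mapped\n", i);
		free(pages[i]);
	}
	free(pages);
}

int main(void)
{
	release_compressed_pages(NULL, 0); /* compression skipped: no-op */
	return 0;
}
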
static void free_async_extent_pages(struct async_extent *async_extent)
@@ -978,8 +985,8 @@ static noinline int cow_file_range(struct inode *inode,
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
- u64 disk_num_bytes;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -996,7 +1003,6 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
@@ -1023,17 +1029,32 @@ static noinline int cow_file_range(struct inode *inode,
}
}
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(fs_info->super_copy));
+ BUG_ON(num_bytes > btrfs_super_total_bytes(fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
- while (disk_num_bytes > 0) {
- cur_alloc_size = disk_num_bytes;
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
+ while (num_bytes > 0) {
+ cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1097,11 +1118,10 @@ static noinline int cow_file_range(struct inode *inode,
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
- if (disk_num_bytes < cur_alloc_size)
- disk_num_bytes = 0;
+ if (num_bytes < cur_alloc_size)
+ num_bytes = 0;
else
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
+ num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
@@ -1139,8 +1159,8 @@ out_unlock:
*/
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
- start + cur_alloc_size,
- start + cur_alloc_size,
+ start + cur_alloc_size - 1,
+ start + cur_alloc_size - 1,
locked_page,
clear_bits,
page_ops);
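
The comment introduced above is the heart of this change: when COWing on behalf of the data relocation tree, the minimum allocation size must equal the whole requested range so the replacement extent cannot be split, while the normal path still allows sector-sized pieces. A minimal sketch of that selection and of the allocation loop follows; cow_range, reserve_extent and SECTORSIZE are illustrative stand-ins, not the btrfs_reserve_extent()/cow_file_range() interfaces.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SECTORSIZE 4096ULL

/*
 * Toy allocator: hands back at most half of what was asked for, but never
 * less than min_size, mimicking fragmented free space.
 */
static uint64_t reserve_extent(uint64_t wanted, uint64_t min_size)
{
	uint64_t got = wanted / 2;

	if (got < min_size)
		got = min_size;
	if (got > wanted)
		got = wanted;
	return got;
}

static void cow_range(uint64_t num_bytes, bool is_data_reloc_tree)
{
	/*
	 * Relocation needs the new extent to match the original size
	 * exactly, so it must not be split into smaller allocations.
	 */
	uint64_t min_alloc_size = is_data_reloc_tree ? num_bytes : SECTORSIZE;

	while (num_bytes > 0) {
		uint64_t cur = reserve_extent(num_bytes, min_alloc_size);

		printf("allocated %llu bytes (min %llu)\n",
		       (unsigned long long)cur,
		       (unsigned long long)min_alloc_size);
		num_bytes = (num_bytes < cur) ? 0 : num_bytes - cur;
	}
}

int main(void)
{
	cow_range(SECTORSIZE * 8, false); /* normal COW: may be split */
	cow_range(SECTORSIZE * 8, true);  /* relocation: one full-size extent */
	return 0;
}
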
@@ -1315,7 +1335,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 disk_num_bytes;
u64 ram_bytes;
int extent_type;
- int ret, err;
+ int ret;
int type;
int nocow;
int check_prev = 1;
@@ -1440,11 +1460,8 @@ next_slot:
* if there are pending snapshots for this root,
* we fall into common COW way.
*/
- if (!nolock) {
- err = btrfs_start_write_no_snapshotting(root);
- if (!err)
- goto out_check;
- }
+ if (!nolock && atomic_read(&root->snapshot_force_cow))
+ goto out_check;
/*
* force cow if csum exists in the range.
* this ensure that csum for a given extent are
@@ -1453,9 +1470,6 @@ next_slot:
ret = csum_exist_in_range(fs_info, disk_bytenr,
num_bytes);
if (ret) {
- if (!nolock)
- btrfs_end_write_no_snapshotting(root);
-
/*
* ret could be -EIO if the above fails to read
* metadata.
@@ -1468,11 +1482,8 @@ next_slot:
WARN_ON_ONCE(nolock);
goto out_check;
}
- if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
- if (!nolock)
- btrfs_end_write_no_snapshotting(root);
+ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
- }
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
@@ -1485,8 +1496,6 @@ next_slot:
out_check:
if (extent_end <= start) {
path->slots[0]++;
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
goto next_slot;
@@ -1508,8 +1517,6 @@ out_check:
end, page_started, nr_written, 1,
NULL);
if (ret) {
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1529,8 +1536,6 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1569,8 +1574,6 @@ out_check:
EXTENT_CLEAR_DATA_RESV,
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
cur_offset = extent_end;
/*
@@ -8707,7 +8710,6 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
/* bio split */
ASSERT(map_length <= INT_MAX);
- atomic_inc(&dip->pending_bios);
do {
clone_len = min_t(int, submit_len, map_length);
@@ -8758,7 +8760,8 @@ submit:
if (!status)
return 0;
- bio_put(bio);
+ if (bio != orig_bio)
+ bio_put(bio);
out_err:
dip->errors = 1;
/*
@@ -8798,7 +8801,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
bio->bi_private = dip;
dip->orig_bio = bio;
dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
+ atomic_set(&dip->pending_bios, 1);
io_bio = btrfs_io_bio(bio);
io_bio->logical = file_offset;
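
The three direct-IO hunks above implement a standard reference-count pattern for split bios: pending_bios now starts at 1 so the submitter itself holds a reference across the split loop (instead of taking an extra one mid-loop), each clone adds a reference, and the original bio is no longer put on the error path because the caller still owns it. A minimal sketch of that counting scheme using C11 atomics follows; dio_private, bio_done and dio_complete here are illustrative stand-ins, not the btrfs direct-IO structures.

#include <stdatomic.h>
#include <stdio.h>

struct dio_private {
	atomic_int pending_bios;
	int errors;
};

static void dio_complete(struct dio_private *dip)
{
	printf("all bios done, errors=%d\n", dip->errors);
}

/* Called when one split bio finishes; the final drop completes the DIO. */
static void bio_done(struct dio_private *dip, int err)
{
	if (err)
		dip->errors = 1;
	if (atomic_fetch_sub(&dip->pending_bios, 1) == 1)
		dio_complete(dip);
}

int main(void)
{
	struct dio_private dip = { .errors = 0 };
	int i;

	/*
	 * Start at 1: the submitter keeps its own reference for the whole
	 * split/submit loop, so an early completion cannot finish the DIO
	 * while more clones are still being issued.
	 */
	atomic_init(&dip.pending_bios, 1);

	for (i = 0; i < 3; i++)		/* issue three split bios */
		atomic_fetch_add(&dip.pending_bios, 1);

	for (i = 0; i < 3; i++)		/* they complete over time */
		bio_done(&dip, 0);

	bio_done(&dip, 0);	/* submitter drops its reference last */
	return 0;
}
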
@@ -8947,9 +8950,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
@@ -9187,20 +9187,17 @@ again:
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |