summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c8
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/export.c10
-rw-r--r--fs/btrfs/extent-tree.c16
-rw-r--r--fs/btrfs/extent_io.c80
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c45
-rw-r--r--fs/btrfs/ioctl.c8
-rw-r--r--fs/btrfs/send.c8
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/transaction.c33
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/volumes.h8
14 files changed, 171 insertions, 63 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index ecbc63d3143e..9a2ec79e8cfb 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1828,7 +1828,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
int found = 0;
struct extent_buffer *eb;
struct btrfs_inode_extref *extref;
- struct extent_buffer *leaf;
u32 item_size;
u32 cur_offset;
unsigned long ptr;
@@ -1856,9 +1855,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, slot);
- ptr = btrfs_item_ptr_offset(leaf, slot);
+ item_size = btrfs_item_size_nr(eb, slot);
+ ptr = btrfs_item_ptr_offset(eb, slot);
cur_offset = 0;
while (cur_offset < item_size) {
@@ -1872,7 +1870,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
if (ret)
break;
- cur_offset += btrfs_inode_extref_name_len(leaf, extref);
+ cur_offset += btrfs_inode_extref_name_len(eb, extref);
cur_offset += sizeof(*extref);
}
btrfs_tree_read_unlock_blocking(eb);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 81220b2203c6..0ef5cc13fae2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,8 +44,6 @@
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
#define BTRFS_INODE_HAS_PROPS 11
-/* DIO is ready to submit */
-#define BTRFS_INODE_DIO_READY 12
/*
* The following 3 bits are meant only for the btree inode.
* When any of them is set, it means an error happened while writing an
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d98aee34fee..1e60d00d4ea7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2847,6 +2847,8 @@ int open_ctree(struct super_block *sb,
!extent_buffer_uptodate(chunk_root->node)) {
printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
sb->s_id);
+ if (!IS_ERR(chunk_root->node))
+ free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
goto fail_tree_roots;
}
@@ -2885,6 +2887,8 @@ retry_root_backup:
!extent_buffer_uptodate(tree_root->node)) {
printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
sb->s_id);
+ if (!IS_ERR(tree_root->node))
+ free_extent_buffer(tree_root->node);
tree_root->node = NULL;
goto recovery_tree_root;
}
@@ -3765,9 +3769,7 @@ void close_ctree(struct btrfs_root *root)
* block groups queued for removal, the deletion will be
* skipped when we quit the cleaner thread.
*/
- mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(root->fs_info);
- mutex_unlock(&root->fs_info->cleaner_mutex);
ret = btrfs_commit_super(root);
if (ret)
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 8d052209f473..2513a7f53334 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -112,11 +112,11 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
u32 generation;
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
- if (fh_len != BTRFS_FID_SIZE_CONNECTABLE)
+ if (fh_len < BTRFS_FID_SIZE_CONNECTABLE)
return NULL;
root_objectid = fid->root_objectid;
} else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) {
- if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT)
+ if (fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT)
return NULL;
root_objectid = fid->parent_root_objectid;
} else
@@ -136,11 +136,11 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
u32 generation;
if ((fh_type != FILEID_BTRFS_WITH_PARENT ||
- fh_len != BTRFS_FID_SIZE_CONNECTABLE) &&
+ fh_len < BTRFS_FID_SIZE_CONNECTABLE) &&
(fh_type != FILEID_BTRFS_WITH_PARENT_ROOT ||
- fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
+ fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) &&
(fh_type != FILEID_BTRFS_WITHOUT_PARENT ||
- fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE))
+ fh_len < BTRFS_FID_SIZE_NON_CONNECTABLE))
return NULL;
objectid = fid->objectid;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5411f0ab5683..601d7d45d164 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2828,6 +2828,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head;
int ret;
int run_all = count == (unsigned long)-1;
+ bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
@@ -2844,6 +2845,7 @@ again:
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
+ trans->can_flush_pending_bgs = false;
ret = __btrfs_run_delayed_refs(trans, root, count);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
@@ -2893,6 +2895,7 @@ again:
}
out:
assert_qgroups_uptodate(trans);
+ trans->can_flush_pending_bgs = can_flush_pending_bgs;
return 0;
}
@@ -3742,10 +3745,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
found->bytes_reserved = 0;
found->bytes_readonly = 0;
found->bytes_may_use = 0;
- if (total_bytes > 0)
- found->full = 0;
- else
- found->full = 1;
+ found->full = 0;
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
found->chunk_alloc = 0;
found->flush = 0;
@@ -4309,7 +4309,8 @@ out:
* the block groups that were made dirty during the lifetime of the
* transaction.
*/
- if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
+ if (trans->can_flush_pending_bgs &&
+ trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) {
btrfs_create_pending_block_groups(trans, trans->root);
btrfs_trans_release_chunk_metadata(trans);
}
@@ -8668,7 +8669,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
}
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
- btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
+ btrfs_add_dropped_root(trans, root);
} else {
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
@@ -9563,7 +9564,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_block_group_item item;
struct btrfs_key key;
int ret = 0;
+ bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
+ trans->can_flush_pending_bgs = false;
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
if (ret)
goto next;
@@ -9584,6 +9587,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
next:
list_del_init(&block_group->bg_list);
}
+ trans->can_flush_pending_bgs = can_flush_pending_bgs;
}
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f1018cfbfefa..3915c9473e94 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_end_io_t end_io_func,
int mirror_num,
unsigned long prev_bio_flags,
- unsigned long bio_flags)
+ unsigned long bio_flags,
+ bool force_bio_submit)
{
int ret = 0;
struct bio *bio;
@@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
contig = bio_end_sector(bio) == sector;
if (prev_bio_flags != bio_flags || !contig ||
+ force_bio_submit ||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
@@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
@@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,
}
while (cur <= end) {
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ bool force_bio_submit = false;
if (cur >= last_byte) {
char *userpage;
@@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,
block_start = em->block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
block_start = EXTENT_MAP_HOLE;
+
+ /*
+ * If we have a file range that points to a compressed extent
+ * and it's followed by a consecutive file range that points to
+ * to the same compressed extent (possibly with a different
+ * offset and/or length, so it either points to the whole extent
+ * or only part of it), we must make sure we do not submit a
+ * single bio to populate the pages for the 2 ranges because
+ * this makes the compressed extent read zero out the pages
+ * belonging to the 2nd range. Imagine the following scenario:
+ *
+ * File layout
+ * [0 - 8K] [8K - 24K]
+ * | |
+ * | |
+ * points to extent X, points to extent X,
+ * offset 4K, length of 8K offset 0, length 16K
+ *
+ * [extent X, compressed length = 4K uncompressed length = 16K]
+ *
+ * If the bio to read the compressed extent covers both ranges,
+ * it will decompress extent X into the pages belonging to the
+ * first range and then it will stop, zeroing out the remaining
+ * pages that belong to the other range that points to extent X.
+ * So here we make sure we submit 2 bios, one for the first
+ * range and another one for the third range. Both will target
+ * the same physical extent from disk, but we can't currently
+ * make the compressed bio endio callback populate the pages
+ * for both ranges because each compressed bio is tightly
+ * coupled with a single extent map, and each range can have
+ * an extent map with a different offset value relative to the
+ * uncompressed data of our extent and different lengths. This
+ * is a corner case so we prioritize correctness over
+ * non-optimal behavior (submitting 2 bios for the same extent).
+ */
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+ prev_em_start && *prev_em_start != (u64)-1 &&
+ *prev_em_start != em->orig_start)
+ force_bio_submit = true;
+
+ if (prev_em_start)
+ *prev_em_start = em->orig_start;
+
free_extent_map(em);
em = NULL;
@@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
- this_bio_flag);
+ this_bio_flag,
+ force_bio_submit);
if (!ret) {
nr++;
*bio_flags = this_bio_flag;
@@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
struct inode *inode;
struct btrfs_ordered_extent *ordered;
@@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
- mirror_num, bio_flags, rw);
+ mirror_num, bio_flags, rw, prev_em_start);
page_cache_release(pages[index]);
}
}
@@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
int nr_pages, get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
u64 start = 0;
u64 end = 0;
@@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
index - first_index, start,
end, get_extent, em_cached,
bio, mirror_num, bio_flags,
- rw);
+ rw, prev_em_start);
start = page_start;
end = start + PAGE_CACHE_SIZE - 1;
first_index = index;
@@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached, bio,
- mirror_num, bio_flags, rw);
+ mirror_num, bio_flags, rw,
+ prev_em_start);
}
static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
- bio_flags, rw);
+ bio_flags, rw, NULL);
return ret;
}
@@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
- &bio_flags, READ);
+ &bio_flags, READ, NULL);
if (bio)
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
@@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
- 0, 0, 0);
+ 0, 0, 0, false);
if (ret)
SetPageError(page);
}
@@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
-1, end_bio_extent_buffer_writepage,
- 0, epd->bio_flags, bio_flags);
+ 0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
set_btree_ioerr(p);
@@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree,
struct page *page;
struct extent_map *em_cached = NULL;
int nr = 0;
+ u64 prev_em_start = (u64)-1;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
page = list_entry(pages->prev, struct page, lru);
@@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree,
if (nr < ARRAY_SIZE(pagepool))
continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ);
+ &bio, 0, &bio_flags, READ, &prev_em_start);
nr = 0;
}
if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ);
+ &bio, 0, &bio_flags, READ, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b823fac91c92..8c6f247ba81d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2584,7 +2584,7 @@ static long btrfs_fallocate(struct file *file, int mode,
alloc_start);
if (ret)
goto out;
- } else {
+ } else if (offset + len > inode->i_size) {
/*
* If we are fallocating from the end of the file onward we
* need to zero out the end of the page if i_size lands in the
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0fa7253a2d7..611b66d73e80 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (!special_file(inode->i_mode))
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_free_io_failure_record(inode, 0, (u64)-1);
@@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
return em;
}
+struct btrfs_dio_data {
+ u64 outstanding_extents;
+ u64 reserve;
+};
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
+ struct btrfs_dio_data *dio_data = NULL;
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
- u64 *outstanding_extents = NULL;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
@@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* that anything that needs to check if there's a transction doesn't get
* confused.
*/
- outstanding_extents = current->journal_info;
+ dio_data = current->journal_info;
current->journal_info = NULL;
}
@@ -7568,17 +7573,18 @@ unlock:
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
- if (*outstanding_extents) {
- (*outstanding_extents)--;
+ if (dio_data->outstanding_extents) {
+ (dio_data->outstanding_extents)--;
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
- current->journal_info = outstanding_extents;
btrfs_free_reserved_data_space(inode, len);
- set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
+ WARN_ON(dio_data->reserve < len);
+ dio_data->reserve -= len;
+ current->journal_info = dio_data;
}
/*
@@ -7601,8 +7607,8 @@ unlock:
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
- if (outstanding_extents)
- current->journal_info = outstanding_extents;
+ if (dio_data)
+ current->journal_info = dio_data;
return ret;
}
@@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- u64 outstanding_extents = 0;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_dio_data dio_data = { 0 };
size_t count = 0;
int flags = 0;
bool wakeup = true;
@@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
goto out;
- outstanding_extents = div64_u64(count +
+ dio_data.outstanding_extents = div64_u64(count +
BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
@@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* do the accounting properly if we go over the number we
* originally calculated. Abuse current->journal_info for this.
*/
- current->journal_info = &outstanding_extents;
+ dio_data.reserve = round_up(count, root->sectorsize);
+ current->journal_info = &dio_data;
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) {
inode_dio_end(inode);
@@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == WRITE) {
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
- /*
- * If the error comes from submitting stage,
- * btrfs_get_blocsk_direct() has free'd data space,
- * and metadata space will be handled by
- * finish_ordered_fn, don't do that again to make
- * sure bytes_may_use is correct.
- */
- if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_delalloc_release_space(inode, count);
+ if (dio_data.reserve)
+ btrfs_delalloc_release_space(inode,
+ dio_data.reserve);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0adf5422fce9..8d20f3b1cab0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4639,6 +4639,11 @@ locked:
bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
}
+ if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
+ ret = -EINVAL;
+ goto out_bctl;
+ }
+
do_balance:
/*
* Ownership of bctl and mutually_exclusive_operation_running
@@ -4650,12 +4655,15 @@ do_balance:
need_unlock = false;
ret = btrfs_balance(bctl, bargs);
+ bctl = NULL;
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}
+out_bctl:
+ kfree(bctl);
out_bargs:
kfree(bargs);
out_unlock:
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index aa72bfd28f7d..a739b825bdd3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1920,10 +1920,12 @@ static int did_overwrite_ref(struct send_ctx *sctx,
/*
* We know that it is or will be overwritten. Check this now.
* The current inode being processed might have been the one that caused
- * inode 'ino' to be orphanized, therefore ow_inode can actually be the
- * same as sctx->send_progress.
+ * inode 'ino' to be orphanized, therefore check if ow_inode matches
+ * the current inode being processed.
*/
- if (ow_inode <= sctx->send_progress)
+ if ((ow_inode < sctx->send_progress) ||
+ (ino != sctx->cur_ino && ow_inode == sctx->cur_ino &&
+ gen == sctx->cur_inode_gen))
ret = 1;
else
ret = 0;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 2b07b3581781..11d1eab9234d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
* groups on disk until we're mounted read-write again
* unless we clean them up here.
*/
- mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(fs_info);
- mutex_unlock(&root->fs_info->cleaner_mutex);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8f259b3a66b3..a5b06442f0bf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
btrfs_unpin_free_ino(root);
clear_btree_io_tree(&root->dirty_log_pages);
}
+
+ /* We can free old roots now. */
+ spin_lock(&trans->dropped_roots_lock);
+ while (!list_empty(&trans->dropped_roots)) {
+ root = list_first_entry(&trans->dropped_roots,
+ struct btrfs_root, root_list);
+ list_del_init(&root->root_list);
+ spin_unlock(&trans->dropped_roots_lock);
+ btrfs_drop_and_free_fs_root(fs_info, root);
+ spin_lock(&trans->dropped_roots_lock);
+ }
+ spin_unlock(&trans->dropped_roots_lock);
up_write(&fs_info->commit_root_sem);
}
@@ -255,11 +267,13 @@ loop:
INIT_LIST_HEAD(&cur_trans->pending_ordered);
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
INIT_LIST_HEAD(&cur_trans->io_bgs);
+ INIT_LIST_HEAD(&cur_trans->dropped_roots);
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->deleted_bgs_lock);
+ spin_lock_init(&cur_trans->dropped_roots_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
@@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
}
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_transaction *cur_trans = trans->transaction;
+
+ /* Add ourselves to the transaction dropped list */
+ spin_lock(&cur_trans->dropped_roots_lock);
+ list_add_tail(&root->root_list, &cur_trans->dropped_roots);
+ spin_unlock(&cur_trans->dropped_roots_lock);
+
+ /* Make sure we don't try to update the root at commit time */
+ spin_lock(&root->fs_info->fs_roots_radix_lock);
+ radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&root->fs_info->fs_roots_radix_lock);
+}
+
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
@@ -525,6 +557,7 @@ again:
h->delayed_ref_elem.seq = 0;
h->type = type;
h->allocating_chunk = false;
+ h->can_flush_pending_bgs = true;
h->reloc_reserved = false;
h->sync = false;
INIT_LIST_HEAD(&h->qgroup_ref_list);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index edc2fbc262d7..a994bb097ee5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -65,6 +65,7 @@ struct btrfs_transaction {
struct list_head switch_commits;
struct list_head dirty_bgs;
struct list_head io_bgs;
+ struct list_head dropped_roots;
u64 num_dirty_bgs;
/*
@@ -76,6 +77,7 @@ struct btrfs_transaction {
spinlock_t dirty_bgs_lock;
struct list_head deleted_bgs;
spinlock_t deleted_bgs_lock;
+ spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
int dirty_bg_run;
@@ -116,6 +118,7 @@ struct btrfs_trans_handle {
short aborted;
short adding_csums;
bool allocating_chunk;
+ bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
unsigned int type;
@@ -216,5 +219,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
-
+void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
#endif
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2ca784a14e84..595279a8b99f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -376,6 +376,14 @@ struct map_lookup {
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
+#define BTRFS_BALANCE_ARGS_MASK \
+ (BTRFS_BALANCE_ARGS_PROFILES | \
+ BTRFS_BALANCE_ARGS_USAGE | \
+ BTRFS_BALANCE_ARGS_DEVID | \
+ BTRFS_BALANCE_ARGS_DRANGE | \
+ BTRFS_BALANCE_ARGS_VRANGE | \
+ BTRFS_BALANCE_ARGS_LIMIT)
+
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be