aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/backref.c103
-rw-r--r--fs/btrfs/ctree.c34
-rw-r--r--fs/btrfs/delayed-inode.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/export.c2
-rw-r--r--fs/btrfs/export.h2
-rw-r--r--fs/btrfs/extent-tree.c12
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/extent_io.h2
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ioctl.c60
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/qgroup.c26
-rw-r--r--fs/btrfs/raid56.c74
-rw-r--r--fs/btrfs/rcu-string.h6
-rw-r--r--fs/btrfs/relocation.c14
-rw-r--r--fs/btrfs/send.c23
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/tests/btrfs-tests.c2
-rw-r--r--fs/btrfs/tests/qgroup-tests.c36
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/tree-log.c38
-rw-r--r--fs/btrfs/volumes.c19
-rw-r--r--fs/btrfs/xattr.c3
25 files changed, 394 insertions, 158 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 6b8824de2abb..a1eb573fbb10 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -136,6 +136,7 @@ struct share_check {
u64 root_objectid;
u64 inum;
int share_count;
+ bool have_delayed_delete_refs;
};
static inline int extent_is_shared(struct share_check *sc)
@@ -588,6 +589,18 @@ unode_aux_to_inode_list(struct ulist_node *node)
return (struct extent_inode_elem *)(uintptr_t)node->aux;
}
+static void free_leaf_list(struct ulist *ulist)
+{
+ struct ulist_node *node;
+ struct ulist_iterator uiter;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((node = ulist_next(ulist, &uiter)))
+ free_inode_elem_list(unode_aux_to_inode_list(node));
+
+ ulist_free(ulist);
+}
+
/*
* We maintain three seperate rbtrees: one for direct refs, one for
* indirect refs which have a key, and one for indirect refs which do not
@@ -702,7 +715,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
cond_resched();
}
out:
- ulist_free(parents);
+ /*
+ * We may have inode lists attached to refs in the parents ulist, so we
+ * must free them before freeing the ulist and its refs.
+ */
+ free_leaf_list(parents);
return ret;
}
@@ -760,16 +777,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
struct share_check *sc)
{
struct btrfs_delayed_ref_node *node;
- struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct btrfs_key key;
- struct btrfs_key tmp_op_key;
struct rb_node *n;
int count;
int ret = 0;
- if (extent_op && extent_op->update_key)
- btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key);
-
spin_lock(&head->lock);
for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
node = rb_entry(n, struct btrfs_delayed_ref_node,
@@ -796,10 +808,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
+ struct btrfs_key *key_ptr = NULL;
+
+ if (head->extent_op && head->extent_op->update_key) {
+ btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
+ key_ptr = &key;
+ }
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
- &tmp_op_key, ref->level + 1,
+ key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -825,13 +843,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
key.offset = ref->offset;
/*
- * Found a inum that doesn't match our known inum, we
- * know it's shared.
+ * If we have a share check context and a reference for
+ * another inode, we can't exit immediately. This is
+ * because even if this is a BTRFS_ADD_DELAYED_REF
+ * reference we may find next a BTRFS_DROP_DELAYED_REF
+ * which cancels out this ADD reference.
+ *
+ * If this is a DROP reference and there was no previous
+ * ADD reference, then we need to signal that when we
+ * process references from the extent tree (through
+ * add_inline_refs() and add_keyed_refs()), we should
+ * not exit early if we find a reference for another
+ * inode, because one of the delayed DROP references
+ * may cancel that reference in the extent tree.
*/
- if (sc && sc->inum && ref->objectid != sc->inum) {
- ret = BACKREF_FOUND_SHARED;
- goto out;
- }
+ if (sc && count < 0)
+ sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
&key, 0, node->bytenr, count, sc,
@@ -861,7 +888,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
}
if (!ret)
ret = extent_is_shared(sc);
-out:
+
spin_unlock(&head->lock);
return ret;
}
@@ -965,7 +992,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -975,6 +1003,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
ret = add_indirect_ref(fs_info, preftrees, root,
&key, 0, bytenr, count,
sc, GFP_NOFS);
+
break;
}
default:
@@ -1064,7 +1093,8 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
- if (sc && sc->inum && key.objectid != sc->inum) {
+ if (sc && sc->inum && key.objectid != sc->inum &&
+ !sc->have_delayed_delete_refs) {
ret = BACKREF_FOUND_SHARED;
break;
}
@@ -1160,7 +1190,12 @@ again:
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret == 0);
+ if (ret == 0) {
+ /* This shouldn't happen, indicates a bug or fs corruption. */
+ ASSERT(ret != 0);
+ ret = -EUCLEAN;
+ goto out;
+ }
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (trans && likely(trans->type != __TRANS_DUMMY) &&
@@ -1308,10 +1343,18 @@ again:
goto out;
if (!ret && extent_item_pos) {
/*
- * we've recorded that parent, so we must extend
- * its inode list here
+ * We've recorded that parent, so we must extend
+ * its inode list here.
+ *
+ * However if there was corruption we may not
+ * have found an eie, return an error in this
+ * case.
*/
- BUG_ON(!eie);
+ ASSERT(eie);
+ if (!eie) {
+ ret = -EUCLEAN;
+ goto out;
+ }
while (eie->next)
eie = eie->next;
eie->next = ref->inode_list;
@@ -1333,24 +1376,6 @@ out:
return ret;
}
-static void free_leaf_list(struct ulist *blocks)
-{
- struct ulist_node *node = NULL;
- struct extent_inode_elem *eie;
- struct ulist_iterator uiter;
-
- ULIST_ITER_INIT(&uiter);
- while ((node = ulist_next(blocks, &uiter))) {
- if (!node->aux)
- continue;
- eie = unode_aux_to_inode_list(node);
- free_inode_elem_list(eie);
- node->aux = 0;
- }
-
- ulist_free(blocks);
-}
-
/*
* Finds all leafs with a reference to the specified combination of bytenr and
* offset. key_list_head will point to a list of corresponding keys (caller must
@@ -1477,6 +1502,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
.root_objectid = root->objectid,
.inum = inum,
.share_count = 0,
+ .have_delayed_delete_refs = false,
};
tmp = ulist_alloc(GFP_NOFS);
@@ -1515,6 +1541,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
break;
bytenr = node->val;
shared.share_count = 0;
+ shared.have_delayed_delete_refs = false;
cond_resched();
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 00dc1b5c8737..e48c6d7a860f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3590,6 +3590,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
if (ret) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -5151,10 +5153,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -5171,8 +5175,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1fbe2dee1e70..469a90b07d3f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1171,20 +1171,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 2ac920bdf4df..437ca4691967 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2855,7 +2855,7 @@ int open_ctree(struct super_block *sb,
~BTRFS_FEATURE_INCOMPAT_SUPP;
if (features) {
btrfs_err(fs_info,
- "cannot mount because of unsupported optional features (%llx)",
+ "cannot mount because of unsupported optional features (0x%llx)",
features);
err = -EINVAL;
goto fail_alloc;
@@ -2915,11 +2915,25 @@ int open_ctree(struct super_block *sb,
~BTRFS_FEATURE_COMPAT_RO_SUPP;
if (!sb_rdonly(sb) && features) {
btrfs_err(fs_info,
- "cannot mount read-write because of unsupported optional features (%llx)",
+ "cannot mount read-write because of unsupported optional features (0x%llx)",
features);
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We have unsupported RO compat features, although RO mounted, we
+ * should not cause any metadata write, including log replay.
+ * Or we could screw up whatever the new feature requires.
+ */
+ if (unlikely(features && btrfs_super_log_root(disk_super) &&
+ !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
+ btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+ features);
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
ret = btrfs_init_workqueues(fs_info, fs_devices);
if (ret) {
@@ -4334,7 +4348,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
iput(inode);
}
spin_lock(&root->delalloc_lock);
@@ -4452,7 +4470,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
inode = cache->io_ctl.inode;
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
+
BTRFS_I(inode)->generation = 0;
cache->io_ctl.inode = NULL;
iput(inode);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 665ec85cb09b..ecc33e3a3c06 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -58,7 +58,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
}
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
+ u64 root_objectid, u64 generation,
int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index f32f4113c976..5afb7ca42828 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -19,7 +19,7 @@ struct btrfs_fid {
} __attribute__ ((packed));
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
+ u64 root_objectid, u64 generation,
int check_generation);
struct dentry *btrfs_get_parent(struct dentry *child);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bf46ed74eae6..902ab00bfd7a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1549,6 +1549,11 @@ again:
err = -ENOENT;
goto out;
} else if (WARN_ON(ret)) {
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+ bytenr, num_bytes, parent, root_objectid, owner,
+ offset);
err = -EIO;
goto out;
}
@@ -2322,12 +2327,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- if (node->ref_mod != 1) {
+ if (unlikely(node->ref_mod != 1)) {
btrfs_err(trans->fs_info,
- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
- return -EIO;
+ return -EUCLEAN;
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
@@ -8327,6 +8332,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
out_free_delayed:
btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
+ btrfs_tree_unlock(buf);
free_extent_buffer(buf);
out_free_reserved:
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dabf153843e9..504d63fb81fa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3928,11 +3928,12 @@ retry:
free_extent_buffer(eb);
/*
- * the filesystem may choose to bump up nr_to_write.
+ * The filesystem may choose to bump up nr_to_write.
* We have to make sure to honor the new nr_to_write
- * at any time
+ * at any time.
*/
- nr_to_write_done = wbc->nr_to_write <= 0;
+ nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+ wbc->nr_to_write <= 0);
}
pagevec_release(&pvec);
cond_resched();
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index d5089cadd7c4..35a21a602e3a 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -197,7 +197,7 @@ struct extent_buffer {
*/
struct extent_changeset {
/* How many bytes are set/cleared in this operation */
- unsigned int bytes_changed;
+ u64 bytes_changed;
/* Changed ranges */
struct ulist range_changed;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6511cb71986c..b623e9f3b4c4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -783,15 +783,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- ctl->op->recalc_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ ctl->op->recalc_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f314b2c2d148..e4a4074ef33d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6620,7 +6620,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6684,7 +6684,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6837,7 +6837,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_fail;
@@ -9819,7 +9819,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
u64 objectid;
u64 index;
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
return ret;
@@ -10316,7 +10316,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -10600,7 +10600,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 717385b7f66f..c76277ccf03b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1789,6 +1789,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
* are limited to own subvolumes only
*/
ret = -EPERM;
+ } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * Snapshots must be made with the src_inode referring
+ * to the subvolume inode, otherwise the permission
+ * checking above is useless because we may have
+ * permission on a lower directory but not the subvol
+ * itself.
+ */
+ ret = -EINVAL;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2045,7 +2054,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf,
unsigned long *sk_offset,
int *num_found)
@@ -2177,7 +2186,7 @@ out:
static noinline int search_ioctl(struct inode *inode,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf)
{
struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
@@ -2249,7 +2258,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
struct btrfs_ioctl_search_key sk;
struct inode *inode;
int ret;
- size_t buf_size;
+ u64 buf_size;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2283,8 +2292,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
struct btrfs_ioctl_search_args_v2 args;
struct inode *inode;
int ret;
- size_t buf_size;
- const size_t buf_limit = SZ_16M;
+ u64 buf_size;
+ const u64 buf_limit = SZ_16M;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2759,6 +2768,8 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
}
}
+ btrfs_free_path(path);
+ path = NULL;
if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
ret = -EFAULT;
@@ -2849,6 +2860,8 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
}
out:
+ btrfs_free_path(path);
+
if (!ret || ret == -EOVERFLOW) {
rootrefs->num_items = found;
/* update min_treeid for next search */
@@ -2860,7 +2873,6 @@ out:
}
kfree(rootrefs);
- btrfs_free_path(path);
return ret;
}
@@ -3231,13 +3243,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
di_args->bytes_used = btrfs_device_get_bytes_used(dev);
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
- if (dev->name) {
- strncpy(di_args->path, rcu_str_deref(dev->name),
- sizeof(di_args->path) - 1);
- di_args->path[sizeof(di_args->path) - 1] = 0;
- } else {
+ if (dev->name)
+ strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path));
+ else
di_args->path[0] = '\0';
- }
out:
rcu_read_unlock();
@@ -4526,7 +4535,7 @@ static void get_block_group_info(struct list_head *groups_list,
static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_space_args space_args;
+ struct btrfs_ioctl_space_args space_args = { 0 };
struct btrfs_ioctl_space_info space;
struct btrfs_ioctl_space_info *dest;
struct btrfs_ioctl_space_info *dest_orig;
@@ -4722,6 +4731,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
if (IS_ERR(sa))
return PTR_ERR(sa);
+ if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
ret = mnt_want_write_file(file);
if (ret)
@@ -4886,6 +4900,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
ipath->fspath->val[i] = rel_ptr;
}
+ btrfs_free_path(path);
+ path = NULL;
ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
ipath->fspath, size);
if (ret) {
@@ -4956,21 +4972,20 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
size = min_t(u32, loi->size, SZ_16M);
}
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
inodes = init_data_container(size);
if (IS_ERR(inodes)) {
ret = PTR_ERR(inodes);
- inodes = NULL;
- goto out;
+ goto out_loi;
}
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
build_ino_list, inodes, ignore_offset);
+ btrfs_free_path(path);
if (ret == -EINVAL)
ret = -ENOENT;
if (ret < 0)
@@ -4982,7 +4997,6 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
ret = -EFAULT;
out:
- btrfs_free_path(path);
kvfree(inodes);
out_loi:
kfree(loi);
@@ -5879,7 +5893,7 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
- struct btrfs_ioctl_send_args_32 args32;
+ struct btrfs_ioctl_send_args_32 args32 = { 0 };
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 4b217e9a581c..e3de0c4ecbfc 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -109,10 +109,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 7bda5586e433..ef95525fa6cd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1035,6 +1035,21 @@ out_add_root:
fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
+ } else {
+ /*
+ * We have set both BTRFS_FS_QUOTA_ENABLED and
+ * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
+ * -EINPROGRESS. That can happen because someone started the
+ * rescan worker by calling quota rescan ioctl before we
+ * attempted to initialize the rescan worker. Failure due to
+ * quotas disabled in the meanwhile is not possible, because
+ * we are holding a write lock on fs_info->subvol_sem, which
+ * is also acquired when disabling quotas.
+ * Ignore such error, and any other error would need to undo
+ * everything we did in the transaction we just committed.
+ */
+ ASSERT(ret == -EINPROGRESS);
+ ret = 0;
}
out_free_path:
@@ -1100,7 +1115,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
goto end_trans;
}
+ spin_lock(&fs_info->trans_lock);
list_del(&quota_root->dirty_list);
+ spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(quota_root->node);
clean_tree_block(fs_info, quota_root->node);
@@ -2353,14 +2370,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
dstgroup->rsv_excl = inherit->lim.rsv_excl;
- ret = update_qgroup_limit_item(trans, dstgroup);
- if (ret) {
- fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
- btrfs_info(fs_info,
- "unable to update quota limit for %llu",
- dstgroup->qgroupid);
- goto unlock;
- }
+ qgroup_dirty(fs_info, dstgroup);
}
if (srcid) {
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index a91f74cf5cd1..0ce7ab8f875a 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -318,6 +318,9 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
{
bio_list_merge(&dest->bio_list, &victim->bio_list);
dest->bio_list_bytes += victim->bio_list_bytes;
+ /* Also inherit the bitmaps from @victim. */
+ bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
+ dest->stripe_npages);
dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}
@@ -862,6 +865,12 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+ /*
+ * Clear the data bitmap, as the rbio may be cached for later usage.
+ * do this before before unlock_stripe() so there will be no new bio
+ * for this bio.
+ */
+ bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
/*
* At this moment, rbio->bio_list is empty, however since rbio does not
@@ -1196,6 +1205,9 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
else
BUG();
+ /* We should have at least one data sector. */
+ ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
+
/* at this point we either have a full stripe,
* or we've read the full stripe from the drive.
* recalculate the parity and write the new results.
@@ -1269,6 +1281,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
+
+ /* This vertical stripe has no data, skip it. */
+ if (!test_bit(pagenr, rbio->dbitmap))
+ continue;
+
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1293,6 +1310,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
+
+ /* This vertical stripe has no data, skip it. */
+ if (!test_bit(pagenr, rbio->dbitmap))
+ continue;
+
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1733,6 +1755,33 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
run_plug(plug);
}
+/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
+static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
+{
+ const struct btrfs_fs_info *fs_info = rbio->fs_info;
+ const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ const u64 full_stripe_start = rbio->bbio->raid_map[0];
+ const u32 orig_len = orig_bio->bi_iter.bi_size;
+ const u32 sectorsize = fs_info->sectorsize;
+ u64 cur_logical;
+
+ ASSERT(orig_logical >= full_stripe_start &&
+ orig_logical + orig_len <= full_stripe_start +
+ rbio->nr_data * rbio->stripe_len);
+
+ bio_list_add(&rbio->bio_list, orig_bio);
+ rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
+
+ /* Update the dbitmap. */
+ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
+ cur_logical += sectorsize) {
+ int bit = ((u32)(cur_logical - full_stripe_start) >>
+ PAGE_SHIFT) % rbio->stripe_npages;
+
+ set_bit(bit, rbio->dbitmap);
+ }
+}
+
/*
* our main entry point for writes from the rest of the FS.
*/
@@ -1749,9 +1798,8 @@ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
btrfs_put_bbio(bbio);
return PTR_ERR(rbio);
}
- bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_iter.bi_size;
rbio->operation = BTRFS_RBIO_WRITE;
+ rbio_add_bio(rbio, bio);
btrfs_bio_counter_inc_noblocked(fs_info);
rbio->generic_bio_cnt = 1;
@@ -2053,9 +2101,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
atomic_set(&rbio->error, 0);
/*
- * read everything that hasn't failed. Thanks to the
- * stripe cache, it is possible that some or all of these
- * pages are going to be uptodate.
+ * Read everything that hasn't failed. However this time we will
+ * not trust any cached sector.
+ * As we may read out some stale data but higher layer is not reading
+ * that stale part.
+ *
+ * So here we always re-read everything in recovery path.
*/
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
if (rbio->faila == stripe || rbio->failb == stripe) {
@@ -2064,16 +2115,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
}
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
- struct page *p;
-
- /*
- * the rmw code may have already read this
- * page in
- */
- p = rbio_stripe_page(rbio, stripe, pagenr);
- if (PageUptodate(p))
- continue;
-
ret = rbio_add_io_page(rbio, &bio_list,
rbio_stripe_page(rbio, stripe, pagenr),
stripe, pagenr, rbio->stripe_len);
@@ -2155,8 +2196,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
}
rbio->operation = BTRFS_RBIO_READ_REBUILD;
- bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_iter.bi_size;
+ rbio_add_bio(rbio, bio);
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
index a97dc74a4d3d..02f15321cecc 100644
--- a/fs/btrfs/rcu-string.h
+++ b/fs/btrfs/rcu-string.h
@@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
(len * sizeof(char)), mask);
if (!ret)
return ret;
- strncpy(ret->str, src, len);
+ /* Warn if the source got unexpectedly truncated. */
+ if (WARN_ON(strscpy(ret->str, src, len) < 0)) {
+ kfree(ret);
+ return NULL;
+ }
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 06c6a66a991f..3b9318a3d421 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2341,7 +2341,7 @@ again:
list_splice(&reloc_roots, &rc->reloc_roots);
if (!err)
- btrfs_commit_transaction(trans);
+ err = btrfs_commit_transaction(trans);
else
btrfs_end_transaction(trans);
return err;
@@ -3930,8 +3930,12 @@ int prepare_to_relocate(struct reloc_control *rc)
*/
return PTR_ERR(trans);
}
- btrfs_commit_transaction(trans);
- return 0;
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ unset_reloc_control(rc);
+
+ return ret;
}
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
@@ -4097,7 +4101,9 @@ restart:
err = PTR_ERR(trans);
goto out_free;
}
- btrfs_commit_transaction(trans);
+ ret = btrfs_commit_transaction(trans);
+ if (ret && !err)
+ err = ret;
out_free:
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1967d5fa681a..cb584c044f8a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1306,6 +1306,7 @@ static int find_extent_clone(struct send_ctx *sctx,
u64 disk_byte;
u64 num_bytes;
u64 extent_item_pos;
+ u64 extent_refs;
u64 flags = 0;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = path->nodes[0];
@@ -1373,14 +1374,22 @@ static int find_extent_clone(struct send_ctx *sctx,
ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
struct btrfs_extent_item);
+ extent_refs = btrfs_extent_refs(tmp_path->nodes[0], ei);
/*
* Backreference walking (iterate_extent_inodes() below) is currently
* too expensive when an extent has a large number of references, both
* in time spent and used memory. So for now just fallback to write
* operations instead of clone operations when an extent has more than
* a certain amount of references.
+ *
+ * Also, if we have only one reference and only the send root as a clone
+ * source - meaning no clone roots were given in the struct
+ * btrfs_ioctl_send_args passed to the send ioctl - then it's our
+ * reference and there's no point in doing backref walking which is
+ * expensive, so exit early.
*/
- if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
+ if ((extent_refs == 1 && sctx->clone_roots_cnt == 1) ||
+ extent_refs > SEND_MAX_EXTENT_REFS) {
ret = -ENOENT;
goto out;
}
@@ -4956,6 +4965,10 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
+ btrfs_err(fs_info,
+ "send: IO error at offset %llu for inode %llu root %llu",
+ page_offset(page), sctx->cur_ino,
+ sctx->send_root->root_key.objectid);
put_page(page);
ret = -EIO;
break;
@@ -6813,10 +6826,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
/*
* Check that we don't overflow at later allocations, we request
* clone_sources_count + 1 items, and compare to unsigned long inside
- * access_ok.
+ * access_ok. Also set an upper limit for allocation size so this can't
+ * easily exhaust memory. Max number of clone sources is about 200K.
*/
- if (arg->clone_sources_count >
- ULONG_MAX / sizeof(struct clone_root) - 1) {
+ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
ret = -EINVAL;
goto out;
}
@@ -6847,7 +6860,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
sctx->flags = arg->flags;
sctx->send_filp = fget(arg->send_fd);
- if (!sctx->send_filp) {
+ if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 521f6c2091ad..a59543951851 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2196,7 +2196,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* calculated f_bavail.
*/
if (!mixed && block_rsv->space_info->full &&
- total_free_meta - thresh < block_rsv->size)
+ (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 82d874b10438..86c6ff2cc689 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -174,7 +174,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
void btrfs_free_dummy_root(struct btrfs_root *root)
{
- if (!root)
+ if (IS_ERR_OR_NULL(root))
return;
/* Will be freed by btrfs_free_fs_roots */
if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index d07dd26194b1..9b43907324a3 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -230,21 +230,21 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -256,31 +256,33 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
return ret;
}
+ /* btrfs_qgroup_account_extent() always frees the ulists passed to it. */
+ old_roots = NULL;
+ new_roots = NULL;
+
if (btrfs_verify_qgroup_counts(fs_info, BTRFS_FS_TREE_OBJECTID,
nodesize, nodesize)) {
test_err("qgroup counts didn't match expected values");
return -EINVAL;
}
- old_roots = NULL;
- new_roots = NULL;
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = remove_extent_item(root, nodesize, nodesize);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return -EINVAL;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -331,21 +333,21 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = insert_normal_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FS_TREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -366,21 +368,21 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = add_tree_ref(root, nodesize, nodesize, 0,
BTRFS_FIRST_FREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
@@ -407,21 +409,21 @@ static int test_multiple_refs(struct btrfs_root *root,
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
false);
if (ret) {
- ulist_free(old_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
ret = remove_extent_ref(root, nodesize, nodesize, 0,
BTRFS_FIRST_FREE_OBJECTID);
- if (ret)
+ if (ret) {
+ ulist_free(old_roots);
return ret;
+ }
ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
false);
if (ret) {
ulist_free(old_roots);
- ulist_free(new_roots);
test_err("couldn't find old roots: %d", ret);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 049535115c9d..a34c0436ebb1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -200,10 +200,11 @@ loop:
spin_unlock(&fs_info->trans_lock);
/*
- * If we are ATTACH, we just want to catch the current transaction,
- * and commit it. If there is no transaction, just return ENOENT.
+ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+ * current transaction, and commit it. If there is no transaction, just
+ * return ENOENT.
*/
- if (type == TRANS_ATTACH)
+ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
return -ENOENT;
/*
@@ -703,8 +704,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
trans = start_transaction(root, 0, TRANS_ATTACH,
BTRFS_RESERVE_NO_FLUSH, true);
- if (trans == ERR_PTR(-ENOENT))
- btrfs_wait_for_commit(root->fs_info, 0);
+ if (trans == ERR_PTR(-ENOENT)) {
+ int ret;
+
+ ret = btrfs_wait_for_commit(root->fs_info, 0);
+ if (ret)
+ return ERR_PTR(ret);
+ }
return trans;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1f5facdd19dc..23ec766eeb0a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1081,7 +1081,9 @@ again:
extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
inode_objectid, parent_objectid, 0,
0);
- if (!IS_ERR_OR_NULL(extref)) {
+ if (IS_ERR(extref)) {
+ return PTR_ERR(extref);
+ } else if (extref) {
u32 item_size;
u32 cur_offset = 0;
unsigned long base;
@@ -1289,6 +1291,15 @@ again:
inode, name, namelen);
kfree(name);
iput(dir);
+ /*
+ * Whenever we need to check if a name exists or not, we
+ * check the subvolume tree. So after an unlink we must
+ * run delayed items, so that future checks for a name
+ * during log replay see that the name does not exists
+ * anymore.
+ */
+ if (!ret)
+ ret = btrfs_run_delayed_items(trans);
if (ret)
goto out;
goto again;
@@ -1480,6 +1491,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
*/
if (!ret && inode->i_nlink == 0)
inc_nlink(inode);
+ /*
+ * Whenever we need to check if a name exists or
+ * not, we check the subvolume tree. So after an
+ * unlink we must run delayed items, so that future
+ * checks for a name during log replay see that the
+ * name does not exists anymore.
+ */
+ if (!ret)
+ ret = btrfs_run_delayed_items(trans);
}
if (ret < 0)
goto out;
@@ -4216,7 +4236,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
int slot;
int ins_nr = 0;
- int start_slot;
+ int start_slot = 0;
int ret;
if (!(inode->flags & BTRFS_INODE_PREALLOC))
@@ -4889,6 +4909,18 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
}
/*
+ * For symlinks, we must always log their content, which is stored in an
+ * inline extent, otherwise we could end up with an empty symlink after
+ * log replay, which is invalid on linux (symlink(2) returns -ENOENT if
+ * one attempts to create an empty symlink).
+ * We don't need to worry about flushing delalloc, because when we create
+ * the inline extent when the symlink is created (we never have delalloc
+ * for symlinks).
+ */
+ if (S_ISLNK(inode->vfs_inode.i_mode))
+ inode_only = LOG_INODE_ALL;
+
+ /*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
*/
@@ -5444,7 +5476,7 @@ process_leaf:
}
ctx->log_new_dentries = false;
- if (type == BTRFS_FT_DIR || type == BTRFS_FT_SYMLINK)
+ if (type == BTRFS_FT_DIR)
log_mode = LOG_INODE_ALL;
ret = btrfs_log_inode(trans, root, BTRFS_I(di_inode),
log_mode, 0, LLONG_MAX, ctx);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8f05e6a472c3..ceced5e56c5a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1418,7 +1418,7 @@ again:
goto out;
}
- while (1) {
+ while (search_start < search_end) {
l = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) {
@@ -1441,6 +1441,9 @@ again:
if (key.type != BTRFS_DEV_EXTENT_KEY)
goto next;
+ if (key.offset > search_end)
+ break;
+
if (key.offset > search_start) {
hole_size = key.offset - search_start;
@@ -1515,6 +1518,7 @@ next:
else
ret = 0;
+ ASSERT(max_hole_start + max_hole_size <= search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -2756,7 +2760,7 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
return ERR_PTR(-EINVAL);
}
- if (em->start > logical || em->start + em->len < logical) {
+ if (em->start > logical || em->start + em->len <= logical) {
btrfs_crit(fs_info,
"found a bad mapping, wanted %llu-%llu, found %llu-%llu",
logical, length, em->start, em->start + em->len);
@@ -4102,8 +4106,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
}
}
- BUG_ON(fs_info->balance_ctl ||
- test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
@@ -6925,12 +6928,12 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
* do another round of validation checks.
*/
if (total_dev != fs_info->fs_devices->total_devices) {
- btrfs_err(fs_info,
- "super_num_devices %llu mismatch with num_devices %llu found here",
+ btrfs_warn(fs_info,
+"super block num_devices %llu mismatch with DEV_ITEM count %llu, will be repaired on next transaction commit",
btrfs_super_num_devices(fs_info->super_copy),
total_dev);
- ret = -EINVAL;
- goto error;
+ fs_info->fs_devices->total_devices = total_dev;
+ btrfs_set_super_num_devices(fs_info->super_copy, total_dev);
}
if (btrfs_super_total_bytes(fs_info->super_copy) <
fs_info->fs_devices->total_rw_bytes) {
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index f141b45ce349..6adee94637a9 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -369,6 +369,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
const char *name, const void *buffer,
size_t size, int flags)
{
+ if (btrfs_root_readonly(BTRFS_I(inode)->root))
+ return -EROFS;
+
name = xattr_full_name(handler, name);
return btrfs_setxattr(NULL, inode, name, buffer, size, flags);
}