aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/cmservice.c2
-rw-r--r--fs/afs/dir.c108
-rw-r--r--fs/afs/dir_silly.c22
-rw-r--r--fs/afs/fsclient.c27
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/rotate.c6
-rw-r--r--fs/afs/server.c7
-rw-r--r--fs/afs/volume.c8
-rw-r--r--fs/afs/yfsclient.c26
-rw-r--r--fs/block_dev.c4
-rw-r--r--fs/btrfs/async-thread.c8
-rw-r--r--fs/btrfs/async-thread.h1
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/delayed-inode.c13
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent_io.c35
-rw-r--r--fs/btrfs/file.c11
-rw-r--r--fs/btrfs/qgroup.c11
-rw-r--r--fs/btrfs/relocation.c66
-rw-r--r--fs/btrfs/space-info.c43
-rw-r--r--fs/buffer.c11
-rw-r--r--fs/ceph/caps.c8
-rw-r--r--fs/ceph/export.c5
-rw-r--r--fs/ceph/file.c173
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/inode.c23
-rw-r--r--fs/cifs/smb2misc.c14
-rw-r--r--fs/cifs/smb2ops.c9
-rw-r--r--fs/cifs/transport.c28
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/debugfs/file.c18
-rw-r--r--fs/erofs/utils.c2
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext2/xattr.c8
-rw-r--r--fs/ext4/extents.c8
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/super.c14
-rw-r--r--fs/f2fs/checkpoint.c24
-rw-r--r--fs/f2fs/compress.c63
-rw-r--r--fs/f2fs/data.c35
-rw-r--r--fs/f2fs/f2fs.h102
-rw-r--r--fs/f2fs/file.c13
-rw-r--r--fs/f2fs/gc.c27
-rw-r--r--fs/f2fs/inode.c2
-rw-r--r--fs/f2fs/node.c7
-rw-r--r--fs/f2fs/super.c14
-rw-r--r--fs/filesystems.c4
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c27
-rw-r--r--fs/gfs2/log.c19
-rw-r--r--fs/gfs2/log.h1
-rw-r--r--fs/hfsplus/attributes.c4
-rw-r--r--fs/io_uring.c36
-rw-r--r--fs/jbd2/commit.c7
-rw-r--r--fs/libfs.c8
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/direct.c2
-rw-r--r--fs/nfs/fs_context.c5
-rw-r--r--fs/nfs/inode.c10
-rw-r--r--fs/nfs/namespace.c12
-rw-r--r--fs/nfs/nfs4file.c3
-rw-r--r--fs/nfs/pagelist.c59
-rw-r--r--fs/nfs/write.c1
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/overlayfs/inode.c4
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/vmcore.c5
-rw-r--r--fs/pstore/inode.c5
-rw-r--r--fs/pstore/platform.c4
-rw-r--r--fs/ubifs/orphan.c4
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c2
-rw-r--r--fs/xfs/xfs_attr_inactive.c2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c12
-rw-r--r--fs/xfs/xfs_icache.c10
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_log.c13
-rw-r--r--fs/xfs/xfs_reflink.c1
-rw-r--r--fs/xfs/xfs_super.c3
-rw-r--r--fs/xfs/xfs_trans_ail.c4
85 files changed, 911 insertions, 457 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 6765949b3aab..380ad5ace7cf 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -169,7 +169,7 @@ static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
spin_lock(&server->probe_lock);
- if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+ if (!test_and_set_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
server->cm_epoch = call->epoch;
server->probe.cm_epoch = call->epoch;
goto out;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5c794f4b051a..d1e1caa23c8b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1032,7 +1032,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
struct dentry *parent;
struct inode *inode;
struct key *key;
- afs_dataversion_t dir_version;
+ afs_dataversion_t dir_version, invalid_before;
long de_version;
int ret;
@@ -1084,8 +1084,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
if (de_version == (long)dir_version)
goto out_valid_noupdate;
- dir_version = dir->invalid_before;
- if (de_version - (long)dir_version >= 0)
+ invalid_before = dir->invalid_before;
+ if (de_version - (long)invalid_before >= 0)
goto out_valid;
_debug("dir modified");
@@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
+ afs_dataversion_t data_version;
int ret;
mode |= S_IFDIR;
@@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto error_key;
}
- if (ret == 0 &&
- test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
- afs_edit_dir_for_create);
+ if (ret == 0) {
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
+ afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
+ afs_edit_dir_for_create);
+ up_write(&dvnode->validate_lock);
+ }
key_put(key);
kfree(scb);
@@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key;
+ afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%pd}",
@@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc);
if (ret == 0) {
afs_dir_remove_subdir(dentry);
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_rmdir);
+ up_write(&dvnode->validate_lock);
}
}
@@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc);
if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
ret = afs_dir_remove_link(dvnode, dentry, key);
- if (ret == 0 &&
- test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_remove(dvnode, &dentry->d_name,
- afs_edit_dir_for_unlink);
+
+ if (ret == 0) {
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
+ afs_edit_dir_remove(dvnode, &dentry->d_name,
+ afs_edit_dir_for_unlink);
+ up_write(&dvnode->validate_lock);
+ }
}
if (need_rehash && ret < 0 && ret != -ENOENT)
@@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
+ afs_dataversion_t data_version;
int ret;
mode |= S_IFREG;
@@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto error_key;
}
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
+ up_write(&dvnode->validate_lock);
kfree(scb);
key_put(key);
@@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
struct key *key;
+ afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%llx:%llu},{%pd}",
@@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ data_version = dvnode->status.data_version + 1;
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc);
@@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error_key;
}
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid,
afs_edit_dir_for_link);
+ up_write(&dvnode->validate_lock);
key_put(key);
kfree(scb);
@@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key;
+ afs_dataversion_t data_version;
int ret;
_enter("{%llx:%llu},{%pd},%s",
@@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t data_version = dvnode->status.data_version + 1;
+ data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
goto error_key;
}
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_symlink);
+ up_write(&dvnode->validate_lock);
key_put(key);
kfree(scb);
@@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *tmp = NULL, *rehash = NULL;
struct inode *new_inode;
struct key *key;
+ afs_dataversion_t orig_data_version;
+ afs_dataversion_t new_data_version;
bool new_negative = d_is_negative(new_dentry);
int ret;
@@ -1890,10 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
- afs_dataversion_t orig_data_version;
- afs_dataversion_t new_data_version;
- struct afs_status_cb *new_scb = &scb[1];
-
orig_data_version = orig_dvnode->status.data_version + 1;
if (orig_dvnode != new_dvnode) {
@@ -1904,7 +1928,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_data_version = new_dvnode->status.data_version + 1;
} else {
new_data_version = orig_data_version;
- new_scb = &scb[0];
}
while (afs_select_fileserver(&fc)) {
@@ -1912,7 +1935,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode);
afs_fs_rename(&fc, old_dentry->d_name.name,
new_dvnode, new_dentry->d_name.name,
- &scb[0], new_scb);
+ &scb[0], &scb[1]);
}
afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break,
@@ -1930,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == 0) {
if (rehash)
d_rehash(rehash);
- if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
- afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
- afs_edit_dir_for_rename_0);
+ down_write(&orig_dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
+ orig_dvnode->status.data_version == orig_data_version)
+ afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
+ afs_edit_dir_for_rename_0);
+ if (orig_dvnode != new_dvnode) {
+ up_write(&orig_dvnode->validate_lock);
- if (!new_negative &&
- test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
- afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
- afs_edit_dir_for_rename_1);
+ down_write(&new_dvnode->validate_lock);
+ }
+ if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
+ orig_dvnode->status.data_version == new_data_version) {
+ if (!new_negative)
+ afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
+ afs_edit_dir_for_rename_1);
- if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
&vnode->fid, afs_edit_dir_for_rename_2);
+ }
new_inode = d_inode(new_dentry);
if (new_inode) {
@@ -1957,14 +1987,10 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
* Note that if we ever implement RENAME_EXCHANGE, we'll have
* to update both dentries with opposing dir versions.
*/
- if (new_dvnode != orig_dvnode) {
- afs_update_dentry_version(&fc, old_dentry, &scb[1]);
- afs_update_dentry_version(&fc, new_dentry, &scb[1]);
- } else {
- afs_update_dentry_version(&fc, old_dentry, &scb[0]);
- afs_update_dentry_version(&fc, new_dentry, &scb[0]);
- }
+ afs_update_dentry_version(&fc, old_dentry, &scb[1]);
+ afs_update_dentry_version(&fc, new_dentry, &scb[1]);
d_move(old_dentry, new_dentry);
+ up_write(&new_dvnode->validate_lock);
goto error_tmp;
}
diff --git a/fs/afs/dir_silly.c b/fs/afs/dir_silly.c
index 361088a5edb9..d94e2b7cddff 100644
--- a/fs/afs/dir_silly.c
+++ b/fs/afs/dir_silly.c
@@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
{
struct afs_fs_cursor fc;
struct afs_status_cb *scb;
+ afs_dataversion_t dir_data_version;
int ret = -ERESTARTSYS;
_enter("%pd,%pd", old, new);
@@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
trace_afs_silly_rename(vnode, false);
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
- afs_dataversion_t dir_data_version = dvnode->status.data_version + 1;
+ dir_data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode);
@@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
dvnode->silly_key = key_get(key);
}
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == dir_data_version) {
afs_edit_dir_remove(dvnode, &old->d_name,
afs_edit_dir_for_silly_0);
- if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1);
+ }
+ up_write(&dvnode->validate_lock);
}
kfree(scb);
@@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
}
}
- if (ret == 0 &&
- test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- afs_edit_dir_remove(dvnode, &dentry->d_name,
- afs_edit_dir_for_unlink);
+ if (ret == 0) {
+ down_write(&dvnode->validate_lock);
+ if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ dvnode->status.data_version == dir_data_version)
+ afs_edit_dir_remove(dvnode, &dentry->d_name,
+ afs_edit_dir_for_unlink);
+ up_write(&dvnode->validate_lock);
+ }
}
kfree(scb);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 1f9c5d8e6fe5..68fc46634346 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -65,6 +65,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus);
u64 data_version, size;
u32 type, abort_code;
+ int ret;
abort_code = ntohl(xdr->abort_code);
@@ -78,7 +79,7 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
*/
status->abort_code = abort_code;
scb->have_error = true;
- return 0;
+ goto good;
}
pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
@@ -87,7 +88,8 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
if (abort_code != 0 && inline_error) {
status->abort_code = abort_code;
- return 0;
+ scb->have_error = true;
+ goto good;
}
type = ntohl(xdr->type);
@@ -123,13 +125,16 @@ static int xdr_decode_AFSFetchStatus(const __be32 **_bp,
data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
status->data_version = data_version;
scb->have_status = true;
-
+good:
+ ret = 0;
+advance:
*_bp = (const void *)*_bp + sizeof(*xdr);
- return 0;
+ return ret;
bad:
xdr_dump_bad(*_bp);
- return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ goto advance;
}
static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
@@ -981,16 +986,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
if (ret < 0)
return ret;
- /* unmarshall the reply once we've received all of it */
+ /* If the two dirs are the same, we have two copies of the same status
+ * report, so we just decode it twice.
+ */
bp = call->buffer;
ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (call->out_dir_scb != call->out_scb) {
- ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- }
+ ret = xdr_decode_AFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
xdr_decode_AFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ef732dd4e7ef..15ae9c7f9c00 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1335,7 +1335,7 @@ extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern void afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *);
extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
-extern int afs_check_volume_status(struct afs_volume *, struct key *);
+extern int afs_check_volume_status(struct afs_volume *, struct afs_fs_cursor *);
/*
* write.c
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 172ba569cd60..2a3305e42b14 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -192,7 +192,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
write_unlock(&vnode->volume->servers_lock);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
- error = afs_check_volume_status(vnode->volume, fc->key);
+ error = afs_check_volume_status(vnode->volume, fc);
if (error < 0)
goto failed_set_error;
@@ -281,7 +281,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
- error = afs_check_volume_status(vnode->volume, fc->key);
+ error = afs_check_volume_status(vnode->volume, fc);
if (error < 0)
goto failed_set_error;
@@ -341,7 +341,7 @@ start:
/* See if we need to do an update of the volume record. Note that the
* volume may have moved or even have been deleted.
*/
- error = afs_check_volume_status(vnode->volume, fc->key);
+ error = afs_check_volume_status(vnode->volume, fc);
if (error < 0)
goto failed_set_error;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index b7f3cb2130ca..11b90ac7ea30 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -594,12 +594,9 @@ retry:
}
ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
- TASK_INTERRUPTIBLE);
+ (fc->flags & AFS_FS_CURSOR_INTR) ?
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
- if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
- _leave(" = t [intr]");
- return true;
- }
fc->error = ret;
_leave(" = f [intr]");
return false;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 92ca5e27573b..4310336b9bb8 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -281,7 +281,7 @@ error:
/*
* Make sure the volume record is up to date.
*/
-int afs_check_volume_status(struct afs_volume *volume, struct key *key)
+int afs_check_volume_status(struct afs_volume *volume, struct afs_fs_cursor *fc)
{
time64_t now = ktime_get_real_seconds();
int ret, retries = 0;
@@ -299,7 +299,7 @@ retry:
}
if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
- ret = afs_update_volume_status(volume, key);
+ ret = afs_update_volume_status(volume, fc->key);
clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
@@ -312,7 +312,9 @@ retry:
return 0;
}
- ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, TASK_INTERRUPTIBLE);
+ ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
+ (fc->flags & AFS_FS_CURSOR_INTR) ?
+ TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
if (ret == -ERESTARTSYS) {
_leave(" = %d", ret);
return ret;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index a26126ac7bf1..b5b45c57e1b1 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -165,15 +165,15 @@ static void xdr_dump_bad(const __be32 *bp)
int i;
pr_notice("YFS XDR: Bad status record\n");
- for (i = 0; i < 5 * 4 * 4; i += 16) {
+ for (i = 0; i < 6 * 4 * 4; i += 16) {
memcpy(x, bp, 16);
bp += 4;
pr_notice("%03x: %08x %08x %08x %08x\n",
i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
}
- memcpy(x, bp, 4);
- pr_notice("0x50: %08x\n", ntohl(x[0]));
+ memcpy(x, bp, 8);
+ pr_notice("0x60: %08x %08x\n", ntohl(x[0]), ntohl(x[1]));
}
/*
@@ -186,13 +186,14 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
struct afs_file_status *status = &scb->status;
u32 type;
+ int ret;
status->abort_code = ntohl(xdr->abort_code);
if (status->abort_code != 0) {
if (status->abort_code == VNOVNODE)
status->nlink = 0;
scb->have_error = true;
- return 0;
+ goto good;
}
type = ntohl(xdr->type);
@@ -220,13 +221,16 @@ static int xdr_decode_YFSFetchStatus(const __be32 **_bp,
status->size = xdr_to_u64(xdr->size);
status->data_version = xdr_to_u64(xdr->data_version);
scb->have_status = true;
-
+good:
+ ret = 0;
+advance:
*_bp += xdr_size(xdr);
- return 0;
+ return ret;
bad:
xdr_dump_bad(*_bp);
- return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ ret = afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+ goto advance;
}
/*
@@ -1153,11 +1157,9 @@ static int yfs_deliver_fs_rename(struct afs_call *call)
ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_dir_scb);
if (ret < 0)
return ret;
- if (call->out_dir_scb != call->out_scb) {
- ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
- if (ret < 0)
- return ret;
- }
+ ret = xdr_decode_YFSFetchStatus(&bp, call, call->out_scb);
+ if (ret < 0)
+ return ret;
xdr_decode_YFSVolSync(&bp, call->out_volsync);
_leave(" = 0 [done]");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d612468ee66b..34644ce4b502 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -34,6 +34,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/falloc.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include "internal.h"
struct bdev_inode {
@@ -1975,7 +1976,8 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (bdev_read_only(I_BDEV(bd_inode)))
return -EPERM;
- if (IS_SWAPFILE(bd_inode))
+ /* uswsusp needs write permission to the swap */
+ if (IS_SWAPFILE(bd_inode) && !hibernation_available())
return -ETXTBSY;
if (!iov_iter_count(from))
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 1d32a07bb2d1..309516e6a968 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
+
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
+{
+ if (wq->high)
+ flush_workqueue(wq->high->normal_wq);
+
+ flush_workqueue(wq->normal->normal_wq);
+}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index a4434301d84d..3204daa51b95 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
+void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
#endif
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7f09147872dc..c9a3bbc8c6af 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1987,6 +1987,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
btrfs_release_path(path);
}
+ rcu_read_lock();
list_for_each_entry_rcu(space_info, &info->space_info, list) {
if (!(btrfs_get_alloc_profile(info, space_info->flags) &
(BTRFS_BLOCK_GROUP_RAID10 |
@@ -2007,6 +2008,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
list)
inc_block_group_ro(cache, 1);
}
+ rcu_read_unlock();
btrfs_init_global_block_rsv(info);
ret = check_chunk_block_group_mappings(info);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index d3e15e1d4a91..18509746208b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -6,6 +6,7 @@
#include <linux/slab.h>
#include <linux/iversion.h>
+#include <linux/sched/mm.h>
#include "misc.h"
#include "delayed-inode.h"
#include "disk-io.h"
@@ -805,11 +806,14 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_delayed_item *delayed_item)
{
struct extent_buffer *leaf;
+ unsigned int nofs_flag;
char *ptr;
int ret;
+ nofs_flag = memalloc_nofs_save();
ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
delayed_item->data_len);
+ memalloc_nofs_restore(nofs_flag);
if (ret < 0 && ret != -EEXIST)
return ret;
@@ -937,6 +941,7 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_item *curr, *prev;
+ unsigned int nofs_flag;
int ret = 0;
do_again:
@@ -945,7 +950,9 @@ do_again:
if (!curr)
goto delete_fail;
+ nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
+ memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto delete_fail;
else if (ret > 0) {
@@ -1012,6 +1019,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
+ unsigned int nofs_flag;
int mod;
int ret;
@@ -1024,7 +1032,9 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
else
mod = 1;
+ nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
+ memalloc_nofs_restore(nofs_flag);
if (ret > 0) {
btrfs_release_path(path);
return -ENOENT;
@@ -1075,7 +1085,10 @@ search:
key.type = BTRFS_INODE_EXTREF_KEY;
key.offset = -1;
+
+ nofs_flag = memalloc_nofs_save();
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ memalloc_nofs_restore(nofs_flag);
if (ret < 0)
goto err_out;
ASSERT(ret);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c6c9a6a8e6c8..29fc96dfa508 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3061,6 +3061,18 @@ int __cold open_ctree(struct super_block *sb,
if (ret)
goto fail_tree_roots;
+ /*
+ * If we have a uuid root and we're not being told to rescan we need to
+ * check the generation here so we can set the
+ * BTRFS_FS_UPDATE_UUID_TREE_GEN bit. Otherwise we could commit the
+ * transaction during a balance or the log replay without updating the
+ * uuid generation, and then if we crash we would rescan the uuid tree,
+ * even though it was perfectly fine.
+ */
+ if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) &&
+ fs_info->generation == btrfs_super_uuid_tree_generation(disk_super))
+ set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
+
ret = btrfs_verify_dev_extents(fs_info);
if (ret) {
btrfs_err(fs_info,
@@ -3285,8 +3297,6 @@ int __cold open_ctree(struct super_block *sb,
close_ctree(fs_info);
return ret;
}
- } else {
- set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
}
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
@@ -3990,6 +4000,19 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
*/
btrfs_delete_unused_bgs(fs_info);
+ /*
+ * There might be existing delayed inode workers still running
+ * and holding an empty delayed inode item. We must wait for
+ * them to complete first because they can create a transaction.
+ * This happens when someone calls btrfs_balance_delayed_items()
+ * and then a transaction commit runs the same delayed nodes
+ * before any delayed worker has done something with the nodes.
+ * We must wait for any worker here and not at transaction
+ * commit time since that could cause a deadlock.
+ * This is a very rare case.
+ */
+ btrfs_flush_workqueue(fs_info->delayed_workers);
+
ret = btrfs_commit_super(fs_info);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0f202741e09..a4128dedbbf4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3905,6 +3905,7 @@ int btree_write_cache_pages(struct address_space *mapping,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
+ struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
int ret = 0;
int done = 0;
int nr_to_write_done = 0;
@@ -4018,7 +4019,39 @@ retry:
end_write_bio(&epd, ret);
return ret;
}
- ret = flush_write_bio(&epd);
+ /*
+ * If something went wrong, don't allow any metadata write bio to be
+ * submitted.
+ *
+ * This would prevent use-after-free if we had dirty pages not
+ * cleaned up, which can still happen by fuzzed images.
+ *
+ * - Bad extent tree
+ * Allowing existing tree block to be allocated for other trees.
+ *
+ * - Log tree operations
+ * Exiting tree blocks get allocated to log tree, bumps its
+ * generation, then get cleaned in tree re-balance.
+ * Such tree block will not be written back, since it's clean,
+ * thus no WRITTEN flag set.
+ * And after log writes back, this tree block is not traced by
+ * any dirty extent_io_tree.
+ *
+ * - Offending tree block gets re-dirtied from its original owner
+ * Since it has bumped generation, no WRITTEN flag, it can be
+ * reused without COWing. This tree block will not be traced
+ * by btrfs_transaction::dirty_pages.
+ *
+ * Now such dirty tree block will not be cleaned by any dirty
+ * extent io tree. Thus we don't want to submit such wild eb
+ * if the fs already has error.
+ */
+ if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ ret = flush_write_bio(&epd);
+ } else {
+ ret = -EUCLEAN;
+ end_write_bio(&epd, ret);
+ }
return ret;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a16da274c9aa..3aa31bd7d4ad 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2071,6 +2071,16 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_init_log_ctx(&ctx, inode);
/*
+ * Set the range to full if the NO_HOLES feature is not enabled.
+ * This is to avoid missing file extent items representing holes after
+ * replaying the log.
+ */
+ if (!btrfs_fs_incompat(fs_info, NO_HOLES)) {
+ start = 0;
+ end = LLONG_MAX;
+ }
+
+ /*
* We write the dirty pages in the range and wait until they complete
* out of the ->i_mutex. If so, we can flush the dirty pages by
* multi-task, and make the performance up. See
@@ -2124,6 +2134,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
ret = start_ordered_ops(inode, start, end);
if (ret) {
+ up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
goto out;
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index ff1870ff3474..afc9752e984c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1030,6 +1030,7 @@ out_add_root:
ret = qgroup_rescan_init(fs_info, 0, 1);
if (!ret) {
qgroup_rescan_zero_tracking(fs_info);
+ fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
}
@@ -3263,7 +3264,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
- fs_info->qgroup_rescan_running = true;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -3326,8 +3326,11 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
qgroup_rescan_zero_tracking(fs_info);
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
return 0;
}
@@ -3363,9 +3366,13 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
- if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ }
}
/*
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 995d4b8b1cfd..696e769d069a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -561,8 +561,8 @@ static int should_ignore_root(struct btrfs_root *root)
if (!reloc_root)
return 0;
- if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
- root->fs_info->running_transaction->transid - 1)
+ if (btrfs_header_generation(reloc_root->commit_root) ==
+ root->fs_info->running_transaction->transid)
return 0;
/*
* if there is reloc tree and it was created in previous
@@ -1186,7 +1186,7 @@ out:
free_backref_node(cache, lower);
}
- free_backref_node(cache, node);
+ remove_backref_node(cache, node);
return ERR_PTR(err);
}
ASSERT(!node || !node->detached);
@@ -1298,7 +1298,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
if (!node)
return -ENOMEM;
- node->bytenr = root->node->start;
+ node->bytenr = root->commit_root->start;
node->data = root;
spin_lock(&rc->reloc_root_tree.lock);
@@ -1329,10 +1329,11 @@ static void __del_reloc_root(struct btrfs_root *root)
if (rc && root->node) {
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
+ RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
if (!node)
@@ -1350,7 +1351,7 @@ static void __del_reloc_root(struct btrfs_root *root)
* helper to update the 'address of tree root -> reloc tree'
* mapping
*/
-static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
+static int __update_reloc_root(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *rb_node;
@@ -1359,7 +1360,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
spin_lock(&rc->reloc_root_tree.lock);
rb_node = tree_search(&rc->reloc_root_tree.rb_root,
- root->node->start);
+ root->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct mapping_node, rb_node);
rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
@@ -1371,7 +1372,7 @@ static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr)
BUG_ON((struct btrfs_root *)node->data != root);
spin_lock(&rc->reloc_root_tree.lock);
- node->bytenr = new_bytenr;
+ node->bytenr = root->node->start;
rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
@@ -1529,6 +1530,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
}
if (reloc_root->commit_root != reloc_root->node) {
+ __update_reloc_root(reloc_root);
btrfs_set_root_node(root_item, reloc_root->node);
free_extent_buffer(reloc_root->commit_root);
reloc_root->commit_root = btrfs_root_node(reloc_root);
@@ -2561,7 +2563,21 @@ out:
free_reloc_roots(&reloc_roots);
}
- BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ /*
+ * We used to have
+ *
+ * BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+ *
+ * here, but it's wrong. If we fail to start the transaction in
+ * prepare_to_merge() we will have only 0 ref reloc roots, none of which
+ * have actually been removed from the reloc_root_tree rb tree. This is
+ * fine because we're bailing here, and we hold a reference on the root
+ * for the list that holds it, so these roots will be cleaned up when we
+ * do the reloc_dirty_list afterwards. Meanwhile the root->reloc_root
+ * will be cleaned up on unmount.
+ *
+ * The remaining nodes will be cleaned up by free_reloc_control.
+ */
}
static void free_block_list(struct rb_root *blocks)
@@ -3161,9 +3177,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
ret = relocate_tree_block(trans, rc, node, &block->key,
path);
if (ret < 0) {
- if (ret != -EAGAIN || &block->rb_node == rb_first(blocks))
- err = ret;
- goto out;
+ err = ret;
+ break;
}
}
out:
@@ -4137,12 +4152,6 @@ restart:
if (!RB_EMPTY_ROOT(&blocks)) {
ret = relocate_tree_blocks(trans, rc, &blocks);
if (ret < 0) {
- /*
- * if we fail to relocate tree blocks, force to update
- * backref cache when committing transaction.
- */
- rc->backref_cache.last_trans = trans->transid - 1;
-
if (ret != -EAGAIN) {
err = ret;
break;
@@ -4212,10 +4221,10 @@ restart:
goto out_free;
}
btrfs_commit_transaction(trans);
+out_free:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
return err;
@@ -4584,9 +4593,8 @@ int btrfs_recover_relocation(struct btrfs_root *root)
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
- unset_reloc_control(rc);
err = PTR_ERR(trans);
- goto out_free;
+ goto out_unset;
}
rc->merge_reloc_tree = 1;
@@ -4606,7 +4614,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
list_add_tail(&reloc_root->root_list, &reloc_roots);
- goto out_free;
+ goto out_unset;
}
err = __add_reloc_root(reloc_root);
@@ -4616,7 +4624,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
err = btrfs_commit_transaction(trans);
if (err)
- goto out_free;
+ goto out_unset;
merge_reloc_roots(rc);
@@ -4625,14 +4633,15 @@ int btrfs_recover_relocation(struct btrfs_root *root)
trans = btrfs_join_transaction(rc->extent_root);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out_free;
+ goto out_clean;
}
err = btrfs_commit_transaction(trans);
-
+out_clean:
ret = clean_dirty_subvols(rc);
if (ret < 0 && !err)
err = ret;
-out_free:
+out_unset:
+ unset_reloc_control(rc);
kfree(rc);
out:
if (!list_empty(&reloc_roots))
@@ -4720,11 +4729,6 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
- if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
- if (buf == root->node)
- __update_reloc_root(root, cow->start);
- }
-
level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item))
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 01297c5b2666..8c03f6737882 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -159,25 +159,19 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
return (global->size << 1);
}
-int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info, u64 bytes,
- enum btrfs_reserve_flush_enum flush)
+static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ enum btrfs_reserve_flush_enum flush)
{
u64 profile;
u64 avail;
- u64 used;
int factor;
- /* Don't overcommit when in mixed mode. */
- if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
- return 0;
-
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
profile = btrfs_system_alloc_profile(fs_info);
else
profile = btrfs_metadata_alloc_profile(fs_info);
- used = btrfs_space_info_used(space_info, true);
avail = atomic64_read(&fs_info->free_chunk_space);
/*
@@ -198,6 +192,22 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
avail >>= 3;
else
avail >>= 1;
+ return avail;
+}
+
+int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info, u64 bytes,
+ enum btrfs_reserve_flush_enum flush)
+{
+ u64 avail;
+ u64 used;
+
+ /* Don't overcommit when in mixed mode */
+ if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
+ return 0;
+
+ used = btrfs_space_info_used(space_info, true);
+ avail = calc_available_free_space(fs_info, space_info, flush);
if (used + bytes < space_info->total_bytes + avail)
return 1;
@@ -629,6 +639,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
{
struct reserve_ticket *ticket;
u64 used;
+ u64 avail;
u64 expected;
u64 to_reclaim = 0;
@@ -636,6 +647,20 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
to_reclaim += ticket->bytes;
list_for_each_entry(ticket, &space_info->priority_tickets, list)
to_reclaim += ticket->bytes;
+
+ avail = calc_available_free_space(fs_info, space_info,
+ BTRFS_RESERVE_FLUSH_ALL);
+ used = btrfs_space_info_used(space_info, true);
+
+ /*
+ * We may be flushing because suddenly we have less space than we had
+ * before, and now we're well over-committed based on our current free
+ * space. If that's the case add in our overage so we make sure to put
+ * appropriate pressure on the flushing state machine.
+ */
+ if (space_info->total_bytes + avail < used)
+ to_reclaim += used - (space_info->total_bytes + avail);
+
if (to_reclaim)
return to_reclaim;
diff --git a/fs/buffer.c b/fs/buffer.c
index b8d28370cfd7..a50d928af641 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1377,6 +1377,17 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
}
EXPORT_SYMBOL(__breadahead);
+void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
+ gfp_t gfp)
+{
+ struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
+ if (likely(bh)) {
+ ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
+ brelse(bh);
+ }
+}
+EXPORT_SYMBOL(__breadahead_gfp);
+
/**
* __bread_gfp() - reads a specified block and returns the bh
* @bdev: the block_device to read from
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 28ae0c134700..d050acc1fd5d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1973,8 +1973,12 @@ retry_locked:
}
/* want more caps from mds? */
- if (want & ~(cap->mds_wanted | cap->issued))
- goto ack;
+ if (want & ~cap->mds_wanted) {
+ if (want & ~(cap->mds_wanted | cap->issued))
+ goto ack;
+ if (!__cap_is_valid(cap))
+ goto ack;
+ }
/* things we might delay */
if ((cap->issued & ~retain) == 0)
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index b6bfa94332c3..79dc06881e78 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -315,6 +315,11 @@ static struct dentry *__get_parent(struct super_block *sb,
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err) {
+ ceph_mdsc_put_request(req);
+ return ERR_PTR(err);
+ }
+
inode = req->r_target_inode;
if (inode)
ihold(inode);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5a478cd06e11..7f8c4e308301 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1944,6 +1944,71 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
return 0;
}
+static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
+ struct ceph_inode_info *dst_ci, u64 *dst_off,
+ struct ceph_fs_client *fsc,
+ size_t len, unsigned int flags)
+{
+ struct ceph_object_locator src_oloc, dst_oloc;
+ struct ceph_object_id src_oid, dst_oid;
+ size_t bytes = 0;
+ u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
+ u32 src_objlen, dst_objlen;
+ u32 object_size = src_ci->i_layout.object_size;
+ int ret;
+
+ src_oloc.pool = src_ci->i_layout.pool_id;
+ src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
+ dst_oloc.pool = dst_ci->i_layout.pool_id;
+ dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+
+ while (len >= object_size) {
+ ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
+ object_size, &src_objnum,
+ &src_objoff, &src_objlen);
+ ceph_calc_file_object_mapping(&dst_ci->i_layout, *dst_off,
+ object_size, &dst_objnum,
+ &dst_objoff, &dst_objlen);
+ ceph_oid_init(&src_oid);
+ ceph_oid_printf(&src_oid, "%llx.%08llx",
+ src_ci->i_vino.ino, src_objnum);
+ ceph_oid_init(&dst_oid);
+ ceph_oid_printf(&dst_oid, "%llx.%08llx",
+ dst_ci->i_vino.ino, dst_objnum);
+ /* Do an object remote copy */
+ ret = ceph_osdc_copy_from(&fsc->client->osdc,
+ src_ci->i_vino.snap, 0,
+ &src_oid, &src_oloc,
+ CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+ CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
+ &dst_oid, &dst_oloc,
+ CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+ CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
+ dst_ci->i_truncate_seq,
+ dst_ci->i_truncate_size,
+ CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ fsc->have_copy_from2 = false;
+ pr_notice("OSDs don't support copy-from2; disabling copy offload\n");
+ }
+ dout("ceph_osdc_copy_from returned %d\n", ret);
+ if (!bytes)
+ bytes = ret;
+ goto out;
+ }
+ len -= object_size;
+ bytes += object_size;
+ *src_off += object_size;
+ *dst_off += object_size;
+ }
+
+out:
+ ceph_oloc_destroy(&src_oloc);
+ ceph_oloc_destroy(&dst_oloc);
+ return bytes;
+}
+
static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
struct file *dst_file, loff_t dst_off,
size_t len, unsigned int flags)
@@ -1954,14 +2019,11 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
struct ceph_inode_info *dst_ci = ceph_inode(dst_inode);
struct ceph_cap_flush *prealloc_cf;
struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode);
- struct ceph_object_locator src_oloc, dst_oloc;
- struct ceph_object_id src_oid, dst_oid;
- loff_t endoff = 0, size;
- ssize_t ret = -EIO;
+ loff_t size;
+ ssize_t ret = -EIO, bytes;
u64 src_objnum, dst_objnum, src_objoff, dst_objoff;
- u32 src_objlen, dst_objlen, object_size;
+ u32 src_objlen, dst_objlen;
int src_got = 0, dst_got = 0, err, dirty;
- bool do_final_copy = false;
if (src_inode->i_sb != dst_inode->i_sb) {
struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode);
@@ -2039,22 +2101,14 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (ret < 0)
goto out_caps;
- size = i_size_read(dst_inode);
- endoff = dst_off + len;
-
/* Drop dst file cached pages */
ret = invalidate_inode_pages2_range(dst_inode->i_mapping,
dst_off >> PAGE_SHIFT,
- endoff >> PAGE_SHIFT);
+ (dst_off + len) >> PAGE_SHIFT);
if (ret < 0) {
dout("Failed to invalidate inode pages (%zd)\n", ret);
ret = 0; /* XXX */
}
- src_oloc.pool = src_ci->i_layout.pool_id;
- src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
- dst_oloc.pool = dst_ci->i_layout.pool_id;
- dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
-
ceph_calc_file_object_mapping(&src_ci->i_layout, src_off,
src_ci->i_layout.object_size,
&src_objnum, &src_objoff, &src_objlen);
@@ -2073,6 +2127,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
* starting at the src_off
*/
if (src_objoff) {
+ dout("Initial partial copy of %u bytes\n", src_objlen);
+
/*
* we need to temporarily drop all caps as we'll be calling
* {read,write}_iter, which will get caps again.
@@ -2080,8 +2136,9 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
ret = do_splice_direct(src_file, &src_off, dst_file,
&dst_off, src_objlen, flags);
- if (ret < 0) {
- dout("do_splice_direct returned %d\n", err);
+ /* Abort on short copies or on error */
+ if (ret < src_objlen) {
+ dout("Failed partial copy (%zd)\n", ret);
goto out;
}
len -= ret;
@@ -2094,62 +2151,29 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (err < 0)
goto out_caps;
}
- object_size = src_ci->i_layout.object_size;
- while (len >= object_size) {
- ceph_calc_file_object_mapping(&src_ci->i_layout, src_off,
- object_size, &src_objnum,
- &src_objoff, &src_objlen);
- ceph_calc_file_object_mapping(&dst_ci->i_layout, dst_off,
- object_size, &dst_objnum,
- &dst_objoff, &dst_objlen);
- ceph_oid_init(&src_oid);
- ceph_oid_printf(&src_oid, "%llx.%08llx",
- src_ci->i_vino.ino, src_objnum);
- ceph_oid_init(&dst_oid);
- ceph_oid_printf(&dst_oid, "%llx.%08llx",
- dst_ci->i_vino.ino, dst_objnum);
- /* Do an object remote copy */
- err = ceph_osdc_copy_from(
- &src_fsc->client->osdc,
- src_ci->i_vino.snap, 0,
- &src_oid, &src_oloc,
- CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
- CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
- &dst_oid, &dst_oloc,
- CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
- CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
- dst_ci->i_truncate_seq, dst_ci->i_truncate_size,
- CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
- if (err) {
- if (err == -EOPNOTSUPP) {
- src_fsc->have_copy_from2 = false;
- pr_notice("OSDs don't support copy-from2; disabling copy offload\n");
- }
- dout("ceph_osdc_copy_from returned %d\n", err);
- if (!ret)
- ret = err;
- goto out_caps;
- }
- len -= object_size;
- src_off += object_size;
- dst_off += object_size;
- ret += object_size;
- }
- if (len)
- /* We still need one final local copy */
- do_final_copy = true;
+ size = i_size_read(dst_inode);
+ bytes = ceph_do_objects_copy(src_ci, &src_off, dst_ci, &dst_off,
+ src_fsc, len, flags);
+ if (bytes <= 0) {
+ if (!ret)
+ ret = bytes;
+ goto out_caps;
+ }
+ dout("Copied %zu bytes out of %zu\n", bytes, len);
+ len -= bytes;
+ ret += bytes;
file_update_time(dst_file);
inode_inc_iversion_raw(dst_inode);
- if (endoff > size) {
+ if (dst_off > size) {
int caps_flags = 0;
/* Let the MDS know about dst file size change */
- if (ceph_quota_is_max_bytes_approaching(dst_inode, endoff))
+ if (ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
caps_flags |= CHECK_CAPS_NODELAY;
- if (ceph_inode_set_size(dst_inode, endoff))
+ if (ceph_inode_set_size(dst_inode, dst_off))
caps_flags |= CHECK_CAPS_AUTHONLY;
if (caps_flags)
ceph_check_caps(dst_ci, caps_flags, NULL);
@@ -2165,15 +2189,18 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
out_caps:
put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
- if (do_final_copy) {
- err = do_splice_direct(src_file, &src_off, dst_file,
- &dst_off, len, flags);
- if (err < 0) {
- dout("do_splice_direct returned %d\n", err);
- goto out;
- }
- len -= err;
- ret += err;
+ /*
+ * Do the final manual copy if we still have some bytes left, unless
+ * there were errors in remote object copies (len >= object_size).
+ */
+ if (len && (len < src_ci->i_layout.object_size)) {
+ dout("Final partial copy of %zu bytes\n", len);
+ bytes = do_splice_direct(src_file, &src_off, dst_file,
+ &dst_off, len, flags);
+ if (bytes > 0)
+ ret += bytes;
+ else
+ dout("Failed partial copy (%zd)\n", bytes);
}
out:
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8f9d849a0012..5920820bfbd0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3841,7 +3841,7 @@ again:
if (rc == -ENODATA)
rc = 0;
- ctx->rc = (rc == 0) ? ctx->total_len : rc;
+ ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
mutex_unlock(&ctx->aio_mutex);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b16f8d23e97b..9458b1582342 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2516,25 +2516,26 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
/*
* Attempt to flush data before changing attributes. We need to do
- * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
- * ownership or mode then we may also need to do this. Here, we take
- * the safe way out and just do the flush on all setattr requests. If
- * the flush returns error, store it to report later and continue.
+ * this for ATTR_SIZE and ATTR_MTIME. If the flush of the data
+ * returns error, store it to report later and continue.
*
* BB: This should be smarter. Why bother flushing pages that
* will be truncated anyway? Also, should we error out here if
- * the flush returns error?
+ * the flush returns error? Do we need to check for ATTR_MTIME_SET flag?
*/
- rc = filemap_write_and_wait(inode->i_mapping);
- if (is_interrupt_error(rc)) {
- rc = -ERESTARTSYS;
- goto cifs_setattr_exit;
+ if (attrs->ia_valid & (ATTR_MTIME | ATTR_SIZE | ATTR_CTIME)) {
+ rc = filemap_write_and_wait(inode->i_mapping);
+ if (is_interrupt_error(rc)) {
+ rc = -ERESTARTSYS;
+ goto cifs_setattr_exit;
+ }
+ mapping_set_error(inode->i_mapping, rc);
}
- mapping_set_error(inode->i_mapping, rc);
rc = 0;
- if (attrs->ia_valid & ATTR_MTIME) {
+ if ((attrs->ia_valid & ATTR_MTIME) &&
+ !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile);
if (!rc) {
tcon = tlink_tcon(wfile->tlink);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 0511aaf451d4..497afb0b9960 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -766,6 +766,20 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
spin_lock(&cifs_tcp_ses_lock);
+ if (tcon->tc_count <= 0) {
+ struct TCP_Server_Info *server = NULL;
+
+ WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ if (tcon->ses)
+ server = tcon->ses->server;
+
+ cifs_server_dbg(FYI, "tid=%u: tcon is closing, skipping async close retry of fid %llu %llu\n",
+ tcon->tid, persistent_fid, volatile_fid);
+
+ return 0;
+ }
tcon->tc_count++;
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index cfe9b800ea8c..cd0e7f5005cb 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -693,6 +693,11 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
if (smb3_encryption_required(tcon))
flags |= CIFS_TRANSFORM_REQ;
+ if (!server->ops->new_lease_key)
+ return -EIO;
+
+ server->ops->new_lease_key(pfid);
+
memset(rqst, 0, sizeof(rqst));
resp_buftype[0] = resp_buftype[1] = CIFS_NO_BUFFER;
memset(rsp_iov, 0, sizeof(rsp_iov));
@@ -3248,6 +3253,10 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
* Extending the file
*/
if ((keep_size == false) && i_size_read(inode) < off + len) {
+ rc = inode_newsize_ok(inode, off + len);
+ if (rc)
+ goto out;
+
if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0)
smb2_set_sparse(xid, tcon, cfile, inode, false);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index cb3ee916f527..c97570eb2c18 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -466,7 +466,7 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
struct smb_rqst *rqst, int flags)
{
struct kvec iov;
- struct smb2_transform_hdr tr_hdr;
+ struct smb2_transform_hdr *tr_hdr;
struct smb_rqst cur_rqst[MAX_COMPOUND];
int rc;
@@ -476,28 +476,34 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
if (num_rqst > MAX_COMPOUND - 1)
return -ENOMEM;
- memset(&cur_rqst[0], 0, sizeof(cur_rqst));
- memset(&iov, 0, sizeof(iov));
- memset(&tr_hdr, 0, sizeof(tr_hdr));
-
- iov.iov_base = &tr_hdr;
- iov.iov_len = sizeof(tr_hdr);
- cur_rqst[0].rq_iov = &iov;
- cur_rqst[0].rq_nvec = 1;
-
if (!server->ops->init_transform_rq) {
cifs_server_dbg(VFS, "Encryption requested but transform "
"callback is missing\n");
return -EIO;
}
+ tr_hdr = kmalloc(sizeof(*tr_hdr), GFP_NOFS);
+ if (!tr_hdr)
+ return -ENOMEM;
+
+ memset(&cur_rqst[0], 0, sizeof(cur_rqst));
+ memset(&iov, 0, sizeof(iov));
+ memset(tr_hdr, 0, sizeof(*tr_hdr));
+
+ iov.iov_base = tr_hdr;
+ iov.iov_len = sizeof(*tr_hdr);
+ cur_rqst[0].rq_iov = &iov;
+ cur_rqst[0].rq_nvec = 1;
+
rc = server->ops->init_transform_rq(server, num_rqst + 1,
&cur_rqst[0], rqst);
if (rc)
- return rc;
+ goto out;
rc = __smb_send_rqst(server, num_rqst + 1, &cur_rqst[0]);
smb3_free_compound_rqst(num_rqst, &cur_rqst[1]);
+out:
+ kfree(tr_hdr);
return rc;
}
diff --git a/fs/coredump.c b/fs/coredump.c
index f8296a82d01d..408418e6aa13 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -211,6 +211,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
return -ENOMEM;
(*argv)[(*argc)++] = 0;
++pat_ptr;
+ if (!(*pat_ptr))
+ return -ENOMEM;
}
/* Repeat as long as we have more pattern to process and more output
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index db987b5110a9..f34757e8f25f 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -175,8 +175,13 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
if (r)
goto out;
- real_fops = fops_get(real_fops);
- if (!real_fops) {
+ if (!fops_get(real_fops)) {
+#ifdef MODULE
+ if (real_fops->owner &&
+ real_fops->owner->state == MODULE_STATE_GOING)
+ goto out;
+#endif
+
/* Huh? Module did not clean up after itself at exit? */
WARN(1, "debugfs file owner did not clean up at exit: %pd",
dentry);
@@ -305,8 +310,13 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
if (r)
goto out;
- real_fops = fops_get(real_fops);
- if (!real_fops) {
+ if (!fops_get(real_fops)) {
+#ifdef MODULE
+ if (real_fops->owner &&
+ real_fops->owner->state == MODULE_STATE_GOING)
+ goto out;
+#endif
+
/* Huh? Module did not cleanup after itself at exit? */
WARN(1, "debugfs file owner did not clean up at exit: %pd",
dentry);
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index fddc5059c930..df42ea552a44 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -286,7 +286,7 @@ static unsigned long erofs_shrink_scan(struct shrinker *shrink,
spin_unlock(&erofs_sb_list_lock);
sbi->shrinker_run_no = run_no;
- freed += erofs_shrink_workstation(sbi, nr);
+ freed += erofs_shrink_workstation(sbi, nr - freed);
spin_lock(&erofs_sb_list_lock);
/* Get the next list element before we move this one */
diff --git a/fs/exec.c b/fs/exec.c
index 4bb61917a29a..60d0598ae3ad 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,7 +1387,7 @@ void setup_new_exec(struct linux_binprm * bprm)
/* An exec changes our domain. We are no longer part of the thread
group */
- current->self_exec_id++;
+ WRITE_ONCE(current->self_exec_id, current->self_exec_id + 1);
flush_signal_handlers(current, 0);
}
EXPORT_SYMBOL(setup_new_exec);
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 0456bc990b5e..62acbe27d8bf 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -56,6 +56,7 @@
#include <linux/buffer_head.h>
#include <linux/init.h>
+#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
@@ -84,8 +85,8 @@
printk("\n"); \
} while (0)
#else
-# define ea_idebug(f...)
-# define ea_bdebug(f...)
+# define ea_idebug(inode, f...) no_printk(f)
+# define ea_bdebug(bh, f...) no_printk(f)
#endif
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
@@ -864,8 +865,7 @@ ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
true);
if (error) {
if (error == -EBUSY) {
- ea_bdebug(bh, "already in cache (%d cache entries)",
- atomic_read(&ext2_xattr_cache->c_entry_count));
+ ea_bdebug(bh, "already in cache");
error = 0;
}
} else
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 954013d6076b..c5e190fd4589 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3532,8 +3532,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
(unsigned long long)map->m_lblk, map_len);
sbi = EXT4_SB(inode->i_sb);
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
+ eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
+ >> inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map_len)
eof_block = map->m_lblk + map_len;
@@ -3785,8 +3785,8 @@ static int ext4_split_convert_extents(handle_t *handle,
__func__, inode->i_ino,
(unsigned long long)map->m_lblk, map->m_len);
- eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
- inode->i_sb->s_blocksize_bits;
+ eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
+ >> inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
eof_block = map->m_lblk + map->m_len;
/*
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f95ee99091e4..eab18b7b56e7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -663,7 +663,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* block has been written back to disk. (Yes, these values are
* somewhat arbitrary...)
*/
-#define RECENTCY_MIN 5
+#define RECENTCY_MIN 60
#define RECENTCY_DIRTY 300
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fa0ff78dc033..4d3c81fd0902 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1974,7 +1974,7 @@ static int ext4_writepage(struct page *page,
bool keep_towrite = false;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
- ext4_invalidatepage(page, 0, PAGE_SIZE);
+ inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return -EIO;
}
@@ -4348,7 +4348,7 @@ make_io:
if (end > table)
end = table;
while (b <= end)
- sb_breadahead(sb, b++);
+ sb_breadahead_unmovable(sb, b++);
}
/*
@@ -4812,7 +4812,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode_info *ei)
{
struct inode *inode = &(ei->vfs_inode);
- u64 i_blocks = inode->i_blocks;
+ u64 i_blocks = READ_ONCE(inode->i_blocks);
struct super_block *sb = inode->i_sb;
if (i_blocks <= ~0U) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 51a78eb65f3c..2f7aebee1a7b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1936,7 +1936,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
int free;
free = e4b->bd_info->bb_free;
- BUG_ON(free <= 0);
+ if (WARN_ON(free <= 0))
+ return;
i = e4b->bd_info->bb_first_free;
@@ -1959,7 +1960,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
}
mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
- BUG_ON(ex.fe_len <= 0);
+ if (WARN_ON(ex.fe_len <= 0))
+ break;
if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
"%d free clusters as per "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0c7c4adb664e..446158ab507d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -372,7 +372,8 @@ static void save_error_info(struct super_block *sb, const char *func,
unsigned int line)
{
__save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
}
/*
@@ -3609,7 +3610,8 @@ int ext4_calculate_overhead(struct super_block *sb)
*/
if (sbi->s_journal && !sbi->journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
- else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
+ /* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
if (j_inode) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
@@ -4157,7 +4159,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
- sbi->s_blocks_per_group);
+ sbi->s_inodes_per_group);
goto failed_mount;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
@@ -4286,9 +4288,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
"(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
+ "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
@@ -4331,7 +4333,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Pre-read the descriptors into the buffer cache */
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
- sb_breadahead(sb, block);
+ sb_breadahead_unmovable(sb, block);
}
for (i = 0; i < db_count; i++) {
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 44e84ac5c941..79aaf06004f6 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1250,20 +1250,20 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
{
DEFINE_WAIT(wait);
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
- if (!get_pages(sbi, F2FS_WB_CP_DATA))
+ if (!get_pages(sbi, type))
break;
if (unlikely(f2fs_cp_error(sbi)))
break;
- io_schedule_timeout(5*HZ);
+ io_schedule_timeout(HZ/50);
}
finish_wait(&sbi->cp_wait, &wait);
}
@@ -1301,10 +1301,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
- is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+ if (is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
+ __set_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+ else
+ __clear_ckpt_flags(ckpt, CP_RESIZEFS_FLAG);
+
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
else
@@ -1384,8 +1388,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
- !f2fs_cp_error(sbi));
/*
* modify checkpoint
@@ -1493,11 +1495,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
- f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_META) &&
- !f2fs_cp_error(sbi));
+ /* Wait for all dirty meta pages to be submitted for IO */
+ f2fs_wait_on_all_pages(sbi, F2FS_DIRTY_META);
/* wait for previous submitted meta pages writeback */
- f2fs_wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1506,7 +1508,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- f2fs_wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
/*
* invalidate intermediate page cache borrowed from meta inode which are
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d8a64be90a50..837e14b7ef52 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -385,16 +385,22 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
cc->cbuf->reserved[i] = cpu_to_le32(0);
+ nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+
+ /* zero out any unused part of the last page */
+ memset(&cc->cbuf->cdata[cc->clen], 0,
+ (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
+
vunmap(cc->cbuf);
vunmap(cc->rbuf);
- nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
-
for (i = nr_cpages; i < cc->nr_cpages; i++) {
f2fs_put_compressed_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
+ cops->destroy_compress_ctx(cc);
+
cc->nr_cpages = nr_cpages;
trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
@@ -474,6 +480,8 @@ out_vunmap_cbuf:
out_vunmap_rbuf:
vunmap(dic->rbuf);
out_free_dic:
+ if (verity)
+ refcount_add(dic->nr_cpages - 1, &dic->ref);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
@@ -532,8 +540,7 @@ static bool __cluster_may_compress(struct compress_ctx *cc)
return true;
}
-/* return # of compressed block addresses */
-static int f2fs_compressed_blocks(struct compress_ctx *cc)
+static int __f2fs_cluster_blocks(struct compress_ctx *cc, bool compr)
{
struct dnode_of_data dn;
int ret;
@@ -556,8 +563,13 @@ static int f2fs_compressed_blocks(struct compress_ctx *cc)
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node + i);
- if (blkaddr != NULL_ADDR)
- ret++;
+ if (compr) {
+ if (__is_valid_data_blkaddr(blkaddr))
+ ret++;
+ } else {
+ if (blkaddr != NULL_ADDR)
+ ret++;
+ }
}
}
fail:
@@ -565,6 +577,18 @@ fail:
return ret;
}
+/* return # of compressed blocks in compressed cluster */
+static int f2fs_compressed_blocks(struct compress_ctx *cc)
+{
+ return __f2fs_cluster_blocks(cc, true);
+}
+
+/* return # of valid blocks in compressed cluster */
+static int f2fs_cluster_blocks(struct compress_ctx *cc, bool compr)
+{
+ return __f2fs_cluster_blocks(cc, false);
+}
+
int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
{
struct compress_ctx cc = {
@@ -574,7 +598,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
.cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size,
};
- return f2fs_compressed_blocks(&cc);
+ return f2fs_cluster_blocks(&cc, false);
}
static bool cluster_may_compress(struct compress_ctx *cc)
@@ -623,7 +647,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
bool prealloc;
retry:
- ret = f2fs_compressed_blocks(cc);
+ ret = f2fs_cluster_blocks(cc, false);
if (ret <= 0)
return ret;
@@ -772,7 +796,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
.encrypted_page = NULL,
.compressed_page = NULL,
.submitted = false,
- .need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
.encrypted = f2fs_encrypted_file(cc->inode),
@@ -785,9 +808,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
loff_t psize;
int i, err;
- set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
+ if (!f2fs_trylock_op(sbi))
+ return -EAGAIN;
- f2fs_lock_op(sbi);
+ set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
if (err)
@@ -845,7 +869,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
blkaddr = datablock_addr(dn.inode, dn.node_page,
dn.ofs_in_node);
- fio.page = cic->rpages[i];
+ fio.page = cc->rpages[i];
fio.old_blkaddr = blkaddr;
/* cluster header */
@@ -984,6 +1008,15 @@ retry_write:
unlock_page(cc->rpages[i]);
ret = 0;
} else if (ret == -EAGAIN) {
+ /*
+ * for quota file, just redirty left pages to
+ * avoid deadlock caused by cluster update race
+ * from foreground operation.
+ */
+ if (IS_NOQUOTA(cc->inode)) {
+ err = 0;
+ goto out_err;
+ }
ret = 0;
cond_resched();
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -992,16 +1025,12 @@ retry_write:
goto retry_write;
}
err = ret;
- goto out_fail;
+ goto out_err;
}
*submitted += _submitted;
}
return 0;
-
-out_fail:
- /* TODO: revoke partially updated block addresses */
- BUG_ON(compr_blocks);
out_err:
for (++i; i < cc->cluster_size; i++) {
if (!cc->rpages[i])
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b27b72107911..34990866cfe9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -191,12 +191,37 @@ static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
static void f2fs_verify_bio(struct bio *bio)
{
- struct page *page = bio_first_page_all(bio);
- struct decompress_io_ctx *dic =
- (struct decompress_io_ctx *)page_private(page);
+ struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
+
+ bio_for_each_segment_all(bv, bio, iter_all) {
+ struct page *page = bv->bv_page;
+ struct decompress_io_ctx *dic;
+
+ dic = (struct decompress_io_ctx *)page_private(page);
+
+ if (dic) {
+ if (refcount_dec_not_one(&dic->ref))
+ continue;
+ f2fs_verify_pages(dic->rpages,
+ dic->cluster_size);
+ f2fs_free_dic(dic);
+ continue;
+ }
+
+ if (bio->bi_status || PageError(page))
+ goto clear_uptodate;
- f2fs_verify_pages(dic->rpages, dic->cluster_size);
- f2fs_free_dic(dic);
+ if (fsverity_verify_page(page)) {
+ SetPageUptodate(page);
+ goto unlock;
+ }
+clear_uptodate:
+ ClearPageUptodate(page);
+ ClearPageError(page);
+unlock:
+ unlock_page(page);
+ }
}
#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5355be6b6755..71801a1709f0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -100,6 +100,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
+#define F2FS_MOUNT_NORECOVERY 0x04000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -675,6 +676,44 @@ enum {
MAX_GC_FAILURE
};
+/* used for f2fs_inode_info->flags */
+enum {
+ FI_NEW_INODE, /* indicate newly allocated inode */
+ FI_DIRTY_INODE, /* indicate inode is dirty or not */
+ FI_AUTO_RECOVER, /* indicate inode is recoverable */
+ FI_DIRTY_DIR, /* indicate directory has dirty pages */
+ FI_INC_LINK, /* need to increment i_nlink */
+ FI_ACL_MODE, /* indicate acl mode */
+ FI_NO_ALLOC, /* should not allocate any blocks */
+ FI_FREE_NID, /* free allocated nide */
+ FI_NO_EXTENT, /* not to use the extent cache */
+ FI_INLINE_XATTR, /* used for inline xattr */
+ FI_INLINE_DATA, /* used for inline data*/
+ FI_INLINE_DENTRY, /* used for inline dentry */
+ FI_APPEND_WRITE, /* inode has appended data */
+ FI_UPDATE_WRITE, /* inode has in-place-update data */
+ FI_NEED_IPU, /* used for ipu per file */
+ FI_ATOMIC_FILE, /* indicate atomic file */
+ FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
+ FI_VOLATILE_FILE, /* indicate volatile file */
+ FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
+ FI_DROP_CACHE, /* drop dirty page cache */
+ FI_DATA_EXIST, /* indicate data exists */
+ FI_INLINE_DOTS, /* indicate inline dot dentries */
+ FI_DO_DEFRAG, /* indicate defragment is running */
+ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
+ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
+ FI_HOT_DATA, /* indicate file is hot */
+ FI_EXTRA_ATTR, /* indicate file has extra attribute */
+ FI_PROJ_INHERIT, /* indicate file inherits projectid */
+ FI_PIN_FILE, /* indicate file should not be gced */
+ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
+ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
+ FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
+ FI_MMAP_FILE, /* indicate file was mmapped */
+ FI_MAX, /* max flag, never be used */
+};
+
struct f2fs_inode_info {
struct inode vfs_inode; /* serve a vfs inode */
unsigned long i_flags; /* keep an inode flags for ioctl */
@@ -687,7 +726,7 @@ struct f2fs_inode_info {
umode_t i_acl_mode; /* keep file acl mode temporarily */
/* Use below internally in f2fs*/
- unsigned long flags; /* use to pass per-file flags */
+ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */
struct rw_semaphore i_sem; /* protect fi info */
atomic_t dirty_pages; /* # of dirty pages */
f2fs_hash_t chash; /* hash value of given file name */
@@ -2497,43 +2536,6 @@ static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags)
return flags & F2FS_OTHER_FLMASK;
}
-/* used for f2fs_inode_info->flags */
-enum {
- FI_NEW_INODE, /* indicate newly allocated inode */
- FI_DIRTY_INODE, /* indicate inode is dirty or not */
- FI_AUTO_RECOVER, /* indicate inode is recoverable */
- FI_DIRTY_DIR, /* indicate directory has dirty pages */
- FI_INC_LINK, /* need to increment i_nlink */
- FI_ACL_MODE, /* indicate acl mode */
- FI_NO_ALLOC, /* should not allocate any blocks */
- FI_FREE_NID, /* free allocated nide */
- FI_NO_EXTENT, /* not to use the extent cache */
- FI_INLINE_XATTR, /* used for inline xattr */
- FI_INLINE_DATA, /* used for inline data*/
- FI_INLINE_DENTRY, /* used for inline dentry */
- FI_APPEND_WRITE, /* inode has appended data */
- FI_UPDATE_WRITE, /* inode has in-place-update data */
- FI_NEED_IPU, /* used for ipu per file */
- FI_ATOMIC_FILE, /* indicate atomic file */
- FI_ATOMIC_COMMIT, /* indicate the state of atomical committing */
- FI_VOLATILE_FILE, /* indicate volatile file */
- FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */
- FI_DROP_CACHE, /* drop dirty page cache */
- FI_DATA_EXIST, /* indicate data exists */
- FI_INLINE_DOTS, /* indicate inline dot dentries */
- FI_DO_DEFRAG, /* indicate defragment is running */
- FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
- FI_NO_PREALLOC, /* indicate skipped preallocated blocks */
- FI_HOT_DATA, /* indicate file is hot */
- FI_EXTRA_ATTR, /* indicate file has extra attribute */
- FI_PROJ_INHERIT, /* indicate file inherits projectid */
- FI_PIN_FILE, /* indicate file should not be gced */
- FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
- FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
- FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
- FI_MMAP_FILE, /* indicate file was mmapped */
-};
-
static inline void __mark_inode_dirty_flag(struct inode *inode,
int flag, bool set)
{
@@ -2555,20 +2557,18 @@ static inline void __mark_inode_dirty_flag(struct inode *inode,
static inline void set_inode_flag(struct inode *inode, int flag)
{
- if (!test_bit(flag, &F2FS_I(inode)->flags))
- set_bit(flag, &F2FS_I(inode)->flags);
+ test_and_set_bit(flag, F2FS_I(inode)->flags);
__mark_inode_dirty_flag(inode, flag, true);
}
static inline int is_inode_flag_set(struct inode *inode, int flag)
{
- return test_bit(flag, &F2FS_I(inode)->flags);
+ return test_bit(flag, F2FS_I(inode)->flags);
}
static inline void clear_inode_flag(struct inode *inode, int flag)
{
- if (test_bit(flag, &F2FS_I(inode)->flags))
- clear_bit(flag, &F2FS_I(inode)->flags);
+ test_and_clear_bit(flag, F2FS_I(inode)->flags);
__mark_inode_dirty_flag(inode, flag, false);
}
@@ -2659,19 +2659,19 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri)
struct f2fs_inode_info *fi = F2FS_I(inode);
if (ri->i_inline & F2FS_INLINE_XATTR)
- set_bit(FI_INLINE_XATTR, &fi->flags);
+ set_bit(FI_INLINE_XATTR, fi->flags);
if (ri->i_inline & F2FS_INLINE_DATA)
- set_bit(FI_INLINE_DATA, &fi->flags);
+ set_bit(FI_INLINE_DATA, fi->flags);
if (ri->i_inline & F2FS_INLINE_DENTRY)
- set_bit(FI_INLINE_DENTRY, &fi->flags);
+ set_bit(FI_INLINE_DENTRY, fi->flags);
if (ri->i_inline & F2FS_DATA_EXIST)
- set_bit(FI_DATA_EXIST, &fi->flags);
+ set_bit(FI_DATA_EXIST, fi->flags);
if (ri->i_inline & F2FS_INLINE_DOTS)
- set_bit(FI_INLINE_DOTS, &fi->flags);
+ set_bit(FI_INLINE_DOTS, fi->flags);
if (ri->i_inline & F2FS_EXTRA_ATTR)
- set_bit(FI_EXTRA_ATTR, &fi->flags);
+ set_bit(FI_EXTRA_ATTR, fi->flags);
if (ri->i_inline & F2FS_PIN_FILE)
- set_bit(FI_PIN_FILE, &fi->flags);
+ set_bit(FI_PIN_FILE, fi->flags);
}
static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri)
@@ -3308,7 +3308,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
void f2fs_update_dirty_page(struct inode *inode, struct page *page);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
-void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
+void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_checkpoint_caches(void);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0d4da644df3b..a41c633ac6cf 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1787,12 +1787,15 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 masked_flags = fi->i_flags & mask;
+
+ f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
- if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) {
+ if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
if (!f2fs_empty_dir(inode))
@@ -1806,9 +1809,9 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
return -EINVAL;
}
- if ((iflags ^ fi->i_flags) & F2FS_COMPR_FL) {
+ if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
if (S_ISREG(inode->i_mode) &&
- (fi->i_flags & F2FS_COMPR_FL || i_size_read(inode) ||
+ (masked_flags & F2FS_COMPR_FL || i_size_read(inode) ||
F2FS_HAS_BLOCKS(inode)))
return -EINVAL;
if (iflags & F2FS_NOCOMP_FL)
@@ -1825,8 +1828,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
set_compress_context(inode);
}
}
- if ((iflags ^ fi->i_flags) & F2FS_NOCOMP_FL) {
- if (fi->i_flags & F2FS_COMPR_FL)
+ if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) {
+ if (masked_flags & F2FS_COMPR_FL)
return -EINVAL;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index db8725d473b5..3cced15efebc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1018,8 +1018,8 @@ next_step:
* race condition along with SSR block allocation.
*/
if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
- get_valid_blocks(sbi, segno, false) ==
- sbi->blocks_per_seg)
+ get_valid_blocks(sbi, segno, true) ==
+ BLKS_PER_SEC(sbi))
return submitted;
if (check_valid_map(sbi, segno, off) == 0)
@@ -1434,12 +1434,19 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
{
struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
- int section_count = le32_to_cpu(raw_sb->section_count);
- int segment_count = le32_to_cpu(raw_sb->segment_count);
- int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
- long long block_count = le64_to_cpu(raw_sb->block_count);
+ int section_count;
+ int segment_count;
+ int segment_count_main;
+ long long block_count;
int segs = secs * sbi->segs_per_sec;
+ down_write(&sbi->sb_lock);
+
+ section_count = le32_to_cpu(raw_sb->section_count);
+ segment_count = le32_to_cpu(raw_sb->segment_count);
+ segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ block_count = le64_to_cpu(raw_sb->block_count);
+
raw_sb->section_count = cpu_to_le32(section_count + secs);
raw_sb->segment_count = cpu_to_le32(segment_count + segs);
raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
@@ -1453,6 +1460,8 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
raw_sb->devs[last_dev].total_segments =
cpu_to_le32(dev_segs + segs);
}
+
+ up_write(&sbi->sb_lock);
}
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
@@ -1570,11 +1579,17 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
goto out;
}
+ mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ mutex_unlock(&sbi->cp_mutex);
+
err = f2fs_sync_fs(sbi->sb, 1);
if (err) {
+ mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, secs);
+ mutex_unlock(&sbi->cp_mutex);
update_sb_metadata(sbi, secs);
f2fs_commit_super(sbi, false);
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 78c3f1d70f1d..901e9f4ce12b 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -345,7 +345,7 @@ static int do_read_inode(struct inode *inode)
fi->i_flags = le32_to_cpu(ri->i_flags);
if (S_ISREG(inode->i_mode))
fi->i_flags &= ~F2FS_PROJINHERIT_FL;
- fi->flags = 0;
+ bitmap_zero(fi->flags, FI_MAX);
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9d02cdcdbb07..e58c4c628834 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1562,15 +1562,16 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
- set_page_writeback(page);
- ClearPageError(page);
-
+ /* should add to global list before clearing PAGECACHE status */
if (f2fs_in_warm_node_list(sbi, page)) {
seq = f2fs_add_fsync_node_entry(sbi, page);
if (seq_id)
*seq_id = seq;
}
+ set_page_writeback(page);
+ ClearPageError(page);
+
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 65a7a432dfee..8deb0a260d92 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -446,7 +446,7 @@ static int parse_options(struct super_block *sb, char *options)
break;
case Opt_norecovery:
/* this option mounts f2fs with ro */
- set_opt(sbi, DISABLE_ROLL_FORWARD);
+ set_opt(sbi, NORECOVERY);
if (!f2fs_readonly(sb))
return -EINVAL;
break;
@@ -1172,7 +1172,7 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
- f2fs_wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages(sbi, F2FS_WB_CP_DATA);
f2fs_bug_on(sbi, sbi->fsync_node_num);
@@ -1446,6 +1446,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
}
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
seq_puts(seq, ",disable_roll_forward");
+ if (test_opt(sbi, NORECOVERY))
+ seq_puts(seq, ",norecovery");
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
else
@@ -1927,6 +1929,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
int offset = off & (sb->s_blocksize - 1);
size_t towrite = len;
struct page *page;
+ void *fsdata = NULL;
char *kaddr;
int err = 0;
int tocopy;
@@ -1936,7 +1939,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
towrite);
retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
- &page, NULL);
+ &page, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
@@ -1952,7 +1955,7 @@ retry:
flush_dcache_page(page);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
- page, NULL);
+ page, fsdata);
offset = 0;
towrite -= tocopy;
off += tocopy;
@@ -3598,7 +3601,8 @@ try_onemore:
goto reset_checkpoint;
/* recover fsynced data */
- if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
+ if (!test_opt(sbi, DISABLE_ROLL_FORWARD) &&
+ !test_opt(sbi, NORECOVERY)) {
/*
* mount should be failed, when device has readonly mode, and
* previous checkpoint was not done by clean system shutdown.
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 77bf5f95362d..90b8d879fbaf 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -272,7 +272,9 @@ struct file_system_type *get_fs_type(const char *name)
fs = __get_fs_type(name, len);
if (!fs && (request_module("fs-%.*s", len, name) == 0)) {
fs = __get_fs_type(name, len);
- WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name);
+ if (!fs)
+ pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n",
+ len, name);
}
if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) {
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d0eceaff3cea..19ebc6cd0f2b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -645,6 +645,9 @@ __acquires(&gl->gl_lockref.lock)
goto out_unlock;
if (nonblock)
goto out_sched;
+ smp_mb();
+ if (atomic_read(&gl->gl_revokes) != 0)
+ goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 061d22e1ceb6..efc899a3876b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -89,8 +89,32 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
INIT_LIST_HEAD(&tr.tr_databuf);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
- if (!tr.tr_revokes)
+ if (!tr.tr_revokes) {
+ bool have_revokes;
+ bool log_in_flight;
+
+ /*
+ * We have nothing on the ail, but there could be revokes on
+ * the sdp revoke queue, in which case, we still want to flush
+ * the log and wait for it to finish.
+ *
+ * If the sdp revoke list is empty too, we might still have an
+ * io outstanding for writing revokes, so we should wait for
+ * it before returning.
+ *
+ * If none of these conditions are true, our revokes are all
+ * flushed and we can return.
+ */
+ gfs2_log_lock(sdp);
+ have_revokes = !list_empty(&sdp->sd_log_revokes);
+ log_in_flight = atomic_read(&sdp->sd_log_in_flight);
+ gfs2_log_unlock(sdp);
+ if (have_revokes)
+ goto flush;
+ if (log_in_flight)
+ log_flush_wait(sdp);
return;
+ }
/* A shortened, inline version of gfs2_trans_begin()
* tr->alloced is not set since the transaction structure is
@@ -105,6 +129,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
__gfs2_ail_flush(gl, 0, tr.tr_revokes);
gfs2_trans_end(sdp);
+flush:
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_AIL_EMPTY_GL);
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 00a2e721a374..60d911e293e6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -104,16 +104,22 @@ __acquires(&sdp->sd_ail_lock)
gfs2_assert(sdp, bd->bd_tr == tr);
if (!buffer_busy(bh)) {
- if (!buffer_uptodate(bh) &&
- !test_and_set_bit(SDF_AIL1_IO_ERROR,
+ if (buffer_uptodate(bh)) {
+ list_move(&bd->bd_ail_st_list,
+ &tr->tr_ail2_list);
+ continue;
+ }
+ if (!test_and_set_bit(SDF_AIL1_IO_ERROR,
&sdp->sd_flags)) {
gfs2_io_error_bh(sdp, bh);
*withdraw = true;
}
- list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
- continue;
}
+ if (gfs2_withdrawn(sdp)) {
+ gfs2_remove_from_ail(bd);
+ continue;
+ }
if (!buffer_dirty(bh))
continue;
if (gl == bd->bd_gl)
@@ -512,7 +518,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
}
-static void log_flush_wait(struct gfs2_sbd *sdp)
+void log_flush_wait(struct gfs2_sbd *sdp)
{
DEFINE_WAIT(wait);
@@ -862,6 +868,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
if (gfs2_ail1_empty(sdp))
break;
}
+ if (gfs2_withdrawn(sdp))
+ goto out;
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
@@ -874,6 +882,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
+out:
trace_gfs2_log_flush(sdp, 0, flags);
up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index c0a65e5a126b..c1cd6ae17659 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -73,6 +73,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
u32 type);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
+extern void log_flush_wait(struct gfs2_sbd *sdp);
extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c
index e6d554476db4..eeebe80c6be4 100644
--- a/fs/hfsplus/attributes.c
+++ b/fs/hfsplus/attributes.c
@@ -292,6 +292,10 @@ static int __hfsplus_delete_attr(struct inode *inode, u32 cnid,
return -ENOENT;
}
+ /* Avoid btree corruption */
+ hfs_bnode_read(fd->bnode, fd->search_key,
+ fd->keyoffset, fd->keylength);
+
err = hfs_brec_remove(fd);
if (err)
return err;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 3affd96a98ba..a46de2cfc28e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -565,6 +565,7 @@ struct io_kiocb {
struct list_head link_list;
unsigned int flags;
refcount_t refs;
+ unsigned long fsize;
u64 user_data;
u32 result;
u32 sequence;
@@ -1242,7 +1243,6 @@ fallback:
req = io_get_fallback_req(ctx);
if (req)
goto got_it;
- percpu_ref_put(&ctx->refs);
return NULL;
}
@@ -2295,6 +2295,8 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
+ req->fsize = rlimit(RLIMIT_FSIZE);
+
/* either don't need iovec imported or already have it */
if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
@@ -2367,10 +2369,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
}
kiocb->ki_flags |= IOCB_WRITE;
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+
if (req->file->f_op->write_iter)
ret2 = call_write_iter(req->file, kiocb, &iter);
else
ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
+
+ if (!force_nonblock)
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+
/*
* Raw bdev writes will -EOPNOTSUPP for IOCB_NOWAIT. Just
* retry them without IOCB_NOWAIT.
@@ -2513,8 +2522,10 @@ static void io_fallocate_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
+ current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
if (ret < 0)
req_set_fail_links(req);
io_cqring_add_event(req, ret);
@@ -2532,6 +2543,7 @@ static int io_fallocate_prep(struct io_kiocb *req,
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->addr);
req->sync.mode = READ_ONCE(sqe->len);
+ req->fsize = rlimit(RLIMIT_FSIZE);
return 0;
}
@@ -2571,6 +2583,8 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
req->open.how.mode = READ_ONCE(sqe->len);
fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
req->open.how.flags = READ_ONCE(sqe->open_flags);
+ if (force_o_largefile())
+ req->open.how.flags |= O_LARGEFILE;
req->open.filename = getname(fname);
if (IS_ERR(req->open.filename)) {
@@ -5424,13 +5438,6 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
struct sk_buff *skb;
int i, nr_files;
- if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
- unsigned long inflight = ctx->user->unix_inflight + nr;
-
- if (inflight > task_rlimit(current, RLIMIT_NOFILE))
- return -EMFILE;
- }
-
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
@@ -5607,7 +5614,7 @@ static void io_ring_file_put(struct io_ring_ctx *ctx, struct file *file)
struct io_file_put {
struct llist_node llist;
struct file *file;
- struct completion *done;
+ bool free_pfile;
};
static void io_ring_file_ref_flush(struct fixed_file_data *data)
@@ -5618,9 +5625,7 @@ static void io_ring_file_ref_flush(struct fixed_file_data *data)
while ((node = llist_del_all(&data->put_llist)) != NULL) {
llist_for_each_entry_safe(pfile, tmp, node, llist) {
io_ring_file_put(data->ctx, pfile->file);
- if (pfile->done)
- complete(pfile->done);
- else
+ if (pfile->free_pfile)
kfree(pfile);
}
}
@@ -5820,7 +5825,6 @@ static bool io_queue_file_removal(struct fixed_file_data *data,
struct file *file)
{
struct io_file_put *pfile, pfile_stack;
- DECLARE_COMPLETION_ONSTACK(done);
/*
* If we fail allocating the struct we need for doing async reomval
@@ -5829,15 +5833,15 @@ static bool io_queue_file_removal(struct fixed_file_data *data,
pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
if (!pfile) {
pfile = &pfile_stack;
- pfile->done = &done;
- }
+ pfile->free_pfile = false;
+ } else
+ pfile->free_pfile = true;
pfile->file = file;
llist_add(&pfile->llist, &data->put_llist);
if (pfile == &pfile_stack) {
percpu_ref_switch_to_atomic(&data->refs, io_atomic_switch);
- wait_for_completion(&done);
flush_work(&data->ref_work);
return false;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 27373f5792a4..e855d8260433 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -997,9 +997,10 @@ restart_loop:
* journalled data) we need to unmap buffer and clear
* more bits. We also need to be careful about the check
* because the data page mapping can get cleared under
- * out hands, which alse need not to clear more bits
- * because the page and buffers will be freed and can
- * never be reused once we are done with them.
+ * our hands. Note that if mapping == NULL, we don't
+ * need to make buffer unmapped because the page is
+ * already detached from the mapping and buffers cannot
+ * get reused.
*/
mapping = READ_ONCE(bh->b_page->mapping);
if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
diff --git a/fs/libfs.c b/fs/libfs.c
index c686bd9caac6..3759fbacf522 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -891,7 +891,7 @@ int simple_attr_open(struct inode *inode, struct file *file,
{
struct simple_attr *attr;
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
@@ -931,9 +931,11 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
if (ret)
return ret;
- if (*ppos) { /* continued read */
+ if (*ppos && attr->get_buf[0]) {
+ /* continued read */
size = strlen(attr->get_buf);
- } else { /* first read */
+ } else {
+ /* first read */
u64 val;
ret = attr->get(attr->data, &val);
if (ret)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index cd4c6bc81cae..40d31024b72d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -128,6 +128,8 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!pnfs_layout_is_valid(lo))
+ continue;
if (stateid != NULL &&
!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b768a0b42e82..ade2435551c8 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -571,6 +571,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
+ nfs_direct_req_release(dreq);
goto out_release;
}
dreq->l_ctx = l_ctx;
@@ -990,6 +991,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
result = PTR_ERR(l_ctx);
+ nfs_direct_req_release(dreq);
goto out_release;
}
dreq->l_ctx = l_ctx;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index e113fcb4bb4c..1c8d8bedf34e 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -190,6 +190,7 @@ static const struct constant_table nfs_vers_tokens[] = {
{ "4.0", Opt_vers_4_0 },
{ "4.1", Opt_vers_4_1 },
{ "4.2", Opt_vers_4_2 },
+ {}
};
enum {
@@ -202,13 +203,14 @@ enum {
nr__Opt_xprt
};
-static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = {
+static const struct constant_table nfs_xprt_protocol_tokens[] = {
{ "rdma", Opt_xprt_rdma },
{ "rdma6", Opt_xprt_rdma6 },
{ "tcp", Opt_xprt_tcp },
{ "tcp6", Opt_xprt_tcp6 },
{ "udp", Opt_xprt_udp },
{ "udp6", Opt_xprt_udp6 },
+ {}
};
enum {
@@ -239,6 +241,7 @@ static const struct constant_table nfs_secflavor_tokens[] = {
{ "spkm3i", Opt_sec_spkmi },
{ "spkm3p", Opt_sec_spkmp },
{ "sys", Opt_sec_sys },
+ {}
};
/*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 11bf15800ac9..a10fb87c6ac3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -959,16 +959,16 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
struct file *filp)
{
struct nfs_open_context *ctx;
- const struct cred *cred = get_current_cred();
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx) {
- put_cred(cred);
+ if (!ctx)
return ERR_PTR(-ENOMEM);
- }
nfs_sb_active(dentry->d_sb);
ctx->dentry = dget(dentry);
- ctx->cred = cred;
+ if (filp)
+ ctx->cred = get_cred(filp->f_cred);
+ else
+ ctx->cred = get_current_cred();
ctx->ll_cred = NULL;
ctx->state = NULL;
ctx->mode = f_mode;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f3ece8ed3203..4c943b890995 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -145,6 +145,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
struct vfsmount *mnt = ERR_PTR(-ENOMEM);
struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
struct nfs_client *client = server->nfs_client;
+ int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout);
int ret;
if (IS_ROOT(path->dentry))
@@ -190,12 +191,12 @@ struct vfsmount *nfs_d_automount(struct path *path)
if (IS_ERR(mnt))
goto out_fc;
- if (nfs_mountpoint_expiry_timeout < 0)
+ mntget(mnt); /* prevent immediate expiration */
+ if (timeout <= 0)
goto out_fc;
- mntget(mnt); /* prevent immediate expiration */
mnt_set_expiry(mnt, &nfs_automount_list);
- schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ schedule_delayed_work(&nfs_automount_task, timeout);
out_fc:
put_fs_context(fc);
@@ -233,10 +234,11 @@ const struct inode_operations nfs_referral_inode_operations = {
static void nfs_expire_automounts(struct work_struct *work)
{
struct list_head *list = &nfs_automount_list;
+ int timeout = READ_ONCE(nfs_mountpoint_expiry_timeout);
mark_mounts_for_expiry(list);
- if (!list_empty(list))
- schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
+ if (!list_empty(list) && timeout > 0)
+ schedule_delayed_work(&nfs_automount_task, timeout);
}
void nfs_release_automount_timer(void)
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 1297919e0fce..8e5d6223ddd3 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -252,6 +252,9 @@ static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
if (remap_flags & ~REMAP_FILE_ADVISORY)
return -EINVAL;
+ if (IS_SWAPFILE(dst_inode) || IS_SWAPFILE(src_inode))
+ return -ETXTBSY;
+
/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 20b3717cd7ca..b736912098ee 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -886,15 +886,6 @@ static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
pgio->pg_mirror_count = mirror_count;
}
-/*
- * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1)
- */
-void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio)
-{
- pgio->pg_mirror_count = 1;
- pgio->pg_mirror_idx = 0;
-}
-
static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_mirror_count = 1;
@@ -1177,38 +1168,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_error < 0)
goto out_failed;
- for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- if (midx) {
- nfs_page_group_lock(req);
+ /* Create the mirror instances first, and fire them off */
+ for (midx = 1; midx < desc->pg_mirror_count; midx++) {
+ nfs_page_group_lock(req);
- /* find the last request */
- for (lastreq = req->wb_head;
- lastreq->wb_this_page != req->wb_head;
- lastreq = lastreq->wb_this_page)
- ;
+ /* find the last request */
+ for (lastreq = req->wb_head;
+ lastreq->wb_this_page != req->wb_head;
+ lastreq = lastreq->wb_this_page)
+ ;
- dupreq = nfs_create_subreq(req, lastreq,
- pgbase, offset, bytes);
+ dupreq = nfs_create_subreq(req, lastreq,
+ pgbase, offset, bytes);
- nfs_page_group_unlock(req);
- if (IS_ERR(dupreq)) {
- desc->pg_error = PTR_ERR(dupreq);
- goto out_failed;
- }
- } else
- dupreq = req;
+ nfs_page_group_unlock(req);
+ if (IS_ERR(dupreq)) {
+ desc->pg_error = PTR_ERR(dupreq);
+ goto out_failed;
+ }
- if (nfs_pgio_has_mirroring(desc))
- desc->pg_mirror_idx = midx;
+ desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq))
goto out_cleanup_subreq;
}
+ desc->pg_mirror_idx = 0;
+ if (!nfs_pageio_add_request_mirror(desc, req))
+ goto out_failed;
+
return 1;
out_cleanup_subreq:
- if (req != dupreq)
- nfs_pageio_cleanup_request(desc, dupreq);
+ nfs_pageio_cleanup_request(desc, dupreq);
out_failed:
nfs_pageio_error_cleanup(desc);
return 0;
@@ -1320,6 +1311,14 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
}
}
+/*
+ * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1)
+ */
+void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio)
+{
+ nfs_pageio_complete(pgio);
+}
+
int __init nfs_init_nfspagecache(void)
{
nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c478b772cc49..38abd130528a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -444,6 +444,7 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
}
subreq->wb_head = subreq;
+ nfs_release_request(old_head);
if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
nfs_release_request(subreq);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c3b11a715082..5cf91322de0f 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1312,6 +1312,7 @@ nfsd4_run_cb_work(struct work_struct *work)
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
+ int flags;
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
@@ -1340,7 +1341,8 @@ nfsd4_run_cb_work(struct work_struct *work)
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
- rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
+ flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
+ rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 65cfe9ab47be..de9fbe7ed06c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -267,6 +267,8 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
if (!nbl) {
nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
if (nbl) {
+ INIT_LIST_HEAD(&nbl->nbl_list);
+ INIT_LIST_HEAD(&nbl->nbl_lru);
fh_copy_shallow(&nbl->nbl_fh, fh);
locks_init_lock(&nbl->nbl_lock);
nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index e109a1007704..3bb2db947d29 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1333,6 +1333,7 @@ void nfsd_client_rmdir(struct dentry *dentry)
dget(dentry);
ret = simple_rmdir(dir, dentry);
WARN_ON_ONCE(ret);
+ fsnotify_rmdir(dir, dentry);
d_delete(dentry);
inode_unlock(dir);
}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 88534eb0e7c2..3d5b6b989db2 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7403,6 +7403,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_inline_data *idata = &di->id2.i_data;
+ /* No need to punch hole beyond i_size. */
+ if (start >= i_size_read(inode))
+ return 0;
+
if (end > i_size_read(inode))
end = i_size_read(inode);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 79e8994e3bc1..3f993c114829 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -891,7 +891,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
oip->index);
- int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
+ int fsid = bylower ? lowerpath->layer->fsid : 0;
bool is_dir, metacopy = false;
unsigned long ino = 0;
int err = oip->newinode ? -EEXIST : -ENOMEM;
@@ -941,6 +941,8 @@ struct inode *ovl_get_inode(struct super_block *sb,
err = -ENOMEM;
goto out_err;
}
+ ino = realinode->i_ino;
+ fsid = lowerpath->layer->fsid;
}
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
diff --git a/fs/pnode.c b/fs/pnode.c
index 49f6d7ff2139..1106137c747a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -261,14 +261,13 @@ static int propagate_one(struct mount *m)
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child))
return PTR_ERR(child);
+ read_seqlock_excl(&mount_lock);
mnt_set_mountpoint(m, mp, child);
+ if (m->mnt_master != dest_master)
+ SET_MNT_MARK(m->mnt_master);
+ read_sequnlock_excl(&mount_lock);
last_dest = m;
last_source = child;
- if (m->mnt_master != dest_master) {
- read_seqlock_excl(&mount_lock);
- SET_MNT_MARK(m->mnt_master);
- read_sequnlock_excl(&mount_lock);
- }
hlist_add_head(&child->mnt_hash, list);
return count_mounts(m->mnt_ns, child);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 76d99aea5ae2..bdeec6ead1c3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1573,6 +1573,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
noffsets = 0;
for (pos = kbuf; pos; pos = next_line) {
struct proc_timens_offset *off = &offsets[noffsets];
+ char clock[10];
int err;
/* Find the end of line and ensure we don't look past it */
@@ -1584,10 +1585,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
next_line = NULL;
}
- err = sscanf(pos, "%u %lld %lu", &off->clockid,
+ err = sscanf(pos, "%9s %lld %lu", clock,
&off->val.tv_sec, &off->val.tv_nsec);
if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
goto out;
+
+ clock[sizeof(clock) - 1] = 0;
+ if (strcmp(clock, "monotonic") == 0 ||
+ strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
+ off->clockid = CLOCK_MONOTONIC;
+ else if (strcmp(clock, "boottime") == 0 ||
+ strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
+ off->clockid = CLOCK_BOOTTIME;
+ else
+ goto out;
+
noffsets++;
if (noffsets == ARRAY_SIZE(offsets)) {
if (next_line)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7dc800cce354..c663202da8de 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -266,7 +266,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
if (start < offset + dump->size) {
tsz = min(offset + (u64)dump->size - start, (u64)size);
buf = dump->buf + start - offset;
- if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) {
+ if (remap_vmalloc_range_partial(vma, dst, buf, 0,
+ tsz)) {
ret = -EFAULT;
goto out_unlock;
}
@@ -624,7 +625,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
- kaddr, tsz))
+ kaddr, 0, tsz))
goto fail;
size -= tsz;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 7fbe8f058220..d99b5d39aa90 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -87,11 +87,11 @@ static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
struct pstore_private *ps = s->private;
struct pstore_ftrace_seq_data *data = v;
+ (*pos)++;
data->off += REC_SIZE;
if (data->off + REC_SIZE > ps->total_size)
return NULL;
- (*pos)++;
return data;
}
@@ -101,6 +101,9 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec;
+ if (!data)
+ return 0;
+
rec = (struct pstore_ftrace_record *)(ps->record->buf + data->off);
seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %ps <- %pS\n",
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index d896457e7c11..408277ee3cdb 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -823,9 +823,9 @@ static int __init pstore_init(void)
ret = pstore_init_fs();
if (ret)
- return ret;
+ free_buf_for_compression();
- return 0;
+ return ret;
}
late_initcall(pstore_init);
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index edf43ddd7dce..7dd740e3692d 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -688,14 +688,14 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
ino_key_init(c, &key1, inum);
err = ubifs_tnc_lookup(c, &key1, ino);
- if (err)
+ if (err && err != -ENOENT)
goto out_free;
/*
* Check whether an inode can really get deleted.
* linkat() with O_TMPFILE allows rebirth of an inode.
*/
- if (ino->nlink == 0) {
+ if (err == 0 && ino->nlink == 0) {
dbg_rcvry("deleting orphaned inode %lu",
(unsigned long)inum);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index d8053bc96c4d..5a130409f173 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1515,7 +1515,7 @@ xfs_alloc_ag_vextent_lastblock(
* maxlen, go to the start of this block, and skip all those smaller
* than minlen.
*/
- if (len || args->alignment > 1) {
+ if (*len || args->alignment > 1) {
acur->cnt->bc_ptrs[0] = 1;
do {
error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index bbfa6ba84dcd..fe8f60b59ec4 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -145,8 +145,8 @@ xfs_attr3_node_inactive(
* Since this code is recursive (gasp!) we must protect ourselves.
*/
if (level > XFS_DA_NODE_MAXDEPTH) {
- xfs_trans_brelse(*trans, bp); /* no locks for later trans */
xfs_buf_corruption_error(bp);
+ xfs_trans_brelse(*trans, bp); /* no locks for later trans */
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 0d3b640cf1cc..871ec22c9aee 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -147,7 +147,7 @@ xfs_dir2_block_getdents(
xfs_off_t cook;
struct xfs_da_geometry *geo = args->geo;
int lock_mode;
- unsigned int offset;
+ unsigned int offset, next_offset;
unsigned int end;
/*
@@ -173,9 +173,10 @@ xfs_dir2_block_getdents(
* Loop over the data portion of the block.
* Each object is a real entry (dep) or an unused one (dup).
*/
- offset = geo->data_entry_offset;
end = xfs_dir3_data_end_offset(geo, bp->b_addr);
- while (offset < end) {
+ for (offset = geo->data_entry_offset;
+ offset < end;
+ offset = next_offset) {
struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
uint8_t filetype;
@@ -184,14 +185,15 @@ xfs_dir2_block_getdents(
* Unused, skip it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- offset += be16_to_cpu(dup->length);
+ next_offset = offset + be16_to_cpu(dup->length);
continue;
}
/*
* Bump pointer for the next iteration.
*/
- offset += xfs_dir2_data_entsize(dp->i_mount, dep->namelen);
+ next_offset = offset +
+ xfs_dir2_data_entsize(dp->i_mount, dep->namelen);
/*
* The entry is before the desired starting point, skip it.
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 8dc2e5414276..00932d2b503b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -907,7 +907,12 @@ xfs_eofblocks_worker(
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_eofblocks_work);
+
+ if (!sb_start_write_trylock(mp->m_super))
+ return;
xfs_icache_free_eofblocks(mp, NULL);
+ sb_end_write(mp->m_super);
+
xfs_queue_eofblocks(mp);
}
@@ -934,7 +939,12 @@ xfs_cowblocks_worker(
{
struct xfs_mount *mp = container_of(to_delayed_work(work),
struct xfs_mount, m_cowblocks_work);
+
+ if (!sb_start_write_trylock(mp->m_super))
+ return;
xfs_icache_free_cowblocks(mp, NULL);
+ sb_end_write(mp->m_super);
+
xfs_queue_cowblocks(mp);
}
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42de92cb283..4a99e0b0f333 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -2264,7 +2264,10 @@ xfs_file_ioctl(
if (error)
return error;
- return xfs_icache_free_eofblocks(mp, &keofb);
+ sb_start_write(mp->m_super);
+ error = xfs_icache_free_eofblocks(mp, &keofb);
+ sb_end_write(mp->m_super);
+ return error;
}
default:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f6006d94a581..796ff37d5bb5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -605,18 +605,23 @@ xfs_log_release_iclog(
struct xlog *log = mp->m_log;
bool sync;
- if (iclog->ic_state == XLOG_STATE_IOERROR) {
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- return -EIO;
- }
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
+ goto error;
if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
+ if (iclog->ic_state == XLOG_STATE_IOERROR) {
+ spin_unlock(&log->l_icloglock);
+ goto error;
+ }
sync = __xlog_state_release_iclog(log, iclog);
spin_unlock(&log->l_icloglock);
if (sync)
xlog_sync(log, iclog);
}
return 0;
+error:
+ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+ return -EIO;
}
/*
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index b0ce04ffd3cd..107bf2a2f344 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1051,6 +1051,7 @@ xfs_reflink_remap_extent(
uirec.br_startblock = irec->br_startblock + rlen;
uirec.br_startoff = irec->br_startoff + rlen;
uirec.br_blockcount = unmap_len - rlen;
+ uirec.br_state = irec->br_state;
unmap_len = rlen;
/* If this isn't a real mapping, we're done. */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 2094386af8ac..68fea439d974 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1861,7 +1861,8 @@ xfs_init_zones(void)
xfs_ili_zone = kmem_cache_create("xfs_ili",
sizeof(struct xfs_inode_log_item), 0,
- SLAB_MEM_SPREAD, NULL);
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 00cc5b8734be..3bc570c90ad9 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -529,8 +529,9 @@ xfsaild(
{
struct xfs_ail *ailp = data;
long tout = 0; /* milliseconds */
+ unsigned int noreclaim_flag;
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
set_freezable();
while (1) {
@@ -601,6 +602,7 @@ xfsaild(
tout = xfsaild_push(ailp);
}
+ memalloc_noreclaim_restore(noreclaim_flag);
return 0;
}