aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/dynroot.c13
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/aio.c4
-rw-r--r--fs/attr.c20
-rw-r--r--fs/autofs/waitq.c3
-rw-r--r--fs/binfmt_aout.c2
-rw-r--r--fs/binfmt_elf_fdpic.c12
-rw-r--r--fs/binfmt_flat.c3
-rw-r--r--fs/binfmt_misc.c8
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/ctree.c34
-rw-r--r--fs/btrfs/delayed-inode.c19
-rw-r--r--fs/btrfs/disk-io.c9
-rw-r--r--fs/btrfs/extent-tree.c11
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/free-space-cache.c7
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ioctl.c37
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/rcu-string.h6
-rw-r--r--fs/btrfs/relocation.c14
-rw-r--r--fs/btrfs/send.c19
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c11
-rw-r--r--fs/ceph/caps.c15
-rw-r--r--fs/ceph/inode.c4
-rw-r--r--fs/ceph/snap.c17
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/cifs/cifs_spnego.c4
-rw-r--r--fs/cifs/cifsfs.c8
-rw-r--r--fs/cifs/cifsfs.h5
-rw-r--r--fs/cifs/cifssmb.c9
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/cifs/link.c1
-rw-r--r--fs/cifs/misc.c4
-rw-r--r--fs/cifs/smb2misc.c26
-rw-r--r--fs/cifs/smb2ops.c5
-rw-r--r--fs/cifs/smb2pdu.c15
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/cifs/smbdirect.c133
-rw-r--r--fs/cifs/smbdirect.h5
-rw-r--r--fs/debugfs/file.c36
-rw-r--r--fs/debugfs/inode.c6
-rw-r--r--fs/dlm/lock.c53
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/dlm/plock.c185
-rw-r--r--fs/dlm/recover.c39
-rw-r--r--fs/eventfd.c7
-rw-r--r--fs/ext2/ext2.h13
-rw-r--r--fs/ext2/super.c30
-rw-r--r--fs/ext2/xattr.c4
-rw-r--r--fs/ext4/acl.h5
-rw-r--r--fs/ext4/balloc.c25
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/fsmap.c10
-rw-r--r--fs/ext4/indirect.c17
-rw-r--r--fs/ext4/inline.c26
-rw-r--r--fs/ext4/inode.c78
-rw-r--r--fs/ext4/ioctl.c16
-rw-r--r--fs/ext4/mballoc.c217
-rw-r--r--fs/ext4/namei.c42
-rw-r--r--fs/ext4/page-io.c11
-rw-r--r--fs/ext4/resize.c25
-rw-r--r--fs/ext4/super.c94
-rw-r--r--fs/ext4/sysfs.c7
-rw-r--r--fs/ext4/xattr.c278
-rw-r--r--fs/ext4/xattr.h7
-rw-r--r--fs/f2fs/checkpoint.c12
-rw-r--r--fs/f2fs/data.c7
-rw-r--r--fs/f2fs/extent_cache.c3
-rw-r--r--fs/f2fs/file.c1
-rw-r--r--fs/f2fs/gc.c3
-rw-r--r--fs/f2fs/inline.c13
-rw-r--r--fs/f2fs/namei.c2
-rw-r--r--fs/f2fs/node.c4
-rw-r--r--fs/f2fs/segment.c2
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/fat/dir.c2
-rw-r--r--fs/file.c1
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/gfs2/aops.c7
-rw-r--r--fs/gfs2/bmap.c5
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/quota.c11
-rw-r--r--fs/gfs2/super.c34
-rw-r--r--fs/hfs/bnode.c1
-rw-r--r--fs/hfs/inode.c13
-rw-r--r--fs/hfs/trans.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h2
-rw-r--r--fs/hfsplus/inode.c36
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hfsplus/unicode.c2
-rw-r--r--fs/iomap.c3
-rw-r--r--fs/isofs/namei.c4
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jbd2/recovery.c8
-rw-r--r--fs/jffs2/build.c5
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/jffs2/file.c15
-rw-r--r--fs/jffs2/xattr.c13
-rw-r--r--fs/jffs2/xattr.h4
-rw-r--r--fs/jfs/jfs_dmap.c59
-rw-r--r--fs/jfs/jfs_extent.c5
-rw-r--r--fs/jfs/jfs_filsys.h2
-rw-r--r--fs/jfs/jfs_imap.c6
-rw-r--r--fs/jfs/jfs_txnmgr.c5
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/kernfs/mount.c2
-rw-r--r--fs/libfs.c22
-rw-r--r--fs/lockd/mon.c3
-rw-r--r--fs/mbcache.c121
-rw-r--r--fs/namei.c22
-rw-r--r--fs/nfs/blocklayout/blocklayout.c2
-rw-r--r--fs/nfs/blocklayout/dev.c4
-rw-r--r--fs/nfs/filelayout/filelayout.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c1
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c52
-rw-r--r--fs/nfs/nfs4state.c14
-rw-r--r--fs/nfs/nfs4xdr.c10
-rw-r--r--fs/nfs/pnfs_dev.c2
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nfsd/blocklayoutxdr.c9
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c9
-rw-r--r--fs/nfsd/nfs3xdr.c4
-rw-r--r--fs/nfsd/nfs4callback.c4
-rw-r--r--fs/nfsd/nfs4layouts.c4
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4state.c23
-rw-r--r--fs/nfsd/nfs4xdr.c49
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/vfs.c12
-rw-r--r--fs/nilfs2/alloc.c3
-rw-r--r--fs/nilfs2/bmap.c16
-rw-r--r--fs/nilfs2/btnode.c12
-rw-r--r--fs/nilfs2/btree.c15
-rw-r--r--fs/nilfs2/gcinode.c6
-rw-r--r--fs/nilfs2/inode.c33
-rw-r--r--fs/nilfs2/ioctl.c9
-rw-r--r--fs/nilfs2/page.c10
-rw-r--r--fs/nilfs2/segbuf.c6
-rw-r--r--fs/nilfs2/segment.c42
-rw-r--r--fs/nilfs2/sufile.c51
-rw-r--r--fs/nilfs2/super.c34
-rw-r--r--fs/nilfs2/the_nilfs.c101
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/nls/nls_base.c4
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/aops.c18
-rw-r--r--fs/ocfs2/dir.c14
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h4
-rw-r--r--fs/ocfs2/extent_map.c4
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/move_extents.c34
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/super.c6
-rw-r--r--fs/ocfs2/xattr.c2
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/orangefs/orangefs-debugfs.c3
-rw-r--r--fs/orangefs/orangefs-mod.c8
-rw-r--r--fs/overlayfs/copy_up.c4
-rw-r--r--fs/overlayfs/dir.c46
-rw-r--r--fs/overlayfs/namei.c24
-rw-r--r--fs/overlayfs/super.c5
-rw-r--r--fs/pnode.c2
-rw-r--r--fs/proc/base.c3
-rw-r--r--fs/proc/proc_sysctl.c33
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/pstore/Kconfig1
-rw-r--r--fs/pstore/ram.c2
-rw-r--r--fs/pstore/ram_core.c10
-rw-r--r--fs/quota/dquot.c342
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/super.c6
-rw-r--r--fs/reiserfs/xattr_security.c10
-rw-r--r--fs/squashfs/squashfs_fs.h2
-rw-r--r--fs/squashfs/squashfs_fs_sb.h2
-rw-r--r--fs/squashfs/xattr.h4
-rw-r--r--fs/squashfs/xattr_id.c2
-rw-r--r--fs/statfs.c4
-rw-r--r--fs/sync.c3
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/sysv/itree.c6
-rw-r--r--fs/ubifs/budget.c9
-rw-r--r--fs/ubifs/commit.c6
-rw-r--r--fs/ubifs/dir.c16
-rw-r--r--fs/ubifs/file.c16
-rw-r--r--fs/ubifs/journal.c2
-rw-r--r--fs/ubifs/lpt.c2
-rw-r--r--fs/ubifs/tnc.c158
-rw-r--r--fs/ubifs/tnc_misc.c4
-rw-r--r--fs/udf/balloc.c33
-rw-r--r--fs/udf/directory.c2
-rw-r--r--fs/udf/file.c33
-rw-r--r--fs/udf/ialloc.c14
-rw-r--r--fs/udf/inode.c255
-rw-r--r--fs/udf/misc.c6
-rw-r--r--fs/udf/namei.c15
-rw-r--r--fs/udf/partition.c2
-rw-r--r--fs/udf/super.c12
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/udf/truncate.c48
-rw-r--r--fs/udf/udf_i.h12
-rw-r--r--fs/udf/udf_sb.h2
-rw-r--r--fs/udf/unicode.c2
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c2
222 files changed, 2751 insertions, 1442 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index ba084b0b214b..82bb38370aa9 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -878,7 +878,7 @@ affs_truncate(struct inode *inode)
if (inode->i_size > AFFS_I(inode)->mmu_private) {
struct address_space *mapping = inode->i_mapping;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
loff_t isize = inode->i_size;
int res;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 59eb92484051..31055d71b788 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -887,7 +887,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
struct afs_vnode *vnode, *dir;
- struct afs_fid uninitialized_var(fid);
+ struct afs_fid fid;
struct dentry *parent;
struct inode *inode;
struct key *key;
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index fc6c42eeb659..ff21ce511f47 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -172,20 +172,9 @@ static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
return 1;
}
-/*
- * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
- * sleep)
- * - called from dput() when d_count is going to 0.
- * - return 1 to request dentry be unhashed, 0 otherwise
- */
-static int afs_dynroot_d_delete(const struct dentry *dentry)
-{
- return d_really_is_positive(dentry);
-}
-
const struct dentry_operations afs_dynroot_dentry_operations = {
.d_revalidate = afs_dynroot_d_revalidate,
- .d_delete = afs_dynroot_d_delete,
+ .d_delete = always_delete_dentry,
.d_release = afs_d_release,
.d_automount = afs_d_automount,
};
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 66042b432baa..e12e532069ee 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -340,7 +340,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
int afs_permission(struct inode *inode, int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
- afs_access_t uninitialized_var(access);
+ afs_access_t access;
struct key *key;
int ret;
diff --git a/fs/aio.c b/fs/aio.c
index 9635c29b83da..1bd934eccbf6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -332,6 +332,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
spin_lock(&mm->ioctx_lock);
rcu_read_lock();
table = rcu_dereference(mm->ioctx_table);
+ if (!table)
+ goto out_unlock;
+
for (i = 0; i < table->nr; i++) {
struct kioctx *ctx;
@@ -345,6 +348,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma)
}
}
+out_unlock:
rcu_read_unlock();
spin_unlock(&mm->ioctx_lock);
return res;
diff --git a/fs/attr.c b/fs/attr.c
index 4d2541c1e68c..f064f08f5194 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -256,9 +256,25 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
}
if ((ia_valid & ATTR_MODE)) {
- umode_t amode = attr->ia_mode;
+ /*
+ * Don't allow changing the mode of symlinks:
+ *
+ * (1) The vfs doesn't take the mode of symlinks into account
+ * during permission checking.
+ * (2) This has never worked correctly. Most major filesystems
+ * did return EOPNOTSUPP due to interactions with POSIX ACLs
+ * but did still updated the mode of the symlink.
+ * This inconsistency led system call wrapper providers such
+ * as libc to block changing the mode of symlinks with
+ * EOPNOTSUPP already.
+ * (3) To even do this in the first place one would have to use
+ * specific file descriptors and quite some effort.
+ */
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
/* Flag setting protected by i_mutex */
- if (is_sxid(amode))
+ if (is_sxid(attr->ia_mode))
inode->i_flags &= ~S_NOSEC;
}
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index f6385c6ef0a5..44ba0cd4ebc4 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -35,8 +35,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name);
wq->name.name = NULL;
- wq->wait_ctr--;
wake_up_interruptible(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index c3deb2e35f20..e7a9e8b56e71 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -244,6 +244,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
set_personality(PER_LINUX);
#endif
setup_new_exec(bprm);
+ install_exec_creds(bprm);
current->mm->end_code = ex.a_text +
(current->mm->start_code = N_TXTADDR(ex));
@@ -256,7 +257,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
if (retval < 0)
return retval;
- install_exec_creds(bprm);
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index b53bb3729ac1..a7c2efcd0a4a 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -349,14 +349,14 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
/* there's now no turning back... the old userspace image is dead,
* defunct, deceased, etc.
*/
+ SET_PERSONALITY(exec_params.hdr);
if (elf_check_fdpic(&exec_params.hdr))
- set_personality(PER_LINUX_FDPIC);
- else
- set_personality(PER_LINUX);
+ current->personality |= PER_LINUX_FDPIC;
if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
setup_new_exec(bprm);
+ install_exec_creds(bprm);
set_binfmt(&elf_fdpic_format);
@@ -438,9 +438,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
current->mm->start_stack = current->mm->start_brk + stack_size;
#endif
- install_exec_creds(bprm);
- if (create_elf_fdpic_tables(bprm, current->mm,
- &exec_params, &interp_params) < 0)
+ retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params,
+ &interp_params);
+ if (retval < 0)
goto error;
kdebug("- start_code %lx", current->mm->start_code);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a6f97d86fb80..a909743b1a0e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -541,6 +541,7 @@ static int load_flat_file(struct linux_binprm *bprm,
/* OK, This is the point of no return */
set_personality(PER_LINUX_32BIT);
setup_new_exec(bprm);
+ install_exec_creds(bprm);
}
/*
@@ -965,8 +966,6 @@ static int load_flat_binary(struct linux_binprm *bprm)
}
}
- install_exec_creds(bprm);
-
set_binfmt(&flat_format);
#ifdef CONFIG_MMU
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 27a04f492541..8fe7edd2b001 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -42,10 +42,10 @@ static LIST_HEAD(entries);
static int enabled = 1;
enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
-#define MISC_FMT_OPEN_BINARY (1 << 30)
-#define MISC_FMT_CREDENTIALS (1 << 29)
-#define MISC_FMT_OPEN_FILE (1 << 28)
+#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
+#define MISC_FMT_OPEN_BINARY (1UL << 30)
+#define MISC_FMT_CREDENTIALS (1UL << 29)
+#define MISC_FMT_OPEN_FILE (1UL << 28)
typedef struct {
struct list_head list;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b34f76af59c4..5c6ff1572405 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -2041,21 +2041,26 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
if ((start | len) & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- /* Invalidate the page cache, including dirty pages. */
+ /*
+ * Invalidate the page cache, including dirty pages, for valid
+ * de-allocate mode calls to fallocate().
+ */
mapping = bdev->bd_inode->i_mapping;
- truncate_inode_pages_range(mapping, start, end);
switch (mode) {
case FALLOC_FL_ZERO_RANGE:
case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
+ truncate_inode_pages_range(mapping, start, end);
error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+ truncate_inode_pages_range(mapping, start, end);
error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
+ truncate_inode_pages_range(mapping, start, end);
error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
GFP_KERNEL, 0);
break;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 00dc1b5c8737..e48c6d7a860f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -3590,6 +3590,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ret = tree_mod_log_eb_copy(fs_info, split, c, 0, mid, c_nritems - mid);
if (ret) {
+ btrfs_tree_unlock(split);
+ free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -5151,10 +5153,12 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
+ struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
+ orig_key = key;
if (key.offset > 0) {
key.offset--;
@@ -5171,8 +5175,36 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
+ if (ret <= 0)
return ret;
+
+ /*
+ * Previous key not found. Even if we were at slot 0 of the leaf we had
+ * before releasing the path and calling btrfs_search_slot(), we now may
+ * be in a slot pointing to the same original key - this can happen if
+ * after we released the path, one of more items were moved from a
+ * sibling leaf into the front of the leaf we had due to an insertion
+ * (see push_leaf_right()).
+ * If we hit this case and our slot is > 0 and just decrement the slot
+ * so that the caller does not process the same key again, which may or
+ * may not break the caller, depending on its logic.
+ */
+ if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+ btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
+ ret = comp_keys(&found_key, &orig_key);
+ if (ret == 0) {
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ return 0;
+ }
+ /*
+ * At slot 0, same key as before, it means orig_key is
+ * the lowest, leftmost, key in the tree. We're done.
+ */
+ return 1;
+ }
+ }
+
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1fbe2dee1e70..469a90b07d3f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1171,20 +1171,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
- btrfs_release_delayed_node(curr_node);
- curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
+ /*
+ * See the comment below about releasing path before releasing
+ * node. If the commit of delayed items was successful the path
+ * should always be released, but in case of an error, it may
+ * point to locked extent buffers (a leaf at the very least).
+ */
+ ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}
+ /*
+ * Release the path to avoid a potential deadlock and lockdep splat when
+ * releasing the delayed node, as that requires taking the delayed node's
+ * mutex. If another task starts running delayed items before we take
+ * the mutex, it will first lock the mutex and then it may try to lock
+ * the same btree path (leaf).
+ */
+ btrfs_free_path(path);
+
if (curr_node)
btrfs_release_delayed_node(curr_node);
- btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98f87cc47433..437ca4691967 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4348,7 +4348,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
iput(inode);
}
spin_lock(&root->delalloc_lock);
@@ -4466,7 +4470,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
inode = cache->io_ctl.inode;
if (inode) {
+ unsigned int nofs_flag;
+
+ nofs_flag = memalloc_nofs_save();
invalidate_inode_pages2(inode->i_mapping);
+ memalloc_nofs_restore(nofs_flag);
+
BTRFS_I(inode)->generation = 0;
cache->io_ctl.inode = NULL;
iput(inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d71f800e8bf6..902ab00bfd7a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1549,6 +1549,11 @@ again:
err = -ENOENT;
goto out;
} else if (WARN_ON(ret)) {
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+ bytenr, num_bytes, parent, root_objectid, owner,
+ offset);
err = -EIO;
goto out;
}
@@ -2322,12 +2327,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
parent = ref->parent;
ref_root = ref->root;
- if (node->ref_mod != 1) {
+ if (unlikely(node->ref_mod != 1)) {
btrfs_err(trans->fs_info,
- "btree block(%llu) has %d references rather than 1: action %d ref_root %llu parent %llu",
+ "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
node->bytenr, node->ref_mod, node->action, ref_root,
parent);
- return -EIO;
+ return -EUCLEAN;
}
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
BUG_ON(!extent_op || !extent_op->update_flags);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dabf153843e9..504d63fb81fa 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3928,11 +3928,12 @@ retry:
free_extent_buffer(eb);
/*
- * the filesystem may choose to bump up nr_to_write.
+ * The filesystem may choose to bump up nr_to_write.
* We have to make sure to honor the new nr_to_write
- * at any time
+ * at any time.
*/
- nr_to_write_done = wbc->nr_to_write <= 0;
+ nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+ wbc->nr_to_write <= 0);
}
pagevec_release(&pvec);
cond_resched();
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 6511cb71986c..b623e9f3b4c4 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -783,15 +783,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
- ctl->total_bitmaps++;
- ctl->op->recalc_thresholds(ctl);
- spin_unlock(&ctl->tree_lock);
if (ret) {
+ spin_unlock(&ctl->tree_lock);
btrfs_err(fs_info,
"Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
+ ctl->total_bitmaps++;
+ ctl->op->recalc_thresholds(ctl);
+ spin_unlock(&ctl->tree_lock);
list_add_tail(&e->list, &bitmaps);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f314b2c2d148..e4a4074ef33d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6620,7 +6620,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6684,7 +6684,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -6837,7 +6837,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_fail;
@@ -9819,7 +9819,7 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
u64 objectid;
u64 index;
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
return ret;
@@ -10316,7 +10316,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(trans))
return PTR_ERR(trans);
- err = btrfs_find_free_ino(root, &objectid);
+ err = btrfs_find_free_objectid(root, &objectid);
if (err)
goto out_unlock;
@@ -10600,7 +10600,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ret = btrfs_find_free_ino(root, &objectid);
+ ret = btrfs_find_free_objectid(root, &objectid);
if (ret)
goto out;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 752b5d265284..c76277ccf03b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1789,6 +1789,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
* are limited to own subvolumes only
*/
ret = -EPERM;
+ } else if (btrfs_ino(BTRFS_I(src_inode)) != BTRFS_FIRST_FREE_OBJECTID) {
+ /*
+ * Snapshots must be made with the src_inode referring
+ * to the subvolume inode, otherwise the permission
+ * checking above is useless because we may have
+ * permission on a lower directory but not the subvol
+ * itself.
+ */
+ ret = -EINVAL;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2045,7 +2054,7 @@ static noinline int key_in_sk(struct btrfs_key *key,
static noinline int copy_to_sk(struct btrfs_path *path,
struct btrfs_key *key,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf,
unsigned long *sk_offset,
int *num_found)
@@ -2177,7 +2186,7 @@ out:
static noinline int search_ioctl(struct inode *inode,
struct btrfs_ioctl_search_key *sk,
- size_t *buf_size,
+ u64 *buf_size,
char __user *ubuf)
{
struct btrfs_fs_info *info = btrfs_sb(inode->i_sb);
@@ -2249,7 +2258,7 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
struct btrfs_ioctl_search_key sk;
struct inode *inode;
int ret;
- size_t buf_size;
+ u64 buf_size;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2283,8 +2292,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
struct btrfs_ioctl_search_args_v2 args;
struct inode *inode;
int ret;
- size_t buf_size;
- const size_t buf_limit = SZ_16M;
+ u64 buf_size;
+ const u64 buf_limit = SZ_16M;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -3234,13 +3243,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
di_args->bytes_used = btrfs_device_get_bytes_used(dev);
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
- if (dev->name) {
- strncpy(di_args->path, rcu_str_deref(dev->name),
- sizeof(di_args->path) - 1);
- di_args->path[sizeof(di_args->path) - 1] = 0;
- } else {
+ if (dev->name)
+ strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path));
+ else
di_args->path[0] = '\0';
- }
out:
rcu_read_unlock();
@@ -4529,7 +4535,7 @@ static void get_block_group_info(struct list_head *groups_list,
static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
void __user *arg)
{
- struct btrfs_ioctl_space_args space_args;
+ struct btrfs_ioctl_space_args space_args = { 0 };
struct btrfs_ioctl_space_info space;
struct btrfs_ioctl_space_info *dest;
struct btrfs_ioctl_space_info *dest_orig;
@@ -4725,6 +4731,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
if (IS_ERR(sa))
return PTR_ERR(sa);
+ if (sa->flags & ~BTRFS_SCRUB_SUPPORTED_FLAGS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
ret = mnt_want_write_file(file);
if (ret)
@@ -5882,7 +5893,7 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
- struct btrfs_ioctl_send_args_32 args32;
+ struct btrfs_ioctl_send_args_32 args32 = { 0 };
ret = copy_from_user(&args32, argp, sizeof(args32));
if (ret)
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 4b217e9a581c..e3de0c4ecbfc 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -109,10 +109,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
pr_cont("shared data backref parent %llu count %u\n",
offset, btrfs_shared_data_ref_count(eb, sref));
/*
- * offset is supposed to be a tree block which
- * must be aligned to nodesize.
+ * Offset is supposed to be a tree block which must be
+ * aligned to sectorsize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
pr_info(
"\t\t\t(parent %llu not aligned to sectorsize %u)\n",
offset, eb->fs_info->sectorsize);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 69b43df186a8..ef95525fa6cd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1115,7 +1115,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
goto end_trans;
}
+ spin_lock(&fs_info->trans_lock);
list_del(&quota_root->dirty_list);
+ spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(quota_root->node);
clean_tree_block(fs_info, quota_root->node);
diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h
index a97dc74a4d3d..02f15321cecc 100644
--- a/fs/btrfs/rcu-string.h
+++ b/fs/btrfs/rcu-string.h
@@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
(len * sizeof(char)), mask);
if (!ret)
return ret;
- strncpy(ret->str, src, len);
+ /* Warn if the source got unexpectedly truncated. */
+ if (WARN_ON(strscpy(ret->str, src, len) < 0)) {
+ kfree(ret);
+ return NULL;
+ }
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 06c6a66a991f..3b9318a3d421 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2341,7 +2341,7 @@ again:
list_splice(&reloc_roots, &rc->reloc_roots);
if (!err)
- btrfs_commit_transaction(trans);
+ err = btrfs_commit_transaction(trans);
else
btrfs_end_transaction(trans);
return err;
@@ -3930,8 +3930,12 @@ int prepare_to_relocate(struct reloc_control *rc)
*/
return PTR_ERR(trans);
}
- btrfs_commit_transaction(trans);
- return 0;
+
+ ret = btrfs_commit_transaction(trans);
+ if (ret)
+ unset_reloc_control(rc);
+
+ return ret;
}
static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
@@ -4097,7 +4101,9 @@ restart:
err = PTR_ERR(trans);
goto out_free;
}
- btrfs_commit_transaction(trans);
+ ret = btrfs_commit_transaction(trans);
+ if (ret && !err)
+ err = ret;
out_free:
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index eb2f8e84ffc9..cb584c044f8a 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1306,6 +1306,7 @@ static int find_extent_clone(struct send_ctx *sctx,
u64 disk_byte;
u64 num_bytes;
u64 extent_item_pos;
+ u64 extent_refs;
u64 flags = 0;
struct btrfs_file_extent_item *fi;
struct extent_buffer *eb = path->nodes[0];
@@ -1373,14 +1374,22 @@ static int find_extent_clone(struct send_ctx *sctx,
ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
struct btrfs_extent_item);
+ extent_refs = btrfs_extent_refs(tmp_path->nodes[0], ei);
/*
* Backreference walking (iterate_extent_inodes() below) is currently
* too expensive when an extent has a large number of references, both
* in time spent and used memory. So for now just fallback to write
* operations instead of clone operations when an extent has more than
* a certain amount of references.
+ *
+ * Also, if we have only one reference and only the send root as a clone
+ * source - meaning no clone roots were given in the struct
+ * btrfs_ioctl_send_args passed to the send ioctl - then it's our
+ * reference and there's no point in doing backref walking which is
+ * expensive, so exit early.
*/
- if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
+ if ((extent_refs == 1 && sctx->clone_roots_cnt == 1) ||
+ extent_refs > SEND_MAX_EXTENT_REFS) {
ret = -ENOENT;
goto out;
}
@@ -6817,10 +6826,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
/*
* Check that we don't overflow at later allocations, we request
* clone_sources_count + 1 items, and compare to unsigned long inside
- * access_ok.
+ * access_ok. Also set an upper limit for allocation size so this can't
+ * easily exhaust memory. Max number of clone sources is about 200K.
*/
- if (arg->clone_sources_count >
- ULONG_MAX / sizeof(struct clone_root) - 1) {
+ if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) {
ret = -EINVAL;
goto out;
}
@@ -6851,7 +6860,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
sctx->flags = arg->flags;
sctx->send_filp = fget(arg->send_fd);
- if (!sctx->send_filp) {
+ if (!sctx->send_filp || !(sctx->send_filp->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 521f6c2091ad..a59543951851 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2196,7 +2196,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
* calculated f_bavail.
*/
if (!mixed && block_rsv->space_info->full &&
- total_free_meta - thresh < block_rsv->size)
+ (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size))
buf->f_bavail = 0;
buf->f_type = BTRFS_SUPER_MAGIC;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 049535115c9d..a34c0436ebb1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -200,10 +200,11 @@ loop:
spin_unlock(&fs_info->trans_lock);
/*
- * If we are ATTACH, we just want to catch the current transaction,
- * and commit it. If there is no transaction, just return ENOENT.
+ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+ * current transaction, and commit it. If there is no transaction, just
+ * return ENOENT.
*/
- if (type == TRANS_ATTACH)
+ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
return -ENOENT;
/*
@@ -703,8 +704,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
trans = start_transaction(root, 0, TRANS_ATTACH,
BTRFS_RESERVE_NO_FLUSH, true);
- if (trans == ERR_PTR(-ENOENT))
- btrfs_wait_for_commit(root->fs_info, 0);
+ if (trans == ERR_PTR(-ENOENT)) {
+ int ret;
+
+ ret = btrfs_wait_for_commit(root->fs_info, 0);
+ if (ret)
+ return ERR_PTR(ret);
+ }
return trans;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0fe32c567ed7..23ec766eeb0a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4236,7 +4236,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
int slot;
int ins_nr = 0;
- int start_slot;
+ int start_slot = 0;
int ret;
if (!(inode->flags & BTRFS_INODE_PREALLOC))
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2b4d33b58a68..ceced5e56c5a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1418,7 +1418,7 @@ again:
goto out;
}
- while (1) {
+ while (search_start < search_end) {
l = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) {
@@ -1441,6 +1441,9 @@ again:
if (key.type != BTRFS_DEV_EXTENT_KEY)
goto next;
+ if (key.offset > search_end)
+ break;
+
if (key.offset > search_start) {
hole_size = key.offset - search_start;
@@ -1515,6 +1518,7 @@ next:
else
ret = 0;
+ ASSERT(max_hole_start + max_hole_size <= search_end);
out:
btrfs_free_path(path);
*start = max_hole_start;
@@ -2756,7 +2760,7 @@ static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
return ERR_PTR(-EINVAL);
}
- if (em->start > logical || em->start + em->len < logical) {
+ if (em->start > logical || em->start + em->len <= logical) {
btrfs_crit(fs_info,
"found a bad mapping, wanted %llu-%llu, found %llu-%llu",
logical, length, em->start, em->start + em->len);
@@ -4102,8 +4106,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
}
}
- BUG_ON(fs_info->balance_ctl ||
- test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6443ba1e60eb..fcfba2af5f98 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1554,6 +1554,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
+ bool need_put = false;
int mds;
dout("ceph_flush_snaps %p\n", inode);
@@ -1607,8 +1608,13 @@ out:
}
/* we flushed them all; remove this inode from the queue */
spin_lock(&mdsc->snap_flush_lock);
+ if (!list_empty(&ci->i_snap_flush_item))
+ need_put = true;
list_del_init(&ci->i_snap_flush_item);
spin_unlock(&mdsc->snap_flush_lock);
+
+ if (need_put)
+ iput(inode);
}
/*
@@ -3279,6 +3285,15 @@ static void handle_cap_grant(struct inode *inode,
}
BUG_ON(cap->issued & ~cap->implemented);
+ /* don't let check_caps skip sending a response to MDS for revoke msgs */
+ if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ cap->mds_wanted = 0;
+ if (cap == ci->i_auth_cap)
+ check_caps = 1; /* check auth cap only */
+ else
+ check_caps = 2; /* check all caps */
+ }
+
if (extra_info->inline_version > 0 &&
extra_info->inline_version >= ci->i_inline_version) {
ci->i_inline_version = extra_info->inline_version;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5f041fede7aa..d6f181e3c1ac 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -631,9 +631,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
ci->i_truncate_seq = truncate_seq;
/* the MDS should have revoked these caps */
- WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
- CEPH_CAP_FILE_RD |
- CEPH_CAP_FILE_WR |
+ WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD |
CEPH_CAP_FILE_LAZYIO));
/*
* If we hold relevant caps, or in the case where we're
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index a5ef8275440d..4f727f2c98db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -623,8 +623,10 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size);
spin_lock(&mdsc->snap_flush_lock);
- if (list_empty(&ci->i_snap_flush_item))
+ if (list_empty(&ci->i_snap_flush_item)) {
+ ihold(inode);
list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+ }
spin_unlock(&mdsc->snap_flush_lock);
return 1; /* caller may want to ceph_flush_snaps */
}
@@ -976,6 +978,19 @@ skip_inode:
continue;
adjust_snap_realm_parent(mdsc, child, realm->ino);
}
+ } else {
+ /*
+ * In the non-split case both 'num_split_inos' and
+ * 'num_split_realms' should be 0, making this a no-op.
+ * However the MDS happens to populate 'split_realms' list
+ * in one of the UPDATE op cases by mistake.
+ *
+ * Skip both lists just in case to ensure that 'p' is
+ * positioned at the start of realm info, as expected by
+ * ceph_update_snap_trace().
+ */
+ p += sizeof(u64) * num_split_inos;
+ p += sizeof(u64) * num_split_realms;
}
/*
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 5fffd5050fb7..2c3d519b21c2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -553,7 +553,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev)
}
rc = device_add(dev);
- if (rc)
+ if (rc && dev->devt)
cdev_del(cdev);
return rc;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7f01c6e60791..6eb65988321f 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -76,8 +76,8 @@ struct key_type cifs_spnego_key_type = {
* strlen(";sec=ntlmsspi") */
#define MAX_MECH_STR_LEN 13
-/* strlen of "host=" */
-#define HOST_KEY_LEN 5
+/* strlen of ";host=" */
+#define HOST_KEY_LEN 6
/* strlen of ";ip4=" or ";ip6=" */
#define IP_KEY_LEN 5
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 52b1524b40cd..7c9e5ed1644b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -663,11 +663,6 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
struct inode *dir = d_inode(dentry);
struct dentry *child;
- if (!dir) {
- dput(dentry);
- dentry = ERR_PTR(-ENOENT);
- break;
- }
if (!S_ISDIR(dir->i_mode)) {
dput(dentry);
dentry = ERR_PTR(-ENOTDIR);
@@ -684,7 +679,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
while (*s && *s != sep)
s++;
- child = lookup_one_len_unlocked(p, dentry, s - p);
+ child = lookup_positive_unlocked(p, dentry, s - p);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
@@ -975,6 +970,7 @@ const struct inode_operations cifs_file_inode_ops = {
const struct inode_operations cifs_symlink_inode_ops = {
.get_link = cifs_get_link,
+ .setattr = cifs_setattr,
.permission = cifs_permission,
.listxattr = cifs_listxattr,
};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f047e87871a1..c1d5daa4b351 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -121,7 +121,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops;
#ifdef CONFIG_CIFS_DFS_UPCALL
extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
#else
-#define cifs_dfs_d_automount NULL
+static inline struct vfsmount *cifs_dfs_d_automount(struct path *path)
+{
+ return ERR_PTR(-EREMOTE);
+}
#endif
/* Functions related to symlinks */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index cb70f0c6aa1b..d16fd8d1f291 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -4895,8 +4895,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses,
return -ENODEV;
getDFSRetry:
- rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB,
- (void **) &pSMBr);
+ /*
+ * Use smb_init_no_reconnect() instead of smb_init() as
+ * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus
+ * causing an infinite recursion.
+ */
+ rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc,
+ (void **)&pSMB, (void **)&pSMBr);
if (rc)
return rc;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b5aba1c895cb..37e91f27f49b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -429,7 +429,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->ssocket->state, server->ssocket->flags);
sock_release(server->ssocket);
server->ssocket = NULL;
- }
+ } else if (cifs_rdma_enabled(server))
+ smbd_destroy(server);
server->sequence_number = 0;
server->session_estab = false;
kfree(server->session_key.response);
@@ -799,10 +800,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
wake_up_all(&server->request_q);
/* give those requests time to exit */
msleep(125);
- if (cifs_rdma_enabled(server) && server->smbd_conn) {
- smbd_destroy(server->smbd_conn);
- server->smbd_conn = NULL;
- }
+ if (cifs_rdma_enabled(server))
+ smbd_destroy(server);
if (server->ssocket) {
sock_release(server->ssocket);
server->ssocket = NULL;
@@ -2940,7 +2939,7 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
static struct cifs_ses *
cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
{
- int rc = -ENOMEM;
+ int rc = 0;
unsigned int xid;
struct cifs_ses *ses;
struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
@@ -2982,6 +2981,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
return ses;
}
+ rc = -ENOMEM;
+
cifs_dbg(FYI, "Existing smb sess not found\n");
ses = sesInfoAlloc();
if (ses == NULL)
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7b482489bd22..0613b86cc3fd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3991,9 +3991,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
io_error:
kunmap(page);
- unlock_page(page);
read_complete:
+ unlock_page(page);
return rc;
}
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 5b1c33d9283a..f590149e21ba 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -481,6 +481,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
oparms.disposition = FILE_CREATE;
oparms.fid = &fid;
oparms.reconnect = false;
+ oparms.mode = 0644;
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
NULL);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index dd67f56ea61e..c9ebfff5190a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -338,6 +338,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
cifs_dbg(VFS, "Length less than smb header size\n");
}
return -EIO;
+ } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) {
+ cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n",
+ __func__, smb->WordCount);
+ return -EIO;
}
/* otherwise, there is enough to get to the BCC */
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 7177720e822e..d3d5d2c6c401 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -302,6 +302,9 @@ static const bool has_smb2_data_area[NUMBER_OF_SMB2_COMMANDS] = {
char *
smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
{
+ const int max_off = 4096;
+ const int max_len = 128 * 1024;
+
*off = 0;
*len = 0;
@@ -369,29 +372,20 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_sync_hdr *shdr)
* Invalid length or offset probably means data area is invalid, but
* we have little choice but to ignore the data area in this case.
*/
- if (*off > 4096) {
- cifs_dbg(VFS, "offset %d too large, data area ignored\n", *off);
- *len = 0;
- *off = 0;
- } else if (*off < 0) {
- cifs_dbg(VFS, "negative offset %d to data invalid ignore data area\n",
- *off);
+ if (unlikely(*off < 0 || *off > max_off ||
+ *len < 0 || *len > max_len)) {
+ cifs_dbg(VFS, "%s: invalid data area (off=%d len=%d)\n",
+ __func__, *off, *len);
*off = 0;
*len = 0;
- } else if (*len < 0) {
- cifs_dbg(VFS, "negative data length %d invalid, data area ignored\n",
- *len);
- *len = 0;
- } else if (*len > 128 * 1024) {
- cifs_dbg(VFS, "data area larger than 128K: %d\n", *len);
+ } else if (*off == 0) {
*len = 0;
}
/* return pointer to beginning of data area, ie offset from SMB start */
- if ((*off != 0) && (*len != 0))
+ if (*off > 0 && *len > 0)
return (char *)shdr + *off;
- else
- return NULL;
+ return NULL;
}
/*
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f906984eb25b..01ab4496cb89 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -79,6 +79,7 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
printk_once(KERN_WARNING "server overflowed SMB3 credits\n");
}
+ WARN_ON_ONCE(server->in_flight == 0);
server->in_flight--;
if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
rc = change_conf(server);
@@ -475,7 +476,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
if (rc == -EOPNOTSUPP) {
cifs_dbg(FYI,
"server does not support query network interfaces\n");
- goto out;
+ ret_data_len = 0;
} else if (rc != 0) {
cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc);
goto out;
@@ -1180,7 +1181,7 @@ smb2_copychunk_range(const unsigned int xid,
pcchunk->SourceOffset = cpu_to_le64(src_off);
pcchunk->TargetOffset = cpu_to_le64(dest_off);
pcchunk->Length =
- cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
+ cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
kfree(retbuf);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 3485b9bf970f..50c6405befc4 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3152,12 +3152,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
(struct smb2_sync_hdr *)rdata->iov[0].iov_base;
unsigned int credits_received = 0;
struct smb_rqst rqst = { .rq_iov = &rdata->iov[1],
- .rq_nvec = 1,
- .rq_pages = rdata->pages,
- .rq_offset = rdata->page_offset,
- .rq_npages = rdata->nr_pages,
- .rq_pagesz = rdata->pagesz,
- .rq_tailsz = rdata->tailsz };
+ .rq_nvec = 1, };
+
+ if (rdata->got_bytes) {
+ rqst.rq_pages = rdata->pages;
+ rqst.rq_offset = rdata->page_offset;
+ rqst.rq_npages = rdata->nr_pages;
+ rqst.rq_pagesz = rdata->pagesz;
+ rqst.rq_tailsz = rdata->tailsz;
+ }
cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
__func__, mid->mid, mid->mid_state, rdata->result,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 8a44d59947b7..dd6e749b2400 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -610,7 +610,7 @@ struct smb2_tree_disconnect_rsp {
#define SMB2_CREATE_SD_BUFFER "SecD" /* security descriptor */
#define SMB2_CREATE_DURABLE_HANDLE_REQUEST "DHnQ"
#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT "DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE "AISi"
+#define SMB2_CREATE_ALLOCATION_SIZE "AlSi"
#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
#define SMB2_CREATE_TIMEWARP_REQUEST "TWrp"
#define SMB2_CREATE_QUERY_ON_DISK_ID "QFid"
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 784628ec4bc4..117dc475bea8 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -320,6 +320,9 @@ static int smbd_conn_upcall(
info->transport_status = SMBD_DISCONNECTED;
smbd_process_disconnected(info);
+ wake_up(&info->disconn_wait);
+ wake_up_interruptible(&info->wait_reassembly_queue);
+ wake_up_interruptible_all(&info->wait_send_queue);
break;
default:
@@ -703,8 +706,13 @@ static struct rdma_cm_id *smbd_create_id(
log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
goto out;
}
- wait_for_completion_interruptible_timeout(
+ rc = wait_for_completion_interruptible_timeout(
&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+ /* e.g. if interrupted returns -ERESTARTSYS */
+ if (rc < 0) {
+ log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
+ goto out;
+ }
rc = info->ri_rc;
if (rc) {
log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
@@ -717,8 +725,13 @@ static struct rdma_cm_id *smbd_create_id(
log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
goto out;
}
- wait_for_completion_interruptible_timeout(
+ rc = wait_for_completion_interruptible_timeout(
&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
+ /* e.g. if interrupted returns -ERESTARTSYS */
+ if (rc < 0) {
+ log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
+ goto out;
+ }
rc = info->ri_rc;
if (rc) {
log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
@@ -1478,21 +1491,102 @@ static void idle_connection_timer(struct work_struct *work)
info->keep_alive_interval*HZ);
}
-/* Destroy this SMBD connection, called from upper layer */
-void smbd_destroy(struct smbd_connection *info)
+/*
+ * Destroy the transport and related RDMA and memory resources
+ * Need to go through all the pending counters and make sure on one is using
+ * the transport while it is destroyed
+ */
+void smbd_destroy(struct TCP_Server_Info *server)
{
+ struct smbd_connection *info = server->smbd_conn;
+ struct smbd_response *response;
+ unsigned long flags;
+
+ if (!info) {
+ log_rdma_event(INFO, "rdma session already destroyed\n");
+ return;
+ }
+
log_rdma_event(INFO, "destroying rdma session\n");
+ if (info->transport_status != SMBD_DISCONNECTED) {
+ rdma_disconnect(server->smbd_conn->id);
+ log_rdma_event(INFO, "wait for transport being disconnected\n");
+ wait_event(
+ info->disconn_wait,
+ info->transport_status == SMBD_DISCONNECTED);
+ }
- /* Kick off the disconnection process */
- smbd_disconnect_rdma_connection(info);
+ log_rdma_event(INFO, "destroying qp\n");
+ ib_drain_qp(info->id->qp);
+ rdma_destroy_qp(info->id);
+
+ log_rdma_event(INFO, "cancelling idle timer\n");
+ cancel_delayed_work_sync(&info->idle_timer_work);
+ log_rdma_event(INFO, "cancelling send immediate work\n");
+ cancel_delayed_work_sync(&info->send_immediate_work);
+
+ log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
+ wait_event(info->wait_send_pending,
+ atomic_read(&info->send_pending) == 0);
+ wait_event(info->wait_send_payload_pending,
+ atomic_read(&info->send_payload_pending) == 0);
+
+ /* It's not posssible for upper layer to get to reassembly */
+ log_rdma_event(INFO, "drain the reassembly queue\n");
+ do {
+ spin_lock_irqsave(&info->reassembly_queue_lock, flags);
+ response = _get_first_reassembly(info);
+ if (response) {
+ list_del(&response->list);
+ spin_unlock_irqrestore(
+ &info->reassembly_queue_lock, flags);
+ put_receive_buffer(info, response);
+ } else
+ spin_unlock_irqrestore(
+ &info->reassembly_queue_lock, flags);
+ } while (response);
+ info->reassembly_data_length = 0;
+
+ log_rdma_event(INFO, "free receive buffers\n");
+ wait_event(info->wait_receive_queues,
+ info->count_receive_queue + info->count_empty_packet_queue
+ == info->receive_credit_max);
+ destroy_receive_buffers(info);
+
+ /*
+ * For performance reasons, memory registration and deregistration
+ * are not locked by srv_mutex. It is possible some processes are
+ * blocked on transport srv_mutex while holding memory registration.
+ * Release the transport srv_mutex to allow them to hit the failure
+ * path when sending data, and then release memory registartions.
+ */
+ log_rdma_event(INFO, "freeing mr list\n");
+ wake_up_interruptible_all(&info->wait_mr);
+ while (atomic_read(&info->mr_used_count)) {
+ mutex_unlock(&server->srv_mutex);
+ msleep(1000);
+ mutex_lock(&server->srv_mutex);
+ }
+ destroy_mr_list(info);
- log_rdma_event(INFO, "wait for transport being destroyed\n");
- wait_event(info->wait_destroy,
- info->transport_status == SMBD_DESTROYED);
+ ib_free_cq(info->send_cq);
+ ib_free_cq(info->recv_cq);
+ ib_dealloc_pd(info->pd);
+ rdma_destroy_id(info->id);
+
+ /* free mempools */
+ mempool_destroy(info->request_mempool);
+ kmem_cache_destroy(info->request_cache);
+
+ mempool_destroy(info->response_mempool);
+ kmem_cache_destroy(info->response_cache);
+
+ info->transport_status = SMBD_DESTROYED;
destroy_workqueue(info->workqueue);
log_rdma_event(INFO, "rdma session destroyed\n");
kfree(info);
+ server->smbd_conn = NULL;
}
/*
@@ -1514,17 +1608,9 @@ int smbd_reconnect(struct TCP_Server_Info *server)
*/
if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
log_rdma_event(INFO, "disconnecting transport\n");
- smbd_disconnect_rdma_connection(server->smbd_conn);
+ smbd_destroy(server);
}
- /* wait until the transport is destroyed */
- if (!wait_event_timeout(server->smbd_conn->wait_destroy,
- server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ))
- return -EAGAIN;
-
- destroy_workqueue(server->smbd_conn->workqueue);
- kfree(server->smbd_conn);
-
create_conn:
log_rdma_event(INFO, "creating rdma session\n");
server->smbd_conn = smbd_get_connection(
@@ -1741,12 +1827,13 @@ static struct smbd_connection *_smbd_get_connection(
conn_param.retry_count = SMBD_CM_RETRY;
conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
conn_param.flow_control = 0;
- init_waitqueue_head(&info->wait_destroy);
log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
&addr_in->sin_addr, port);
init_waitqueue_head(&info->conn_wait);
+ init_waitqueue_head(&info->disconn_wait);
+ init_waitqueue_head(&info->wait_reassembly_queue);
rc = rdma_connect(info->id, &conn_param);
if (rc) {
log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
@@ -1770,8 +1857,6 @@ static struct smbd_connection *_smbd_get_connection(
}
init_waitqueue_head(&info->wait_send_queue);
- init_waitqueue_head(&info->wait_reassembly_queue);
-
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
queue_delayed_work(info->workqueue, &info->idle_timer_work,
@@ -1812,7 +1897,8 @@ static struct smbd_connection *_smbd_get_connection(
allocate_mr_failed:
/* At this point, need to a full transport shutdown */
- smbd_destroy(info);
+ server->smbd_conn = info;
+ smbd_destroy(server);
return NULL;
negotiation_failed:
@@ -2378,6 +2464,7 @@ static int allocate_mr_list(struct smbd_connection *info)
atomic_set(&info->mr_ready_count, 0);
atomic_set(&info->mr_used_count, 0);
init_waitqueue_head(&info->wait_for_mr_cleanup);
+ INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
/* Allocate more MRs (2x) than hardware responder_resources */
for (i = 0; i < info->responder_resources * 2; i++) {
smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
@@ -2406,13 +2493,13 @@ static int allocate_mr_list(struct smbd_connection *info)
list_add_tail(&smbdirect_mr->list, &info->mr_list);
atomic_inc(&info->mr_ready_count);
}
- INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
return 0;
out:
kfree(smbdirect_mr);
list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
+ list_del(&smbdirect_mr->list);
ib_dereg_mr(smbdirect_mr->mr);
kfree(smbdirect_mr->sgl);
kfree(smbdirect_mr);
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index b5c240ff2191..b0ca7df41454 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -71,6 +71,7 @@ struct smbd_connection {
struct completion ri_done;
wait_queue_head_t conn_wait;
wait_queue_head_t wait_destroy;
+ wait_queue_head_t disconn_wait;
struct completion negotiate_completion;
bool negotiate_done;
@@ -288,7 +289,7 @@ struct smbd_connection *smbd_get_connection(
/* Reconnect SMBDirect session */
int smbd_reconnect(struct TCP_Server_Info *server);
/* Destroy SMBDirect session */
-void smbd_destroy(struct smbd_connection *info);
+void smbd_destroy(struct TCP_Server_Info *server);
/* Interface for carrying upper layer I/O through send/recv */
int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
@@ -331,7 +332,7 @@ struct smbd_connection {};
static inline void *smbd_get_connection(
struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;}
static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
-static inline void smbd_destroy(struct smbd_connection *info) {}
+static inline void smbd_destroy(struct TCP_Server_Info *server) {}
static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
static inline int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) {return -1; }
#endif
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4fce1da7db23..acdc802bfe9a 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
+#include <linux/pm_runtime.h>
#include <linux/poll.h>
#include "internal.h"
@@ -330,8 +331,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf,
}
EXPORT_SYMBOL_GPL(debugfs_attr_read);
-ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct dentry *dentry = F_DENTRY(file);
ssize_t ret;
@@ -339,12 +340,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
ret = debugfs_file_get(dentry);
if (unlikely(ret))
return ret;
- ret = simple_attr_write(file, buf, len, ppos);
+ if (is_signed)
+ ret = simple_attr_write_signed(file, buf, len, ppos);
+ else
+ ret = simple_attr_write(file, buf, len, ppos);
debugfs_file_put(dentry);
return ret;
}
+
+ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(debugfs_attr_write);
+ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return debugfs_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(debugfs_attr_write_signed);
+
static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode,
struct dentry *parent, void *value,
const struct file_operations *fops,
@@ -742,11 +759,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val)
*val = atomic_read((atomic_t *)data);
return 0;
}
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get,
debugfs_atomic_t_set, "%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
"%lld\n");
-DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
+DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
"%lld\n");
/**
@@ -1068,7 +1085,14 @@ static int debugfs_show_regset32(struct seq_file *s, void *data)
{
struct debugfs_regset32 *regset = s->private;
+ if (regset->dev)
+ pm_runtime_get_sync(regset->dev);
+
debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
+
+ if (regset->dev)
+ pm_runtime_put(regset->dev);
+
return 0;
}
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4661ecaf6741..1e4ae78f64a8 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -275,13 +275,9 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- dentry = lookup_one_len_unlocked(name, parent, strlen(name));
+ dentry = lookup_positive_unlocked(name, parent, strlen(name));
if (IS_ERR(dentry))
return NULL;
- if (!d_really_is_positive(dentry)) {
- dput(dentry);
- return NULL;
- }
return dentry;
}
EXPORT_SYMBOL_GPL(debugfs_lookup);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d4e204473e76..0864481d8551 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1858,7 +1858,7 @@ static void del_timeout(struct dlm_lkb *lkb)
void dlm_scan_timeout(struct dlm_ls *ls)
{
struct dlm_rsb *r;
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = NULL, *iter;
int do_cancel, do_warn;
s64 wait_us;
@@ -1869,27 +1869,28 @@ void dlm_scan_timeout(struct dlm_ls *ls)
do_cancel = 0;
do_warn = 0;
mutex_lock(&ls->ls_timeout_mutex);
- list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+ list_for_each_entry(iter, &ls->ls_timeout, lkb_time_list) {
wait_us = ktime_to_us(ktime_sub(ktime_get(),
- lkb->lkb_timestamp));
+ iter->lkb_timestamp));
- if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
- wait_us >= (lkb->lkb_timeout_cs * 10000))
+ if ((iter->lkb_exflags & DLM_LKF_TIMEOUT) &&
+ wait_us >= (iter->lkb_timeout_cs * 10000))
do_cancel = 1;
- if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+ if ((iter->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
wait_us >= dlm_config.ci_timewarn_cs * 10000)
do_warn = 1;
if (!do_cancel && !do_warn)
continue;
- hold_lkb(lkb);
+ hold_lkb(iter);
+ lkb = iter;
break;
}
mutex_unlock(&ls->ls_timeout_mutex);
- if (!do_cancel && !do_warn)
+ if (!lkb)
break;
r = lkb->lkb_resource;
@@ -5243,21 +5244,18 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
{
- struct dlm_lkb *lkb;
- int found = 0;
+ struct dlm_lkb *lkb = NULL, *iter;
mutex_lock(&ls->ls_waiters_mutex);
- list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
- if (lkb->lkb_flags & DLM_IFL_RESEND) {
- hold_lkb(lkb);
- found = 1;
+ list_for_each_entry(iter, &ls->ls_waiters, lkb_wait_reply) {
+ if (iter->lkb_flags & DLM_IFL_RESEND) {
+ hold_lkb(iter);
+ lkb = iter;
break;
}
}
mutex_unlock(&ls->ls_waiters_mutex);
- if (!found)
- lkb = NULL;
return lkb;
}
@@ -5916,37 +5914,36 @@ int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, void *name, unsigned int namelen,
unsigned long timeout_cs, uint32_t *lkid)
{
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = NULL, *iter;
struct dlm_user_args *ua;
int found_other_mode = 0;
- int found = 0;
int rv = 0;
mutex_lock(&ls->ls_orphans_mutex);
- list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
- if (lkb->lkb_resource->res_length != namelen)
+ list_for_each_entry(iter, &ls->ls_orphans, lkb_ownqueue) {
+ if (iter->lkb_resource->res_length != namelen)
continue;
- if (memcmp(lkb->lkb_resource->res_name, name, namelen))
+ if (memcmp(iter->lkb_resource->res_name, name, namelen))
continue;
- if (lkb->lkb_grmode != mode) {
+ if (iter->lkb_grmode != mode) {
found_other_mode = 1;
continue;
}
- found = 1;
- list_del_init(&lkb->lkb_ownqueue);
- lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
- *lkid = lkb->lkb_id;
+ lkb = iter;
+ list_del_init(&iter->lkb_ownqueue);
+ iter->lkb_flags &= ~DLM_IFL_ORPHAN;
+ *lkid = iter->lkb_id;
break;
}
mutex_unlock(&ls->ls_orphans_mutex);
- if (!found && found_other_mode) {
+ if (!lkb && found_other_mode) {
rv = -EAGAIN;
goto out;
}
- if (!found) {
+ if (!lkb) {
rv = -ENOENT;
goto out;
}
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index 43a96c330570..ea50f59610e5 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -115,7 +115,7 @@ static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
void dlm_timeout_warn(struct dlm_lkb *lkb)
{
- struct sk_buff *uninitialized_var(send_skb);
+ struct sk_buff *send_skb;
struct dlm_lock_data *data;
size_t size;
int rv;
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index e0c05e08d8bf..c6079f6c6a79 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -22,20 +22,20 @@ static struct list_head recv_list;
static wait_queue_head_t send_wq;
static wait_queue_head_t recv_wq;
-struct plock_op {
- struct list_head list;
- int done;
- struct dlm_plock_info info;
- int (*callback)(struct file_lock *fl, int result);
-};
-
-struct plock_xop {
- struct plock_op xop;
+struct plock_async_data {
void *fl;
void *file;
struct file_lock flc;
+ int (*callback)(struct file_lock *fl, int result);
};
+struct plock_op {
+ struct list_head list;
+ int done;
+ struct dlm_plock_info info;
+ /* if set indicates async handling */
+ struct plock_async_data *data;
+};
static inline void set_version(struct dlm_plock_info *info)
{
@@ -61,6 +61,12 @@ static int check_version(struct dlm_plock_info *info)
return 0;
}
+static void dlm_release_plock_op(struct plock_op *op)
+{
+ kfree(op->data);
+ kfree(op);
+}
+
static void send_op(struct plock_op *op)
{
set_version(&op->info);
@@ -77,8 +83,7 @@ static void send_op(struct plock_op *op)
abandoned waiter. So, we have to insert the unlock-close when the
lock call is interrupted. */
-static void do_unlock_close(struct dlm_ls *ls, u64 number,
- struct file *file, struct file_lock *fl)
+static void do_unlock_close(const struct dlm_plock_info *info)
{
struct plock_op *op;
@@ -87,15 +92,12 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
return;
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
- op->info.fsid = ls->ls_global_id;
- op->info.number = number;
+ op->info.pid = info->pid;
+ op->info.fsid = info->fsid;
+ op->info.number = info->number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
- if (fl->fl_lmops && fl->fl_lmops->lm_grant)
- op->info.owner = (__u64) fl->fl_pid;
- else
- op->info.owner = (__u64)(long) fl->fl_owner;
+ op->info.owner = info->owner;
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
@@ -104,22 +106,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
int cmd, struct file_lock *fl)
{
+ struct plock_async_data *op_data;
struct dlm_ls *ls;
struct plock_op *op;
- struct plock_xop *xop;
int rv;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
- xop = kzalloc(sizeof(*xop), GFP_NOFS);
- if (!xop) {
+ op = kzalloc(sizeof(*op), GFP_NOFS);
+ if (!op) {
rv = -ENOMEM;
goto out;
}
- op = &xop->xop;
op->info.optype = DLM_PLOCK_OP_LOCK;
op->info.pid = fl->fl_pid;
op->info.ex = (fl->fl_type == F_WRLCK);
@@ -128,35 +129,45 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
+ /* async handling */
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
+ op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
+ if (!op_data) {
+ dlm_release_plock_op(op);
+ rv = -ENOMEM;
+ goto out;
+ }
+
/* fl_owner is lockd which doesn't distinguish
processes on the nfs client */
op->info.owner = (__u64) fl->fl_pid;
- op->callback = fl->fl_lmops->lm_grant;
- locks_init_lock(&xop->flc);
- locks_copy_lock(&xop->flc, fl);
- xop->fl = fl;
- xop->file = file;
+ op_data->callback = fl->fl_lmops->lm_grant;
+ locks_init_lock(&op_data->flc);
+ locks_copy_lock(&op_data->flc, fl);
+ op_data->fl = fl;
+ op_data->file = file;
+
+ op->data = op_data;
+
+ send_op(op);
+ rv = FILE_LOCK_DEFERRED;
+ goto out;
} else {
op->info.owner = (__u64)(long) fl->fl_owner;
}
send_op(op);
- if (!op->callback) {
- rv = wait_event_interruptible(recv_wq, (op->done != 0));
- if (rv == -ERESTARTSYS) {
- log_debug(ls, "dlm_posix_lock: wait killed %llx",
- (unsigned long long)number);
- spin_lock(&ops_lock);
- list_del(&op->list);
- spin_unlock(&ops_lock);
- kfree(xop);
- do_unlock_close(ls, number, file, fl);
- goto out;
- }
- } else {
- rv = FILE_LOCK_DEFERRED;
+ rv = wait_event_killable(recv_wq, (op->done != 0));
+ if (rv == -ERESTARTSYS) {
+ spin_lock(&ops_lock);
+ list_del(&op->list);
+ spin_unlock(&ops_lock);
+ log_debug(ls, "%s: wait interrupted %x %llx pid %d",
+ __func__, ls->ls_global_id,
+ (unsigned long long)number, op->info.pid);
+ dlm_release_plock_op(op);
+ do_unlock_close(&op->info);
goto out;
}
@@ -176,7 +187,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
(unsigned long long)number);
}
- kfree(xop);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
return rv;
@@ -186,11 +197,11 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock);
/* Returns failure iff a successful lock operation should be canceled */
static int dlm_plock_callback(struct plock_op *op)
{
+ struct plock_async_data *op_data = op->data;
struct file *file;
struct file_lock *fl;
struct file_lock *flc;
int (*notify)(struct file_lock *fl, int result) = NULL;
- struct plock_xop *xop = (struct plock_xop *)op;
int rv = 0;
spin_lock(&ops_lock);
@@ -202,10 +213,10 @@ static int dlm_plock_callback(struct plock_op *op)
spin_unlock(&ops_lock);
/* check if the following 2 are still valid or make a copy */
- file = xop->file;
- flc = &xop->flc;
- fl = xop->fl;
- notify = op->callback;
+ file = op_data->file;
+ flc = &op_data->flc;
+ fl = op_data->fl;
+ notify = op_data->callback;
if (op->info.rv) {
notify(fl, op->info.rv);
@@ -236,7 +247,7 @@ static int dlm_plock_callback(struct plock_op *op)
}
out:
- kfree(xop);
+ dlm_release_plock_op(op);
return rv;
}
@@ -306,7 +317,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = 0;
out_free:
- kfree(op);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
fl->fl_flags = fl_flags;
@@ -366,13 +377,15 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
locks_init_lock(fl);
fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
fl->fl_flags = FL_POSIX;
- fl->fl_pid = -op->info.pid;
+ fl->fl_pid = op->info.pid;
+ if (op->info.nodeid != dlm_our_nodeid())
+ fl->fl_pid = -fl->fl_pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
rv = 0;
}
- kfree(op);
+ dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
return rv;
@@ -395,7 +408,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
list_del(&op->list);
else
- list_move(&op->list, &recv_list);
+ list_move_tail(&op->list, &recv_list);
memcpy(&info, &op->info, sizeof(info));
}
spin_unlock(&ops_lock);
@@ -408,7 +421,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
(the process did not make an unlock call). */
if (op->info.flags & DLM_PLOCK_FL_CLOSE)
- kfree(op);
+ dlm_release_plock_op(op);
if (copy_to_user(u, &info, sizeof(info)))
return -EFAULT;
@@ -420,9 +433,9 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
loff_t *ppos)
{
+ struct plock_op *op = NULL, *iter;
struct dlm_plock_info info;
- struct plock_op *op;
- int found = 0, do_callback = 0;
+ int do_callback = 0;
if (count != sizeof(info))
return -EINVAL;
@@ -433,31 +446,63 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
if (check_version(&info))
return -EINVAL;
+ /*
+ * The results for waiting ops (SETLKW) can be returned in any
+ * order, so match all fields to find the op. The results for
+ * non-waiting ops are returned in the order that they were sent
+ * to userspace, so match the result with the first non-waiting op.
+ */
spin_lock(&ops_lock);
- list_for_each_entry(op, &recv_list, list) {
- if (op->info.fsid == info.fsid &&
- op->info.number == info.number &&
- op->info.owner == info.owner) {
- list_del_init(&op->list);
- memcpy(&op->info, &info, sizeof(info));
- if (op->callback)
- do_callback = 1;
- else
- op->done = 1;
- found = 1;
- break;
+ if (info.wait) {
+ list_for_each_entry(iter, &recv_list, list) {
+ if (iter->info.fsid == info.fsid &&
+ iter->info.number == info.number &&
+ iter->info.owner == info.owner &&
+ iter->info.pid == info.pid &&
+ iter->info.start == info.start &&
+ iter->info.end == info.end &&
+ iter->info.ex == info.ex &&
+ iter->info.wait) {
+ op = iter;
+ break;
+ }
}
+ } else {
+ list_for_each_entry(iter, &recv_list, list) {
+ if (!iter->info.wait &&
+ iter->info.fsid == info.fsid) {
+ op = iter;
+ break;
+ }
+ }
+ }
+
+ if (op) {
+ /* Sanity check that op and info match. */
+ if (info.wait)
+ WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
+ else
+ WARN_ON(op->info.number != info.number ||
+ op->info.owner != info.owner ||
+ op->info.optype != info.optype);
+
+ list_del_init(&op->list);
+ memcpy(&op->info, &info, sizeof(info));
+ if (op->data)
+ do_callback = 1;
+ else
+ op->done = 1;
}
spin_unlock(&ops_lock);
- if (found) {
+ if (op) {
if (do_callback)
dlm_plock_callback(op);
else
wake_up(&recv_wq);
} else
- log_print("dev_write no op %x %llx", info.fsid,
- (unsigned long long)info.number);
+ log_print("%s: no op %x %llx", __func__,
+ info.fsid, (unsigned long long)info.number);
return count;
}
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index ce2aa54ca2e2..98b710cc9cf3 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -734,10 +734,9 @@ void dlm_recovered_lock(struct dlm_rsb *r)
static void recover_lvb(struct dlm_rsb *r)
{
- struct dlm_lkb *lkb, *high_lkb = NULL;
+ struct dlm_lkb *big_lkb = NULL, *iter, *high_lkb = NULL;
uint32_t high_seq = 0;
int lock_lvb_exists = 0;
- int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
if (!rsb_flag(r, RSB_NEW_MASTER2) &&
@@ -753,37 +752,37 @@ static void recover_lvb(struct dlm_rsb *r)
/* we are the new master, so figure out if VALNOTVALID should
be set, and set the rsb lvb from the best lkb available. */
- list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+ list_for_each_entry(iter, &r->res_grantqueue, lkb_statequeue) {
+ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
- if (lkb->lkb_grmode > DLM_LOCK_CR) {
- big_lock_exists = 1;
+ if (iter->lkb_grmode > DLM_LOCK_CR) {
+ big_lkb = iter;
goto setflag;
}
- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
- high_lkb = lkb;
- high_seq = lkb->lkb_lvbseq;
+ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
+ high_lkb = iter;
+ high_seq = iter->lkb_lvbseq;
}
}
- list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
- if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
+ list_for_each_entry(iter, &r->res_convertqueue, lkb_statequeue) {
+ if (!(iter->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
- if (lkb->lkb_grmode > DLM_LOCK_CR) {
- big_lock_exists = 1;
+ if (iter->lkb_grmode > DLM_LOCK_CR) {
+ big_lkb = iter;
goto setflag;
}
- if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
- high_lkb = lkb;
- high_seq = lkb->lkb_lvbseq;
+ if (((int)iter->lkb_lvbseq - (int)high_seq) >= 0) {
+ high_lkb = iter;
+ high_seq = iter->lkb_lvbseq;
}
}
@@ -792,7 +791,7 @@ static void recover_lvb(struct dlm_rsb *r)
goto out;
/* lvb is invalidated if only NL/CR locks remain */
- if (!big_lock_exists)
+ if (!big_lkb)
rsb_set_flag(r, RSB_VALNOTVALID);
if (!r->res_lvbptr) {
@@ -801,9 +800,9 @@ static void recover_lvb(struct dlm_rsb *r)
goto out;
}
- if (big_lock_exists) {
- r->res_lvbseq = lkb->lkb_lvbseq;
- memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
+ if (big_lkb) {
+ r->res_lvbseq = big_lkb->lkb_lvbseq;
+ memcpy(r->res_lvbptr, big_lkb->lkb_lvbptr, lvblen);
} else if (high_lkb) {
r->res_lvbseq = high_lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ce1d1711fbba..66864100b823 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -174,11 +174,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait)
return events;
}
-static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+ lockdep_assert_held(&ctx->wqh.lock);
+
+ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
ctx->count -= *cnt;
}
+EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
/**
* eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 00e759f05161..598dc2874808 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -68,10 +68,7 @@ struct mb_cache;
* second extended-fs super-block data in memory
*/
struct ext2_sb_info {
- unsigned long s_frag_size; /* Size of a fragment in bytes */
- unsigned long s_frags_per_block;/* Number of fragments per block */
unsigned long s_inodes_per_block;/* Number of inodes per block */
- unsigned long s_frags_per_group;/* Number of fragments in a group */
unsigned long s_blocks_per_group;/* Number of blocks in a group */
unsigned long s_inodes_per_group;/* Number of inodes in a group */
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
@@ -177,6 +174,7 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
#define EXT2_MIN_BLOCK_SIZE 1024
#define EXT2_MAX_BLOCK_SIZE 4096
#define EXT2_MIN_BLOCK_LOG_SIZE 10
+#define EXT2_MAX_BLOCK_LOG_SIZE 16
#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize)
#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32))
#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
@@ -185,15 +183,6 @@ static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb)
#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino)
/*
- * Macro-instructions used to manage fragments
- */
-#define EXT2_MIN_FRAG_SIZE 1024
-#define EXT2_MAX_FRAG_SIZE 4096
-#define EXT2_MIN_FRAG_LOG_SIZE 10
-#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size)
-#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block)
-
-/*
* Structure of a blocks group descriptor
*/
struct ext2_group_desc
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 44a1f356aca2..5c0af53f2e8f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -691,10 +691,9 @@ static int ext2_setup_super (struct super_block * sb,
es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
if (test_opt (sb, DEBUG))
- ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, "
+ ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, gc=%lu, "
"bpg=%lu, ipg=%lu, mo=%04lx]",
EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize,
- sbi->s_frag_size,
sbi->s_groups_count,
EXT2_BLOCKS_PER_GROUP(sb),
EXT2_INODES_PER_GROUP(sb),
@@ -978,6 +977,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT2_MAX_BLOCK_LOG_SIZE - BLOCK_SIZE_BITS)) {
+ ext2_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto failed_mount;
+ }
blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
@@ -1033,14 +1039,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
}
}
- sbi->s_frag_size = EXT2_MIN_FRAG_SIZE <<
- le32_to_cpu(es->s_log_frag_size);
- if (sbi->s_frag_size == 0)
- goto cantfind_ext2;
- sbi->s_frags_per_block = sb->s_blocksize / sbi->s_frag_size;
-
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
- sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
if (EXT2_INODE_SIZE(sb) == 0)
@@ -1068,11 +1067,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (sb->s_blocksize != sbi->s_frag_size) {
+ if (es->s_log_frag_size != es->s_log_block_size) {
ext2_msg(sb, KERN_ERR,
- "error: fragsize %lu != blocksize %lu"
- "(not supported yet)",
- sbi->s_frag_size, sb->s_blocksize);
+ "error: fragsize log %u != blocksize log %u",
+ le32_to_cpu(es->s_log_frag_size), sb->s_blocksize_bits);
goto failed_mount;
}
@@ -1082,12 +1080,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group);
goto failed_mount;
}
- if (sbi->s_frags_per_group > sb->s_blocksize * 8) {
- ext2_msg(sb, KERN_ERR,
- "error: #fragments per group too big: %lu",
- sbi->s_frags_per_group);
- goto failed_mount;
- }
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > sb->s_blocksize * 8) {
ext2_msg(sb, KERN_ERR,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index bd1d68ff3a9f..437175bce22e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -664,10 +664,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
EXT2_I(inode)->i_block_group);
- int block = ext2_new_block(inode, goal, &error);
+ ext2_fsblk_t block = ext2_new_block(inode, goal, &error);
if (error)
goto cleanup;
- ea_idebug(inode, "creating block %d", block);
+ ea_idebug(inode, "creating block %lu", block);
new_bh = sb_getblk(sb, block);
if (unlikely(!new_bh)) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9b63f5416a2f..7f3b25b3fa6d 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -67,6 +67,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
+ /* usually, the umask is applied by posix_acl_create(), but if
+ ext4 ACL support is disabled at compile time, we need to do
+ it here, because posix_acl_create() will never be called */
+ inode->i_mode &= ~current_umask();
+
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index f9645de9d04c..9761aeb4b224 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ ext4_grpblk_t next_zero_bit;
+ unsigned long bitmap_size = sb->s_blocksize * 8;
+ unsigned int offset = num_clusters_in_group(sb, block_group);
+
+ if (bitmap_size <= offset)
+ return 0;
+
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
+
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+}
+
/*
* Return the block number which was discovered to be invalid, or 0 if
* the block bitmap is valid.
@@ -395,6 +411,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
return -EFSCORRUPTED;
}
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
+ block_group, blk);
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
set_buffer_verified(bh);
verified:
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6938dff9f04b..fa2579abea7d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -503,7 +503,7 @@ enum {
*
* It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/
-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
static inline void ext4_check_flag_values(void)
@@ -929,11 +929,13 @@ do { \
* where the second inode has larger inode number
* than the first
* I_DATA_SEM_QUOTA - Used for quota inodes only
+ * I_DATA_SEM_EA - Used for ea_inodes only
*/
enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
+ I_DATA_SEM_EA
};
@@ -1410,7 +1412,7 @@ struct ext4_sb_info {
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
- struct block_device *journal_bdev;
+ struct block_device *s_journal_bdev;
#ifdef CONFIG_QUOTA
/* Names of quota files with journalled quota */
char __rcu *s_qf_names[EXT4_MAXQUOTAS];
@@ -1498,7 +1500,7 @@ struct ext4_sb_info {
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
- atomic_t s_last_trim_minblks;
+ unsigned long s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 6c492fca60c4..d931252b7d0d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -997,6 +997,11 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ix = curp->p_idx;
}
+ if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
+ EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
+ return -EFSCORRUPTED;
+ }
+
len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
BUG_ON(len < 0);
if (len > 0) {
@@ -1006,11 +1011,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
}
- if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
- EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
- return -EFSCORRUPTED;
- }
-
ix->ei_block = cpu_to_le32(logical);
ext4_idx_store_pblock(ix, ptr);
le16_add_cpu(&curp->p_hdr->eh_entries, 1);
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 6f3f245f3a80..69c76327792e 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
+ if (keys[1].fmr_physical < keys[0].fmr_physical)
+ return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;
@@ -574,8 +576,8 @@ static bool ext4_getfsmap_is_valid_device(struct super_block *sb,
if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev))
return true;
- if (EXT4_SB(sb)->journal_bdev &&
- fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev))
+ if (EXT4_SB(sb)->s_journal_bdev &&
+ fm->fmr_device == new_encode_dev(EXT4_SB(sb)->s_journal_bdev->bd_dev))
return true;
return false;
}
@@ -645,9 +647,9 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
memset(handlers, 0, sizeof(handlers));
handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev);
handlers[0].gfd_fn = ext4_getfsmap_datadev;
- if (EXT4_SB(sb)->journal_bdev) {
+ if (EXT4_SB(sb)->s_journal_bdev) {
handlers[1].gfd_dev = new_encode_dev(
- EXT4_SB(sb)->journal_bdev->bd_dev);
+ EXT4_SB(sb)->s_journal_bdev->bd_dev);
handlers[1].gfd_fn = ext4_getfsmap_logdev;
}
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index a5442528a60d..9bf711d63368 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ unsigned int key;
int ret = -EIO;
*err = 0;
@@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
+ key = le32_to_cpu(p->key);
+ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
+ /* the block was out of range */
+ ret = -EFSCORRUPTED;
+ goto failure;
+ }
+ bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
@@ -635,6 +642,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
+
+ /*
+ * Update reserved blocks/metadata blocks after successful block
+ * allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, count, 1);
+
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index b1c6b9398eef..71bb3cfc5933 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -32,6 +32,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
+ void *end;
int free, min_offs;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
@@ -55,14 +56,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
/* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ while (!IS_LAST_ENTRY(entry)) {
+ void *next = EXT4_XATTR_NEXT(entry);
+
+ if (next >= end) {
+ EXT4_ERROR_INODE(inode,
+ "corrupt xattr in inline inode");
+ return 0;
+ }
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
+ entry = next;
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
@@ -157,7 +167,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
- ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);
@@ -207,7 +216,7 @@ out:
/*
* write the buffer to the inline inode.
* If 'create' is set, we don't need to do the extra copy in the xattr
- * value since it is already handled by ext4_xattr_ibody_inline_set.
+ * value since it is already handled by ext4_xattr_ibody_set.
* That saves us one memcpy.
*/
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
@@ -289,7 +298,7 @@ static int ext4_create_inline_data(handle_t *handle,
BUG_ON(!is.s.not_found);
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error) {
if (error == -ENOSPC)
ext4_clear_inode_state(inode,
@@ -349,7 +358,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
- if (error == -ENODATA)
+ if (error < 0)
goto out;
BUFFER_TRACE(is.iloc.bh, "get_write_access");
@@ -361,7 +370,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
i.value = value;
i.value_len = len;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -434,7 +443,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
if (error)
goto out;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -1978,8 +1987,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
i.value = value;
i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
- err = ext4_xattr_ibody_inline_set(handle, inode,
- &i, &is);
+ err = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (err)
goto out_error;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 95139c992a54..646285fbc9fc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -207,6 +207,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+ ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
@@ -666,16 +668,6 @@ found:
*/
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
}
-
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now. We don't
- * support fallocate for non extent files. So we can update
- * reserve space here.
- */
- if ((retval > 0) &&
- (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
- ext4_da_update_reserve_space(inode, retval, 1);
}
if (retval > 0) {
@@ -1426,7 +1418,8 @@ static int ext4_write_end(struct file *file,
int inline_data = ext4_has_inline_data(inode);
trace_ext4_write_end(inode, pos, len, copied);
- if (inline_data) {
+ if (inline_data &&
+ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -4549,7 +4542,7 @@ int ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
- return 0;
+ goto out_trace;
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
@@ -4560,16 +4553,15 @@ int ext4_truncate(struct inode *inode)
int has_inline = 1;
err = ext4_inline_data_truncate(inode, &has_inline);
- if (err)
- return err;
- if (has_inline)
- return 0;
+ if (err || has_inline)
+ goto out_trace;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
- if (ext4_inode_attach_jinode(inode) < 0)
- return 0;
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
+ goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4578,8 +4570,10 @@ int ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_trace;
+ }
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4628,6 +4622,7 @@ out_stop:
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
+out_trace:
trace_ext4_truncate_exit(inode);
return err;
}
@@ -4663,9 +4658,17 @@ static int __ext4_get_inode_loc(struct inode *inode,
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((inode->i_ino - 1) %
EXT4_INODES_PER_GROUP(sb));
- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+ block = ext4_inode_table(sb, gdp);
+ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
+ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
+ ext4_error(sb, "Invalid inode table block %llu in "
+ "block_group %u", block, iloc->block_group);
+ return -EFSCORRUPTED;
+ }
+ block += (inode_offset / inodes_per_block);
+
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
@@ -4853,8 +4856,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ int err;
+
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- return ext4_find_inline_data_nolock(inode);
+ err = ext4_find_inline_data_nolock(inode);
+ if (!err && ext4_has_inline_data(inode))
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return err;
} else
EXT4_I(inode)->i_inline_off = 0;
return 0;
@@ -4930,13 +4938,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- ext4_error_inode(inode, function, line, 0,
- "iget: root inode unallocated");
- ret = -EFSCORRUPTED;
- goto bad_inode;
- }
-
if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE;
@@ -5007,11 +5008,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0) {
- if ((inode->i_mode == 0 ||
+ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
ino != EXT4_BOOT_LOADER_INO) {
- /* this inode is deleted */
- ret = -ESTALE;
+ /* this inode is deleted or unallocated */
+ if (flags & EXT4_IGET_SPECIAL) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: special inode unallocated");
+ ret = -EFSCORRUPTED;
+ } else
+ ret = -ESTALE;
goto bad_inode;
}
/* The only unlinked inodes we let through here have
@@ -6033,6 +6039,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
return 0;
}
+ /*
+ * We may need to allocate external xattr block so we need quotas
+ * initialized. Here we can be called with various locks held so we
+ * cannot affort to initialize quotas ourselves. So just bail.
+ */
+ if (dquot_initialize_needed(inode))
+ return -EAGAIN;
+
/* try to expand with EAs present */
error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 484cb68c34d9..43e036f0b661 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
- if (inode_bl->i_nlink == 0) {
+ if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) {
/* this inode has never been used as a BOOT_LOADER */
set_nlink(inode_bl, 1);
i_uid_write(inode_bl, 0);
@@ -178,6 +178,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0);
+ EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
@@ -449,6 +450,10 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
if (ext4_is_quota_file(inode))
return err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
err = ext4_get_inode_loc(inode, &iloc);
if (err)
return err;
@@ -464,10 +469,6 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
brelse(iloc.bh);
}
- err = dquot_initialize(inode);
- if (err)
- return err;
-
handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
EXT4_QUOTA_INIT_BLOCKS(sb) +
EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
@@ -560,6 +561,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
__u32 flags;
+ struct super_block *ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -578,7 +580,9 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
switch (flags) {
case EXT4_GOING_FLAGS_DEFAULT:
- freeze_bdev(sb->s_bdev);
+ ret = freeze_bdev(sb->s_bdev);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
thaw_bdev(sb->s_bdev, sb);
break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4ea4fe92eb8c..8875fac9f958 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
+#include <linux/freezer.h>
#include <trace/events/ext4.h>
#ifdef CONFIG_EXT4_DEBUG
@@ -3089,6 +3090,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+ struct ext4_super_block *es = sbi->s_es;
int bsbits, max;
ext4_lblk_t end;
loff_t size, start_off;
@@ -3179,6 +3181,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start = max(start, rounddown(ac->ac_o_ex.fe_logical,
(ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+ /* avoid unnecessary preallocation that may trigger assertions */
+ if (start + size > EXT_MAX_BLOCKS)
+ size = EXT_MAX_BLOCKS - start;
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
@@ -3269,18 +3275,21 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
/* define goal start in order to merge */
- if (ar->pright && (ar->lright == (start + size))) {
+ if (ar->pright && (ar->lright == (start + size)) &&
+ ar->pright >= size &&
+ ar->pright - size >= le32_to_cpu(es->s_first_data_block)) {
/* merge to the right */
ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
+ &ac->ac_g_ex.fe_group,
+ &ac->ac_g_ex.fe_start);
ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
}
- if (ar->pleft && (ar->lleft + 1 == start)) {
+ if (ar->pleft && (ar->lleft + 1 == start) &&
+ ar->pleft + 1 < ext4_blocks_count(es)) {
/* merge to the left */
ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
+ &ac->ac_g_ex.fe_group,
+ &ac->ac_g_ex.fe_start);
ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
}
@@ -3372,6 +3381,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
BUG_ON(start < pa->pa_pstart);
BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
BUG_ON(pa->pa_free < len);
+ BUG_ON(ac->ac_b_ex.fe_len <= 0);
pa->pa_free -= len;
mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
@@ -3676,10 +3686,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
return -ENOMEM;
if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
- int winl;
- int wins;
- int win;
- int offs;
+ int new_bex_start;
+ int new_bex_end;
/* we can't allocate as much as normalizer wants.
* so, found space must get proper lstart
@@ -3687,26 +3695,40 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
- /* we're limited by original request in that
- * logical block must be covered any way
- * winl is window we can move our chunk within */
- winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
+ /*
+ * Use the below logic for adjusting best extent as it keeps
+ * fragmentation in check while ensuring logical range of best
+ * extent doesn't overflow out of goal extent:
+ *
+ * 1. Check if best ex can be kept at end of goal and still
+ * cover original start
+ * 2. Else, check if best ex can be kept at start of goal and
+ * still cover original start
+ * 3. Else, keep the best ex at start of original request.
+ */
+ new_bex_end = ac->ac_g_ex.fe_logical +
+ EXT4_C2B(sbi, ac->ac_g_ex.fe_len);
+ new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+ if (ac->ac_o_ex.fe_logical >= new_bex_start)
+ goto adjust_bex;
- /* also, we should cover whole original request */
- wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
+ new_bex_start = ac->ac_g_ex.fe_logical;
+ new_bex_end =
+ new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
+ if (ac->ac_o_ex.fe_logical < new_bex_end)
+ goto adjust_bex;
- /* the smallest one defines real window */
- win = min(winl, wins);
+ new_bex_start = ac->ac_o_ex.fe_logical;
+ new_bex_end =
+ new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- offs = ac->ac_o_ex.fe_logical %
- EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (offs && offs < win)
- win = offs;
+adjust_bex:
+ ac->ac_b_ex.fe_logical = new_bex_start;
- ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
- EXT4_NUM_B2C(sbi, win);
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
+ BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical +
+ EXT4_C2B(sbi, ac->ac_g_ex.fe_len)));
}
/* preallocation can change ac_b_ex, thus we store actually
@@ -3893,7 +3915,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
+ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
+ e4b->bd_group, group, pa->pa_pstart);
+ return 0;
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
@@ -4927,8 +4953,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count,
- NULL);
+ err = ext4_issue_discard(sb, block_group, bit,
+ count_clusters, NULL);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5128,19 +5154,19 @@ error_return:
* @sb: super block for the file system
* @start: starting block of the free extent in the alloc. group
* @count: number of blocks to TRIM
- * @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
-static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+static int ext4_trim_extent(struct super_block *sb,
+ int start, int count, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
struct ext4_free_extent ex;
+ ext4_group_t group = e4b->bd_group;
int ret = 0;
trace_ext4_trim_extent(sb, group, start, count);
@@ -5163,6 +5189,71 @@ __acquires(bitlock)
return ret;
}
+static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
+ ext4_group_t grp)
+{
+ if (grp < ext4_get_groups_count(sb))
+ return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+ return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+ ext4_group_first_block_no(sb, grp) - 1) >>
+ EXT4_CLUSTER_BITS(sb);
+}
+
+static bool ext4_trim_interrupted(void)
+{
+ return fatal_signal_pending(current) || freezing(current);
+}
+
+static int ext4_try_to_trim_range(struct super_block *sb,
+ struct ext4_buddy *e4b, ext4_grpblk_t start,
+ ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+ ext4_grpblk_t next, count, free_count;
+ bool set_trimmed = false;
+ void *bitmap;
+
+ bitmap = e4b->bd_bitmap;
+ if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
+ set_trimmed = true;
+ start = max(e4b->bd_info->bb_first_free, start);
+ count = 0;
+ free_count = 0;
+
+ while (start <= max) {
+ start = mb_find_next_zero_bit(bitmap, max + 1, start);
+ if (start > max)
+ break;
+ next = mb_find_next_bit(bitmap, max + 1, start);
+
+ if ((next - start) >= minblocks) {
+ int ret = ext4_trim_extent(sb, start, next - start, e4b);
+
+ if (ret && ret != -EOPNOTSUPP)
+ return count;
+ count += next - start;
+ }
+ free_count += next - start;
+ start = next + 1;
+
+ if (ext4_trim_interrupted())
+ return count;
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, e4b->bd_group);
+ cond_resched();
+ ext4_lock_group(sb, e4b->bd_group);
+ }
+
+ if ((e4b->bd_info->bb_free - free_count) < minblocks)
+ break;
+ }
+
+ if (set_trimmed)
+ EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info);
+
+ return count;
+}
+
/**
* ext4_trim_all_free -- function to trim all free space in alloc. group
* @sb: super block for file system
@@ -5186,10 +5277,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
ext4_grpblk_t minblocks)
{
- void *bitmap;
- ext4_grpblk_t next, count = 0, free_count = 0;
struct ext4_buddy e4b;
- int ret = 0;
+ int ret;
trace_ext4_trim_all_free(sb, group, start, max);
@@ -5199,58 +5288,20 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ret, group);
return ret;
}
- bitmap = e4b.bd_bitmap;
ext4_lock_group(sb, group);
- if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
- minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
- goto out;
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
-
- while (start <= max) {
- start = mb_find_next_zero_bit(bitmap, max + 1, start);
- if (start > max)
- break;
- next = mb_find_next_bit(bitmap, max + 1, start);
-
- if ((next - start) >= minblocks) {
- ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
- if (ret && ret != -EOPNOTSUPP)
- break;
- ret = 0;
- count += next - start;
- }
- free_count += next - start;
- start = next + 1;
-
- if (fatal_signal_pending(current)) {
- count = -ERESTARTSYS;
- break;
- }
-
- if (need_resched()) {
- ext4_unlock_group(sb, group);
- cond_resched();
- ext4_lock_group(sb, group);
- }
-
- if ((e4b.bd_info->bb_free - free_count) < minblocks)
- break;
- }
+ if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+ minblocks < EXT4_SB(sb)->s_last_trim_minblks)
+ ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+ else
+ ret = 0;
- if (!ret) {
- ret = count;
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
- }
-out:
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
ext4_debug("trimmed %d blocks in the group %d\n",
- count, group);
+ ret, group);
return ret;
}
@@ -5295,7 +5346,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
goto out;
}
- if (end >= max_blks)
+ if (end >= max_blks - 1)
end = max_blks - 1;
if (end <= first_data_blk)
goto out;
@@ -5312,6 +5363,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
for (group = first_group; group <= last_group; group++) {
+ if (ext4_trim_interrupted())
+ break;
grp = ext4_get_group_info(sb, group);
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
@@ -5328,10 +5381,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
*/
if (group == last_group)
end = last_cluster;
-
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
+ end, minlen);
if (cnt < 0) {
ret = cnt;
break;
@@ -5347,7 +5399,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
if (!ret)
- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+ EXT4_SB(sb)->s_last_trim_minblks = minlen;
out:
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
@@ -5376,8 +5428,7 @@ ext4_mballoc_query_range(
ext4_lock_group(sb, group);
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
+ start = max(e4b.bd_info->bb_first_free, start);
if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a878b9a8d9ea..93d392576c12 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -322,17 +322,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
struct ext4_dir_entry *de)
{
struct ext4_dir_entry_tail *t;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
d = de;
top = (struct ext4_dir_entry *)(((void *)de) +
- (EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
- while (d < top && d->rec_len)
+ (blocksize - sizeof(struct ext4_dir_entry_tail)));
+ while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
d = (struct ext4_dir_entry *)(((void *)d) +
- le16_to_cpu(d->rec_len));
+ ext4_rec_len_from_disk(d->rec_len, blocksize));
if (d != top)
return NULL;
@@ -343,7 +343,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
#endif
if (t->det_reserved_zero1 ||
- le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
+ (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
+ sizeof(struct ext4_dir_entry_tail)) ||
t->det_reserved_zero2 ||
t->det_reserved_ft != EXT4_FT_DIR_CSUM)
return NULL;
@@ -425,13 +426,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
struct ext4_dir_entry *dp;
struct dx_root_info *root;
int count_offset;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
+ unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
+ if (rlen == blocksize)
count_offset = 8;
- else if (le16_to_cpu(dirent->rec_len) == 12) {
+ else if (rlen == 12) {
dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
- if (le16_to_cpu(dp->rec_len) !=
- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
+ if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
return NULL;
root = (struct dx_root_info *)(((void *)dp + 12));
if (root->reserved_zero ||
@@ -1244,6 +1246,7 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
unsigned int buflen = bh->b_size;
char *base = bh->b_data;
struct dx_hash_info h = *hinfo;
+ int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
if (ext4_has_metadata_csum(dir->i_sb))
buflen -= sizeof(struct ext4_dir_entry_tail);
@@ -1257,11 +1260,12 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
- map_tail->size = le16_to_cpu(de->rec_len);
+ map_tail->size = ext4_rec_len_from_disk(de->rec_len,
+ blocksize);
count++;
cond_resched();
}
- de = ext4_next_entry(de, dir->i_sb->s_blocksize);
+ de = ext4_next_entry(de, blocksize);
}
return count;
}
@@ -1419,11 +1423,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data);
- if (has_inline_data) {
- if (inlined)
- *inlined = 1;
+ if (inlined)
+ *inlined = has_inline_data;
+ if (has_inline_data)
goto cleanup_and_exit;
- }
}
if ((namelen <= 2) && (name[0] == '.') &&
@@ -3515,7 +3518,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again
* before reset old inode info.
*/
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh);
if (!old.bh)
@@ -3677,7 +3681,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return retval;
}
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
/*
@@ -3870,6 +3875,9 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = dquot_initialize(old.dir);
if (retval)
return retval;
+ retval = dquot_initialize(old.inode);
+ if (retval)
+ return retval;
retval = dquot_initialize(new.dir);
if (retval)
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 3de933354a08..bf910f266469 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -388,7 +388,8 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
- struct page *page,
+ struct page *pagecache_page,
+ struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@@ -403,10 +404,11 @@ submit_and_retry:
return ret;
io->io_bio->bi_write_hint = inode->i_write_hint;
}
- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+ bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- wbc_account_io(io->io_wbc, page, bh->b_size);
+ wbc_account_io(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
return 0;
}
@@ -514,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ret = io_submit_add_bh(io, inode, page, data_page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8737d1bcdb6e..f4b3d450dead 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -572,13 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
goto handle_itb;
- if (meta_bg == 1) {
- ext4_group_t first_group;
- first_group = ext4_meta_bg_first_group(sb, group);
- if (first_group != group + 1 &&
- first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
- goto handle_itb;
- }
+ if (meta_bg == 1)
+ goto handle_itb;
block = start + ext4_bg_has_super(sb, group);
/* Copy all of the GDT blocks into the backup in this group */
@@ -1565,10 +1560,12 @@ exit_journal:
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
int meta_bg = ext4_has_feature_meta_bg(sb);
+ sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
+ ext4_group_first_block_no(sb, 0);
sector_t old_gdb = 0;
- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block), 0);
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
+ (char *)es, sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
@@ -1576,8 +1573,8 @@ exit_journal:
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
- gdb_bh->b_size, meta_bg);
+ update_backups(sb, gdb_bh->b_blocknr - padding_blocks,
+ gdb_bh->b_data, gdb_bh->b_size, meta_bg);
old_gdb = gdb_bh->b_blocknr;
}
}
@@ -1775,7 +1772,7 @@ errout:
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
@@ -1940,9 +1937,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
errout:
ret = ext4_journal_stop(handle);
- if (!err)
- err = ret;
- return ret;
+ return err ? err : ret;
invalid_resize_inode:
ext4_error(sb, "corrupted/inconsistent resize inode");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f00cc301da36..926063a6d232 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -905,10 +905,16 @@ static void ext4_blkdev_put(struct block_device *bdev)
static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
struct block_device *bdev;
- bdev = sbi->journal_bdev;
+ bdev = sbi->s_journal_bdev;
if (bdev) {
+ /*
+ * Invalidate the journal device's buffers. We don't want them
+ * floating about in memory - the physical journal device may
+ * hotswapped, and it breaks the `ro-after' testing code.
+ */
+ invalidate_bdev(bdev);
ext4_blkdev_put(bdev);
- sbi->journal_bdev = NULL;
+ sbi->s_journal_bdev = NULL;
}
}
@@ -1032,14 +1038,8 @@ static void ext4_put_super(struct super_block *sb)
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
- /*
- * Invalidate the journal device's buffers. We don't want them
- * floating about in memory - the physical journal device may
- * hotswapped, and it breaks the `ro-after' testing code.
- */
- sync_blockdev(sbi->journal_bdev);
- invalidate_bdev(sbi->journal_bdev);
+ if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
+ sync_blockdev(sbi->s_journal_bdev);
ext4_blkdev_remove(sbi);
}
if (sbi->s_ea_inode_cache) {
@@ -1081,6 +1081,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
return NULL;
inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
@@ -2422,11 +2423,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -3538,7 +3537,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* Add the internal journal blocks whether the journal has been
* loaded or not
*/
- if (sbi->s_journal && !sbi->journal_bdev)
+ if (sbi->s_journal && !sbi->s_journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
@@ -4302,30 +4301,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
/* Nojournal mode, all journal mount options are illegal */
- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_checksum, fs mounted w/o journal");
- goto failed_mount_wq;
- }
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_async_commit, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
+ }
+
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_checksum, fs mounted w/o journal");
+ goto failed_mount3a;
}
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"commit=%lu, fs mounted w/o journal",
sbi->s_commit_interval / HZ);
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"data=, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
@@ -4673,6 +4673,7 @@ failed_mount:
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
+ invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
out_free_base:
@@ -4731,7 +4732,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return NULL;
@@ -4848,7 +4849,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
be32_to_cpu(journal->j_superblock->s_nr_users));
goto out_journal;
}
- EXT4_SB(sb)->journal_bdev = bdev;
+ EXT4_SB(sb)->s_journal_bdev = bdev;
ext4_init_journal_params(sb, journal);
return journal;
@@ -5501,9 +5502,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -5513,6 +5511,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
ext4_release_system_zone(sb);
@@ -5529,6 +5530,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -5839,6 +5847,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
+{
+ switch (type) {
+ case USRQUOTA:
+ return qf_inum == EXT4_USR_QUOTA_INO;
+ case GRPQUOTA:
+ return qf_inum == EXT4_GRP_QUOTA_INO;
+ case PRJQUOTA:
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
+ default:
+ BUG();
+ }
+}
+
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
@@ -5855,9 +5877,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
+ qf_inums[type], type);
+ return -EUCLEAN;
+ }
+
qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
+ qf_inums[type], type);
return PTR_ERR(qf_inode);
}
@@ -5896,8 +5925,9 @@ static int ext4_enable_quotas(struct super_block *sb)
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "e2fsck to fix.", type, err);
+ "(type=%d, err=%d, ino=%lu). "
+ "Please run e2fsck to fix.", type,
+ err, qf_inums[type]);
for (type--; type >= 0; type--) {
struct inode *inode;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9212a026a1f1..74722ce7206c 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -349,6 +349,11 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
+static void ext4_feat_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@@ -363,7 +368,7 @@ static struct kobj_type ext4_sb_ktype = {
static struct kobj_type ext4_feat_ktype = {
.default_attrs = ext4_feat_attrs,
.sysfs_ops = &ext4_attr_ops,
- .release = (void (*)(struct kobject *))kfree,
+ .release = ext4_feat_release,
};
static struct kobject *ext4_root;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 497649c69bba..88bdb2714e51 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -121,7 +121,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
+ struct ext4_inode_info *ei = EXT4_I(ea_inode);
+
lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
}
#endif
@@ -384,6 +388,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode;
int err;
+ /*
+ * We have to check for this corruption early as otherwise
+ * iget_locked() could wait indefinitely for the state of our
+ * parent inode.
+ */
+ if (parent->i_ino == ea_ino) {
+ ext4_error(parent->i_sb,
+ "Parent and EA inode have the same ino %lu", ea_ino);
+ return -EFSCORRUPTED;
+ }
+
inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -434,6 +449,21 @@ error:
return err;
}
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
+}
+
static int
ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
struct ext4_xattr_entry *entry, void *buffer,
@@ -1019,10 +1049,8 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
struct ext4_iloc iloc;
s64 ref_count;
- u32 hash;
int ret;
inode_lock(ea_inode);
@@ -1047,14 +1075,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache,
- GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
- }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1067,12 +1087,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
- }
}
}
@@ -1253,6 +1267,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1262,9 +1277,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1288,7 +1312,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
- ce->e_reusable = 1;
+ set_bit(MBE_REUSABLE_B, &ce->e_flags);
mb_cache_entry_put(ea_block_cache, ce);
}
}
@@ -1427,6 +1451,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
int err;
+ if (inode->i_sb->s_root == NULL) {
+ ext4_warning(inode->i_sb,
+ "refuse to create EA inode when umounting");
+ WARN_ON(1);
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* Let the next inode be the goal, so we try and allocate the EA inode
* in the same group, or nearby one.
@@ -1446,6 +1477,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
if (!err)
err = ext4_inode_attach_jinode(ea_inode);
if (err) {
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+ ext4_warning_inode(ea_inode,
+ "cleanup dec ref error %d", err);
iput(ea_inode);
return ERR_PTR(err);
}
@@ -1733,6 +1767,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
memmove(here, (void *)here + size,
(void *)last - (void *)here + sizeof(__u32));
memset(last, 0, size);
+
+ /*
+ * Update i_inline_off - moved ibody region might contain
+ * system.data attribute. Handling a failure here won't
+ * cause other complications for setting an xattr.
+ */
+ if (!is_block && ext4_has_inline_data(inode)) {
+ ret = ext4_find_inline_data_nolock(inode);
+ if (ret) {
+ ext4_warning_inode(inode,
+ "unable to update i_inline_off");
+ goto out;
+ }
+ }
} else if (s->not_found) {
/* Insert new name. */
size_t size = EXT4_XATTR_LEN(name_len);
@@ -1872,6 +1920,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
#define header(x) ((struct ext4_xattr_header *)(x))
if (s->base) {
+ int offset = (char *)s->here - bs->bh->b_data;
+
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -1886,9 +1936,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1903,50 +1964,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error)
goto cleanup;
goto inserted;
- } else {
- int offset = (char *)s->here - bs->bh->b_data;
+ }
+clone_block:
+ unlock_buffer(bs->bh);
+ ea_bdebug(bs->bh, "cloning");
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+ error = -ENOMEM;
+ if (s->base == NULL)
+ goto cleanup;
+ s->first = ENTRY(header(s->base)+1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->here = ENTRY(s->base + offset);
+ s->end = s->base + bs->bh->b_size;
- unlock_buffer(bs->bh);
- ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
- error = -ENOMEM;
- if (s->base == NULL)
+ /*
+ * If existing entry points to an xattr inode, we need
+ * to prevent ext4_xattr_set_entry() from decrementing
+ * ref count on it because the reference belongs to the
+ * original block. In this case, make the entry look
+ * like it has an empty value.
+ */
+ if (!s->not_found && s->here->e_value_inum) {
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino,
+ le32_to_cpu(s->here->e_hash),
+ &tmp_inode);
+ if (error)
goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
- s->first = ENTRY(header(s->base)+1);
- header(s->base)->h_refcount = cpu_to_le32(1);
- s->here = ENTRY(s->base + offset);
- s->end = s->base + bs->bh->b_size;
-
- /*
- * If existing entry points to an xattr inode, we need
- * to prevent ext4_xattr_set_entry() from decrementing
- * ref count on it because the reference belongs to the
- * original block. In this case, make the entry look
- * like it has an empty value.
- */
- if (!s->not_found && s->here->e_value_inum) {
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &tmp_inode);
- if (error)
- goto cleanup;
-
- if (!ext4_test_inode_state(tmp_inode,
- EXT4_STATE_LUSTRE_EA_INODE)) {
- /*
- * Defer quota free call for previous
- * inode until success is guaranteed.
- */
- old_ea_inode_quota = le32_to_cpu(
- s->here->e_value_size);
- }
- iput(tmp_inode);
- s->here->e_value_inum = 0;
- s->here->e_value_size = 0;
+ if (!ext4_test_inode_state(tmp_inode,
+ EXT4_STATE_LUSTRE_EA_INODE)) {
+ /*
+ * Defer quota free call for previous
+ * inode until success is guaranteed.
+ */
+ old_ea_inode_quota = le32_to_cpu(
+ s->here->e_value_size);
}
+ iput(tmp_inode);
+
+ s->here->e_value_inum = 0;
+ s->here->e_value_size = 0;
}
} else {
/* Allocate a buffer where we construct the new block. */
@@ -1997,8 +2055,9 @@ inserted:
else {
u32 ref;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -2013,18 +2072,13 @@ inserted:
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2039,10 +2093,9 @@ inserted:
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
- ce->e_reusable = 0;
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
ext4_xattr_block_csum_set(inode, new_bh);
@@ -2066,23 +2119,16 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
-
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
-
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
if (error)
goto cleanup;
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
-
ea_idebug(inode, "creating block %llu",
(unsigned long long)block);
@@ -2210,7 +2256,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
return 0;
}
-int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
+int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -2235,30 +2281,6 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
return 0;
}
-static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_search *s = &is->s;
- int error;
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
- if (!IS_LAST_ENTRY(s->first)) {
- header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- } else {
- header->h_magic = cpu_to_le32(0);
- ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
- }
- return 0;
-}
-
static int ext4_xattr_value_same(struct ext4_xattr_search *s,
struct ext4_xattr_info *i)
{
@@ -2571,13 +2593,13 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
.in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int needs_kvfree = 0;
int error;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- buffer = kmalloc(value_size, GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!is || !bs || !buffer || !b_entry_name) {
+ if (!is || !bs || !b_entry_name) {
error = -ENOMEM;
goto out;
}
@@ -2589,12 +2611,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
/* Save the entry name and the entry value */
if (entry->e_value_inum) {
+ buffer = kvmalloc(value_size, GFP_NOFS);
+ if (!buffer) {
+ error = -ENOMEM;
+ goto out;
+ }
+ needs_kvfree = 1;
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
} else {
size_t value_offs = le16_to_cpu(entry->e_value_offs);
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ buffer = (void *)IFIRST(header) + value_offs;
}
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
@@ -2609,25 +2637,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto out;
-
i.value = buffer;
i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto out;
- /* Add entry which was removed from the inode into the block */
+ /* Move ea entry from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error)
goto out;
- error = 0;
+
+ /* Remove the chosen entry from the inode */
+ i.value = NULL;
+ i.value_len = 0;
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+
out:
kfree(b_entry_name);
- kfree(buffer);
+ if (needs_kvfree && buffer)
+ kvfree(buffer);
if (is)
brelse(is->iloc.bh);
if (bs)
@@ -2802,6 +2831,9 @@ shift:
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
+ if (ext4_has_inline_data(inode))
+ error = ext4_find_inline_data_nolock(inode);
+
cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 990084e00374..66911f8a11f8 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -190,6 +190,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
extern const struct xattr_handler *ext4_xattr_handlers[];
@@ -198,9 +199,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
const char *name,
void *buffer, size_t buffer_size);
-extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is);
+extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_info *i,
+ struct ext4_xattr_ibody_find *is);
extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a563de5ccd21..621e0d4f1fbf 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -273,8 +273,15 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
+ ClearPageUptodate(page);
+ dec_page_count(sbi, F2FS_DIRTY_META);
+ unlock_page(page);
+ return 0;
+ }
goto redirty_out;
+ }
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -1185,7 +1192,8 @@ void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi) &&
+ !is_sbi_flag_set(sbi, SBI_IS_CLOSE)))
break;
io_schedule_timeout(5*HZ);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c63f5e32630e..419586809cef 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -464,7 +464,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
}
if (fio->io_wbc && !is_read_io(fio->op))
- wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+ wbc_account_io(fio->io_wbc, fio->page, PAGE_SIZE);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
@@ -533,7 +533,7 @@ alloc_new:
}
if (fio->io_wbc)
- wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
+ wbc_account_io(fio->io_wbc, fio->page, PAGE_SIZE);
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
@@ -1885,7 +1885,8 @@ static int __write_data_page(struct page *page, bool *submitted,
* don't drop any dirty dentry pages for keeping lastest
* directory structure.
*/
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode) &&
+ !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
goto redirty_out;
goto out;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 36d6b561b524..e85ed4aa9d46 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -375,7 +375,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_node *en;
bool ret = false;
- f2fs_bug_on(sbi, !et);
+ if (!et)
+ return false;
trace_f2fs_lookup_extent_tree_start(inode, pgofs);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 2a7249496c57..043ce96ac127 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2892,6 +2892,7 @@ int f2fs_precache_extents(struct inode *inode)
return -EOPNOTSUPP;
map.m_lblk = 0;
+ map.m_pblk = 0;
map.m_next_pgofs = NULL;
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index ff447bbb5248..fb4494c54484 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -958,7 +958,8 @@ next_step:
if (phase == 3) {
inode = f2fs_iget(sb, dni.ino);
- if (IS_ERR(inode) || is_bad_inode(inode))
+ if (IS_ERR(inode) || is_bad_inode(inode) ||
+ special_file(inode->i_mode))
continue;
if (!down_write_trylock(
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 6bf78cf63ea2..7ad78aa9c7b8 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -408,18 +408,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
dentry_blk = page_address(page);
+ /*
+ * Start by zeroing the full block, to ensure that all unused space is
+ * zeroed and no uninitialized memory is leaked to disk.
+ */
+ memset(dentry_blk, 0, F2FS_BLKSIZE);
+
make_dentry_ptr_inline(dir, &src, inline_dentry);
make_dentry_ptr_block(dir, &dst, dentry_blk);
/* copy data from inline dentry block to new dentry block */
memcpy(dst.bitmap, src.bitmap, src.nr_bitmap);
- memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap);
- /*
- * we do not need to zero out remainder part of dentry and filename
- * field, since we have used bitmap for marking the usage status of
- * them, besides, we can also ignore copying/zeroing reserved space
- * of dentry block, because them haven't been used so far.
- */
memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max);
memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 9e4c38481830..2eb7b0e2b34a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -979,7 +979,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
if (old_dir_entry) {
- if (old_dir != new_dir && !whiteout)
+ if (old_dir != new_dir)
f2fs_set_link(old_inode, old_dir_entry,
old_dir_page, new_dir);
else
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 2c28f488ac2f..9911f780e013 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -879,8 +879,10 @@ static int truncate_dnode(struct dnode_of_data *dn)
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
err = truncate_node(dn);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
return err;
+ }
return 1;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 6fbf0471323e..7596fce92bef 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1382,7 +1382,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
if (i + 1 < dpolicy->granularity)
break;
- if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
return __issue_discard_cmd_orderly(sbi, dpolicy);
pend_list = &dcc->pend_list[i];
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 64352d2833e2..db3e76b35607 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -717,6 +717,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
memcpy(pval, value, size);
last->e_value_size = cpu_to_le16(size);
new_hsize += newsize;
+ /*
+ * Explicitly add the null terminator. The unused xattr space
+ * is supposed to always be zeroed, which would make this
+ * unnecessary, but don't depend on that.
+ */
+ *(u32 *)((u8 *)last + newsize) = 0;
}
error = write_all_xattrs(inode, new_hsize, base_addr, ipage);
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 397f95b65881..947fbeb59db1 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1302,7 +1302,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
struct super_block *sb = dir->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
- struct msdos_dir_entry *uninitialized_var(de);
+ struct msdos_dir_entry *de;
int err, free_slots, i, nr_bhs;
loff_t pos, i_pos;
diff --git a/fs/file.c b/fs/file.c
index d6ca500a1053..928ba7b8df1e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -627,6 +627,7 @@ int __close_fd(struct files_struct *files, unsigned fd)
fdt = files_fdtable(files);
if (fd >= fdt->max_fds)
goto out_unlock;
+ fd = array_index_nospec(fd, fdt->max_fds);
file = fdt->fd[fd];
if (!file)
goto out_unlock;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4d687e2e2373..61dc0dc139f8 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -702,7 +702,7 @@ void wbc_detach_inode(struct writeback_control *wbc)
* is okay. The main goal is avoiding keeping an inode on
* the wrong wb for an extended period of time.
*/
- if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
+ if (hweight16(history) > WB_FRN_HIST_THR_SLOTS)
inode_switch_wbs(inode, max_id);
}
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index acc35819aae6..af253127b309 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file,
const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned uninitialized_var(val);
+ unsigned val;
ssize_t ret;
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index e10e2b62ccf4..acd985aa2cba 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -269,7 +269,7 @@ static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
{
char *end = p + len;
- char *uninitialized_var(key), *uninitialized_var(val);
+ char *key, *val;
int rc;
while (true) {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6098e0c7f87b..16252727ec2e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -232,7 +232,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
spin_unlock(&fc->lock);
}
kfree(forget);
- if (ret == -ENOMEM)
+ if (ret == -ENOMEM || ret == -EINTR)
goto out;
if (ret || fuse_invalid_attr(&outarg.attr) ||
(outarg.attr.mode ^ inode->i_mode) & S_IFMT)
@@ -1314,8 +1314,16 @@ retry:
dput(dentry);
dentry = alias;
}
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ if (!IS_ERR(inode)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->nlookup--;
+ spin_unlock(&fc->lock);
+ }
return PTR_ERR(dentry);
+ }
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 599a6eeed02e..c629ccafb2b0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2774,7 +2774,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
{
spin_lock(&fc->lock);
if (RB_EMPTY_NODE(&ff->polled_node)) {
- struct rb_node **link, *uninitialized_var(parent);
+ struct rb_node **link, *parent;
link = fuse_find_polled_node(fc, ff->kh, &parent);
BUG_ON(*link);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 31e8270d0b26..910bfc39dc4b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -179,7 +179,6 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
if (PageChecked(page)) {
ClearPageChecked(page);
@@ -187,7 +186,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
create_empty_buffers(page, inode->i_sb->s_blocksize,
BIT(BH_Dirty)|BIT(BH_Uptodate));
}
- gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize);
+ gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE);
}
return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc);
}
@@ -360,7 +359,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
int done = 0;
struct pagevec pvec;
int nr_pages;
- pgoff_t uninitialized_var(writeback_index);
+ pgoff_t writeback_index;
pgoff_t index;
pgoff_t end;
pgoff_t done_index;
@@ -481,8 +480,6 @@ int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
return error;
kaddr = kmap_atomic(page);
- if (dsize > gfs2_max_stuffed_size(ip))
- dsize = gfs2_max_stuffed_size(ip);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap_atomic(kaddr);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 150cec85c416..729f36fdced1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -72,9 +72,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
void *kaddr = kmap(page);
u64 dsize = i_size_read(inode);
- if (dsize > gfs2_max_stuffed_size(ip))
- dsize = gfs2_max_stuffed_size(ip);
-
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap(page);
@@ -1757,7 +1754,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift;
__u16 start_list[GFS2_MAX_META_HEIGHT];
__u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL;
- unsigned int start_aligned, uninitialized_var(end_aligned);
+ unsigned int start_aligned, end_aligned;
unsigned int strip_h = ip->i_height - 1;
u32 btotal = 0;
int ret, state;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 20f08f4391c9..ff35cc365930 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -339,6 +339,7 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
const struct gfs2_dinode *str = buf;
struct timespec64 atime;
u16 height, depth;
@@ -378,7 +379,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(&ip->i_inode);
height = be16_to_cpu(str->di_height);
- if (unlikely(height > GFS2_MAX_META_HEIGHT))
+ if (unlikely(height > sdp->sd_max_height))
goto corrupt;
ip->i_height = (u8)height;
@@ -388,6 +389,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
+ if (gfs2_is_stuffed(ip) && ip->i_inode.i_size > gfs2_max_stuffed_size(ip))
+ goto corrupt;
+
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index dd0f9bc13164..9f753595d90e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -434,6 +434,17 @@ static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd,
(sync_gen && (qd->qd_sync_gen >= *sync_gen)))
return 0;
+ /*
+ * If qd_change is 0 it means a pending quota change was negated.
+ * We should not sync it, but we still have a qd reference and slot
+ * reference taken by gfs2_quota_change -> do_qc that need to be put.
+ */
+ if (!qd->qd_change && test_and_clear_bit(QDF_CHANGE, &qd->qd_flags)) {
+ slot_put(qd);
+ qd_put(qd);
+ return 0;
+ }
+
if (!lockref_get_not_dead(&qd->qd_lockref))
return 0;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3cc2237e5896..29157f7d9663 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1374,7 +1374,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
{
struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
struct gfs2_args *args = &sdp->sd_args;
- int val;
+ unsigned int logd_secs, statfs_slow, statfs_quantum, quota_quantum;
+
+ spin_lock(&sdp->sd_tune.gt_spin);
+ logd_secs = sdp->sd_tune.gt_logd_secs;
+ quota_quantum = sdp->sd_tune.gt_quota_quantum;
+ statfs_quantum = sdp->sd_tune.gt_statfs_quantum;
+ statfs_slow = sdp->sd_tune.gt_statfs_slow;
+ spin_unlock(&sdp->sd_tune.gt_spin);
if (is_ancestor(root, sdp->sd_master_dir))
seq_puts(s, ",meta");
@@ -1429,17 +1436,14 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
}
if (args->ar_discard)
seq_puts(s, ",discard");
- val = sdp->sd_tune.gt_logd_secs;
- if (val != 30)
- seq_printf(s, ",commit=%d", val);
- val = sdp->sd_tune.gt_statfs_quantum;
- if (val != 30)
- seq_printf(s, ",statfs_quantum=%d", val);
- else if (sdp->sd_tune.gt_statfs_slow)
+ if (logd_secs != 30)
+ seq_printf(s, ",commit=%d", logd_secs);
+ if (statfs_quantum != 30)
+ seq_printf(s, ",statfs_quantum=%d", statfs_quantum);
+ else if (statfs_slow)
seq_puts(s, ",statfs_quantum=0");
- val = sdp->sd_tune.gt_quota_quantum;
- if (val != 60)
- seq_printf(s, ",quota_quantum=%d", val);
+ if (quota_quantum != 60)
+ seq_printf(s, ",quota_quantum=%d", quota_quantum);
if (args->ar_statfs_percent)
seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent);
if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
@@ -1586,6 +1590,14 @@ static void gfs2_evict_inode(struct inode *inode)
if (inode->i_nlink || sb_rdonly(sb))
goto out;
+ /*
+ * In case of an incomplete mount, gfs2_evict_inode() may be called for
+ * system files without having an active journal to write to. In that
+ * case, skip the filesystem evict.
+ */
+ if (!sdp->sd_jdesc)
+ goto out;
+
if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
gfs2_holder_mark_uninitialized(&gh);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index c0a73a6ffb28..397e02a56697 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -281,6 +281,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
tree->node_hash[hash] = node;
tree->node_hash_cnt++;
} else {
+ hfs_bnode_get(node2);
spin_unlock(&tree->hash_lock);
kfree(node);
wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags));
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index da243c84e93b..ee2ea5532e69 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -453,14 +453,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* panic? */
return -EIO;
+ res = -EIO;
+ if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN)
+ goto out;
fd.search_key->cat = HFS_I(main_inode)->cat_key;
if (hfs_brec_find(&fd))
- /* panic? */
goto out;
if (S_ISDIR(main_inode->i_mode)) {
if (fd.entrylength < sizeof(struct hfs_cat_dir))
- /* panic? */;
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_dir));
if (rec.type != HFS_CDR_DIR ||
@@ -473,6 +475,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_dir));
} else if (HFS_IS_RSRC(inode)) {
+ if (fd.entrylength < sizeof(struct hfs_cat_file))
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
hfs_inode_write_fork(inode, rec.file.RExtRec,
@@ -481,7 +485,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
sizeof(struct hfs_cat_file));
} else {
if (fd.entrylength < sizeof(struct hfs_cat_file))
- /* panic? */;
+ goto out;
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
if (rec.type != HFS_CDR_FIL ||
@@ -498,9 +502,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc)
hfs_bnode_write(fd.bnode, &rec, fd.entryoffset,
sizeof(struct hfs_cat_file));
}
+ res = 0;
out:
hfs_find_exit(&fd);
- return 0;
+ return res;
}
static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c
index 39f5e343bf4d..fdb0edb8a607 100644
--- a/fs/hfs/trans.c
+++ b/fs/hfs/trans.c
@@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr
if (nls_io) {
wchar_t ch;
- while (srclen > 0) {
+ while (srclen > 0 && dstlen > 0) {
size = nls_io->char2uni(src, srclen, &ch);
if (size < 0) {
ch = '?';
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index dd7ad9f13e3a..db2e1c750199 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -198,6 +198,8 @@ struct hfsplus_sb_info {
#define HFSPLUS_SB_HFSX 3
#define HFSPLUS_SB_CASEFOLD 4
#define HFSPLUS_SB_NOBARRIER 5
+#define HFSPLUS_SB_UID 6
+#define HFSPLUS_SB_GID 7
static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb)
{
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d7ab9d8c4b67..a1d4ad584b10 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -187,11 +187,11 @@ static void hfsplus_get_perms(struct inode *inode,
mode = be16_to_cpu(perms->mode);
i_uid_write(inode, be32_to_cpu(perms->owner));
- if (!i_uid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode))
inode->i_uid = sbi->uid;
i_gid_write(inode, be32_to_cpu(perms->group));
- if (!i_gid_read(inode) && !mode)
+ if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode))
inode->i_gid = sbi->gid;
if (dir) {
@@ -476,8 +476,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
if (type == HFSPLUS_FOLDER) {
struct hfsplus_cat_folder *folder = &entry.folder;
- if (fd->entrylength < sizeof(struct hfsplus_cat_folder))
- /* panic? */;
+ if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) {
+ pr_err("bad catalog folder entry\n");
+ res = -EIO;
+ goto out;
+ }
hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
sizeof(struct hfsplus_cat_folder));
hfsplus_get_perms(inode, &folder->permissions, 1);
@@ -497,8 +500,11 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
} else if (type == HFSPLUS_FILE) {
struct hfsplus_cat_file *file = &entry.file;
- if (fd->entrylength < sizeof(struct hfsplus_cat_file))
- /* panic? */;
+ if (fd->entrylength < sizeof(struct hfsplus_cat_file)) {
+ pr_err("bad catalog file entry\n");
+ res = -EIO;
+ goto out;
+ }
hfs_bnode_read(fd->bnode, &entry, fd->entryoffset,
sizeof(struct hfsplus_cat_file));
@@ -529,6 +535,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
pr_err("bad catalog entry used to create inode\n");
res = -EIO;
}
+out:
return res;
}
@@ -537,6 +544,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
struct inode *main_inode = inode;
struct hfs_find_data fd;
hfsplus_cat_entry entry;
+ int res = 0;
if (HFSPLUS_IS_RSRC(inode))
main_inode = HFSPLUS_I(inode)->rsrc_inode;
@@ -555,8 +563,11 @@ int hfsplus_cat_write_inode(struct inode *inode)
if (S_ISDIR(main_inode->i_mode)) {
struct hfsplus_cat_folder *folder = &entry.folder;
- if (fd.entrylength < sizeof(struct hfsplus_cat_folder))
- /* panic? */;
+ if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) {
+ pr_err("bad catalog folder entry\n");
+ res = -EIO;
+ goto out;
+ }
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_folder));
/* simple node checks? */
@@ -581,8 +592,11 @@ int hfsplus_cat_write_inode(struct inode *inode)
} else {
struct hfsplus_cat_file *file = &entry.file;
- if (fd.entrylength < sizeof(struct hfsplus_cat_file))
- /* panic? */;
+ if (fd.entrylength < sizeof(struct hfsplus_cat_file)) {
+ pr_err("bad catalog file entry\n");
+ res = -EIO;
+ goto out;
+ }
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_file));
hfsplus_inode_write_fork(inode, &file->data_fork);
@@ -603,5 +617,5 @@ int hfsplus_cat_write_inode(struct inode *inode)
set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags);
out:
hfs_find_exit(&fd);
- return 0;
+ return res;
}
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index 047e05c57560..c94a58762ad6 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!uid_valid(sbi->uid)) {
pr_err("invalid uid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_UID, &sbi->flags);
}
break;
case opt_gid:
@@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
if (!gid_valid(sbi->gid)) {
pr_err("invalid gid specified\n");
return 0;
+ } else {
+ set_bit(HFSPLUS_SB_GID, &sbi->flags);
}
break;
case opt_part:
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index eb4535eba95d..3b1356b10a47 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -294,11 +294,11 @@ static void hfsplus_put_super(struct super_block *sb)
hfsplus_sync_fs(sb, 1);
}
+ iput(sbi->alloc_file);
+ iput(sbi->hidden_dir);
hfs_btree_close(sbi->attr_tree);
hfs_btree_close(sbi->cat_tree);
hfs_btree_close(sbi->ext_tree);
- iput(sbi->alloc_file);
- iput(sbi->hidden_dir);
kfree(sbi->s_vhdr_buf);
kfree(sbi->s_backup_vhdr_buf);
unload_nls(sbi->nls);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index c8d1b2be7854..73342c925a4b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -398,7 +398,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
astr = str->name;
len = str->len;
while (len > 0) {
- int uninitialized_var(dsize);
+ int dsize;
size = asc2unichar(sb, astr, len, &c);
astr += size;
len -= size;
diff --git a/fs/iomap.c b/fs/iomap.c
index ac7b2152c3ad..04e82b6bd9bf 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -109,6 +109,7 @@ static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
struct iomap_page *iop = to_iomap_page(page);
+ unsigned int nr_blocks = PAGE_SIZE / i_blocksize(inode);
if (iop || i_blocksize(inode) == PAGE_SIZE)
return iop;
@@ -118,6 +119,8 @@ iomap_page_create(struct inode *inode, struct page *page)
atomic_set(&iop->write_count, 0);
spin_lock_init(&iop->uptodate_lock);
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+ if (PageUptodate(page))
+ bitmap_fill(iop->uptodate, nr_blocks);
/*
* migrate_page_move_mapping() assumes that pages with private data have
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 558e7c51ce0d..58f80e1b3ac0 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -153,8 +153,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
int found;
- unsigned long uninitialized_var(block);
- unsigned long uninitialized_var(offset);
+ unsigned long block;
+ unsigned long offset;
struct inode *inode;
struct page *page;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8a50722bca29..629928b19e48 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1375,9 +1375,11 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
return -EIO;
}
- trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
+
+ trace_jbd2_write_superblock(journal, write_flags);
+
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index a4967b27ffb6..ed923a9765c2 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -247,6 +247,8 @@ int jbd2_journal_recover(journal_t *journal)
journal_superblock_t * sb;
struct recovery_info info;
+ errseq_t wb_err;
+ struct address_space *mapping;
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;
@@ -264,6 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
return 0;
}
+ wb_err = 0;
+ mapping = journal->j_fs_dev->bd_inode->i_mapping;
+ errseq_check_and_advance(&mapping->wb_err, &wb_err);
err = do_one_pass(journal, &info, PASS_SCAN);
if (!err)
err = do_one_pass(journal, &info, PASS_REVOKE);
@@ -284,6 +289,9 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
+ err2 = errseq_check_and_advance(&mapping->wb_err, &wb_err);
+ if (!err)
+ err = err2;
/* Make sure all replayed data is on permanent storage */
if (journal->j_flags & JBD2_BARRIER) {
err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 837cd55fd4c5..6ae9d6fefb86 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -211,7 +211,10 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
ic->scan_dents = NULL;
cond_resched();
}
- jffs2_build_xattr_subsystem(c);
+ ret = jffs2_build_xattr_subsystem(c);
+ if (ret)
+ goto exit;
+
c->flags &= ~JFFS2_SB_FLAG_BUILDING;
dbg_fsbuild("FS build complete\n");
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 83b8f06b4a64..7e9abdb89712 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -401,7 +401,7 @@ static void jffs2_mark_erased_block(struct jffs2_sb_info *c, struct jffs2_eraseb
{
size_t retlen;
int ret;
- uint32_t uninitialized_var(bad_offset);
+ uint32_t bad_offset;
switch (jffs2_block_check_erase(c, jeb, &bad_offset)) {
case -EAGAIN: goto refile;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 3047872fdac9..bf3d8a4516a5 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -137,19 +137,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
pgoff_t index = pos >> PAGE_SHIFT;
- uint32_t pageofs = index << PAGE_SHIFT;
int ret = 0;
jffs2_dbg(1, "%s()\n", __func__);
- if (pageofs > inode->i_size) {
- /* Make new hole frag from old EOF to new page */
+ if (pos > inode->i_size) {
+ /* Make new hole frag from old EOF to new position */
struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
uint32_t alloc_len;
- jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
- (unsigned int)inode->i_size, pageofs);
+ jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n",
+ (unsigned int)inode->i_size, (uint32_t)pos);
ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
@@ -169,10 +168,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
ri.mode = cpu_to_jemode(inode->i_mode);
ri.uid = cpu_to_je16(i_uid_read(inode));
ri.gid = cpu_to_je16(i_gid_read(inode));
- ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs));
+ ri.isize = cpu_to_je32((uint32_t)pos);
ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW());
ri.offset = cpu_to_je32(inode->i_size);
- ri.dsize = cpu_to_je32(pageofs - inode->i_size);
+ ri.dsize = cpu_to_je32((uint32_t)pos - inode->i_size);
ri.csize = cpu_to_je32(0);
ri.compr = JFFS2_COMPR_ZERO;
ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
@@ -202,7 +201,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
goto out_err;
}
jffs2_complete_reservation(c);
- inode->i_size = pageofs;
+ inode->i_size = pos;
mutex_unlock(&f->sem);
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index da3e18503c65..acb4492f5970 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -772,10 +772,10 @@ void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c)
}
#define XREF_TMPHASH_SIZE (128)
-void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
+int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
{
struct jffs2_xattr_ref *ref, *_ref;
- struct jffs2_xattr_ref *xref_tmphash[XREF_TMPHASH_SIZE];
+ struct jffs2_xattr_ref **xref_tmphash;
struct jffs2_xattr_datum *xd, *_xd;
struct jffs2_inode_cache *ic;
struct jffs2_raw_node_ref *raw;
@@ -784,9 +784,12 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
BUG_ON(!(c->flags & JFFS2_SB_FLAG_BUILDING));
+ xref_tmphash = kcalloc(XREF_TMPHASH_SIZE,
+ sizeof(struct jffs2_xattr_ref *), GFP_KERNEL);
+ if (!xref_tmphash)
+ return -ENOMEM;
+
/* Phase.1 : Merge same xref */
- for (i=0; i < XREF_TMPHASH_SIZE; i++)
- xref_tmphash[i] = NULL;
for (ref=c->xref_temp; ref; ref=_ref) {
struct jffs2_xattr_ref *tmp;
@@ -884,6 +887,8 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
"%u of xref (%u dead, %u orphan) found.\n",
xdatum_count, xdatum_unchecked_count, xdatum_orphan_count,
xref_count, xref_dead_count, xref_orphan_count);
+ kfree(xref_tmphash);
+ return 0;
}
struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index 720007b2fd65..1b5030a3349d 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -71,7 +71,7 @@ static inline int is_xattr_ref_dead(struct jffs2_xattr_ref *ref)
#ifdef CONFIG_JFFS2_FS_XATTR
extern void jffs2_init_xattr_subsystem(struct jffs2_sb_info *c);
-extern void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
+extern int jffs2_build_xattr_subsystem(struct jffs2_sb_info *c);
extern void jffs2_clear_xattr_subsystem(struct jffs2_sb_info *c);
extern struct jffs2_xattr_datum *jffs2_setup_xattr_datum(struct jffs2_sb_info *c,
@@ -103,7 +103,7 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
#else
#define jffs2_init_xattr_subsystem(c)
-#define jffs2_build_xattr_subsystem(c)
+#define jffs2_build_xattr_subsystem(c) (0)
#define jffs2_clear_xattr_subsystem(c)
#define jffs2_xattr_do_crccheck_inode(c, ic)
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 1014f2a24697..2f452b5ee731 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -100,7 +100,7 @@ static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
static int dbFindBits(u32 word, int l2nb);
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
-static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
+static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl);
static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
int nblocks);
static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
@@ -168,7 +168,7 @@ int dbMount(struct inode *ipbmap)
struct bmap *bmp;
struct dbmap_disk *dbmp_le;
struct metapage *mp;
- int i;
+ int i, err;
/*
* allocate/initialize the in-memory bmap descriptor
@@ -183,30 +183,53 @@ int dbMount(struct inode *ipbmap)
BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
PSIZE, 0);
if (mp == NULL) {
- kfree(bmp);
- return -EIO;
+ err = -EIO;
+ goto err_kfree_bmp;
}
/* copy the on-disk bmap descriptor to its in-memory version. */
dbmp_le = (struct dbmap_disk *) mp->data;
bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
+
bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
+ if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE ||
+ bmp->db_l2nbperpage < 0) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
+
bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
if (!bmp->db_numag) {
- release_metapage(mp);
- kfree(bmp);
- return -EINVAL;
+ err = -EINVAL;
+ goto err_release_metapage;
}
bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
+ if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 ||
+ bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
+
bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
+ if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
+ bmp->db_agl2size < 0) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
+
+ if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) {
+ err = -EINVAL;
+ goto err_release_metapage;
+ }
+
for (i = 0; i < MAXAG; i++)
bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
@@ -227,6 +250,12 @@ int dbMount(struct inode *ipbmap)
BMAP_LOCK_INIT(bmp);
return (0);
+
+err_release_metapage:
+ release_metapage(mp);
+err_kfree_bmp:
+ kfree(bmp);
+ return err;
}
@@ -260,6 +289,7 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
/* free the memory for the in-memory bmap. */
kfree(bmp);
+ JFS_SBI(ipbmap->i_sb)->bmap = NULL;
return (0);
}
@@ -1768,7 +1798,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
* dbFindLeaf() returns the index of the leaf at which
* free space was found.
*/
- rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
+ rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true);
/* release the buffer.
*/
@@ -2015,9 +2045,12 @@ dbAllocDmapLev(struct bmap * bmp,
* free space. if sufficient free space is found, dbFindLeaf()
* returns the index of the leaf at which free space was found.
*/
- if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false))
return -ENOSPC;
+ if (leafidx < 0)
+ return -EIO;
+
/* determine the block number within the file system corresponding
* to the leaf at which free space was found.
*/
@@ -2972,14 +3005,18 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
* leafidx - return pointer to be set to the index of the leaf
* describing at least l2nb free blocks if sufficient
* free blocks are found.
+ * is_ctl - determines if the tree is of type ctl
*
* RETURN VALUES:
* 0 - success
* -ENOSPC - insufficient free blocks.
*/
-static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
+static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl)
{
int ti, n = 0, k, x = 0;
+ int max_size;
+
+ max_size = is_ctl ? CTLTREESIZE : TREESIZE;
/* first check the root of the tree to see if there is
* sufficient free space.
@@ -3000,6 +3037,8 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
/* sufficient free space found. move to the next
* level (or quit if this is the last level).
*/
+ if (x + n > max_size)
+ return -ENOSPC;
if (l2nb <= tp->dmt_stree[x + n])
break;
}
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index 2ae7d59ab10a..c971e8a6525d 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -521,6 +521,11 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
* blocks in the map. in that case, we'll start off with the
* maximum free.
*/
+
+ /* give up if no space left */
+ if (bmp->db_maxfreebud == -1)
+ return -ENOSPC;
+
max = (s64) 1 << bmp->db_maxfreebud;
if (*nblocks >= max && *nblocks > nbperpage)
nb = nblks = (max > nbperpage) ? max : nbperpage;
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 415bfa90607a..0c36ce6318d5 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -135,7 +135,9 @@
#define NUM_INODE_PER_IAG INOSPERIAG
#define MINBLOCKSIZE 512
+#define L2MINBLOCKSIZE 9
#define MAXBLOCKSIZE 4096
+#define L2MAXBLOCKSIZE 12
#define MAXFILESIZE ((s64)1 << 52)
#define JFS_LINK_MAX 0xffffffff
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 93e8c590ff5c..b45cc109e506 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -208,6 +208,7 @@ int diUnmount(struct inode *ipimap, int mounterror)
* free in-memory control structure
*/
kfree(imap);
+ JFS_IP(ipimap)->i_imap = NULL;
return (0);
}
@@ -1334,7 +1335,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
int diAlloc(struct inode *pip, bool dir, struct inode *ip)
{
int rc, ino, iagno, addext, extno, bitno, sword;
- int nwords, rem, i, agno;
+ int nwords, rem, i, agno, dn_numag;
u32 mask, inosmap, extsmap;
struct inode *ipimap;
struct metapage *mp;
@@ -1370,6 +1371,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
/* get the ag number of this iag */
agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
+ dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag;
+ if (agno < 0 || agno > dn_numag)
+ return -EIO;
if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
/*
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 78789c5ed36b..e10db01f253b 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -367,6 +367,11 @@ tid_t txBegin(struct super_block *sb, int flag)
jfs_info("txBegin: flag = 0x%x", flag);
log = JFS_SBI(sb)->log;
+ if (!log) {
+ jfs_error(sb, "read-only filesystem\n");
+ return 0;
+ }
+
TXN_LOCK();
INCREMENT(TxStat.txBegin);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 14528c0ffe63..c2c439acbb78 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -811,6 +811,11 @@ static int jfs_link(struct dentry *old_dentry,
if (rc)
goto out;
+ if (isReadOnly(ip)) {
+ jfs_error(ip->i_sb, "read-only filesystem\n");
+ return -EROFS;
+ }
+
tid = txBegin(ip->i_sb, 0);
mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 0b22c39dad47..b2a126a947e3 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -212,7 +212,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
dput(dentry);
return ERR_PTR(-EINVAL);
}
- dtmp = lookup_one_len_unlocked(kntmp->name, dentry,
+ dtmp = lookup_positive_unlocked(kntmp->name, dentry,
strlen(kntmp->name));
dput(dentry);
if (IS_ERR(dtmp))
diff --git a/fs/libfs.c b/fs/libfs.c
index be57e64834e5..fd5f6c106059 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -864,8 +864,8 @@ out:
EXPORT_SYMBOL_GPL(simple_attr_read);
/* interpret the buffer as a number to call the set function with */
-ssize_t simple_attr_write(struct file *file, const char __user *buf,
- size_t len, loff_t *ppos)
+static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos, bool is_signed)
{
struct simple_attr *attr;
unsigned long long val;
@@ -886,7 +886,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- ret = kstrtoull(attr->set_buf, 0, &val);
+ if (is_signed)
+ ret = kstrtoll(attr->set_buf, 0, &val);
+ else
+ ret = kstrtoull(attr->set_buf, 0, &val);
if (ret)
goto out;
ret = attr->set(attr->data, val);
@@ -896,8 +899,21 @@ out:
mutex_unlock(&attr->mutex);
return ret;
}
+
+ssize_t simple_attr_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, false);
+}
EXPORT_SYMBOL_GPL(simple_attr_write);
+ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return simple_attr_write_xsigned(file, buf, len, ppos, true);
+}
+EXPORT_SYMBOL_GPL(simple_attr_write_signed);
+
/**
* generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
* @sb: filesystem to do the file handle conversion on
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 654594ef4f94..68a2eac548c3 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -275,6 +275,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
{
struct nsm_handle *new;
+ if (!hostname)
+ return NULL;
+
new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
if (unlikely(new == NULL))
return NULL;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 081ccf0caee3..2e2d4de4cf87 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -10,7 +10,7 @@
/*
* Mbcache is a simple key-value store. Keys need not be unique, however
* key-value pairs are expected to be unique (we use this fact in
- * mb_cache_entry_delete()).
+ * mb_cache_entry_delete_or_get()).
*
* Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
* Ext4 also uses it for deduplication of xattr values stored in inodes.
@@ -89,12 +89,19 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
return -ENOMEM;
INIT_LIST_HEAD(&entry->e_list);
- /* One ref for hash, one ref returned */
- atomic_set(&entry->e_refcnt, 1);
+ /*
+ * We create entry with two references. One reference is kept by the
+ * hash table, the other reference is used to protect us from
+ * mb_cache_entry_delete_or_get() until the entry is fully setup. This
+ * avoids nesting of cache->c_list_lock into hash table bit locks which
+ * is problematic for RT.
+ */
+ atomic_set(&entry->e_refcnt, 2);
entry->e_key = key;
entry->e_value = value;
- entry->e_reusable = reusable;
- entry->e_referenced = 0;
+ entry->e_flags = 0;
+ if (reusable)
+ set_bit(MBE_REUSABLE_B, &entry->e_flags);
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -106,24 +113,41 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
}
hlist_bl_add_head(&entry->e_hash_list, head);
hlist_bl_unlock(head);
-
spin_lock(&cache->c_list_lock);
list_add_tail(&entry->e_list, &cache->c_list);
- /* Grab ref for LRU list */
- atomic_inc(&entry->e_refcnt);
cache->c_entry_count++;
spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
return 0;
}
EXPORT_SYMBOL(mb_cache_entry_create);
-void __mb_cache_entry_free(struct mb_cache_entry *entry)
+void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
{
+ struct hlist_bl_head *head;
+
+ head = mb_cache_entry_head(cache, entry->e_key);
+ hlist_bl_lock(head);
+ hlist_bl_del(&entry->e_hash_list);
+ hlist_bl_unlock(head);
kmem_cache_free(mb_entry_cache, entry);
}
EXPORT_SYMBOL(__mb_cache_entry_free);
+/*
+ * mb_cache_entry_wait_unused - wait to be the last user of the entry
+ *
+ * @entry - entry to work on
+ *
+ * Wait to be the last user of the entry.
+ */
+void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+{
+ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+}
+EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+
static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
struct mb_cache_entry *entry,
u32 key)
@@ -141,10 +165,10 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
while (node) {
entry = hlist_bl_entry(node, struct mb_cache_entry,
e_hash_list);
- if (entry->e_key == key && entry->e_reusable) {
- atomic_inc(&entry->e_refcnt);
+ if (entry->e_key == key &&
+ test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+ atomic_inc_not_zero(&entry->e_refcnt))
goto out;
- }
node = node->next;
}
entry = NULL;
@@ -204,10 +228,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
head = mb_cache_entry_head(cache, key);
hlist_bl_lock(head);
hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
- if (entry->e_key == key && entry->e_value == value) {
- atomic_inc(&entry->e_refcnt);
+ if (entry->e_key == key && entry->e_value == value &&
+ atomic_inc_not_zero(&entry->e_refcnt))
goto out;
- }
}
entry = NULL;
out:
@@ -216,7 +239,7 @@ out:
}
EXPORT_SYMBOL(mb_cache_entry_get);
-/* mb_cache_entry_delete - remove a cache entry
+/* mb_cache_entry_delete - try to remove a cache entry
* @cache - cache we work with
* @key - key
* @value - value
@@ -253,6 +276,43 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
}
EXPORT_SYMBOL(mb_cache_entry_delete);
+/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
+ * @cache - cache we work with
+ * @key - key
+ * @value - value
+ *
+ * Remove entry from cache @cache with key @key and value @value. The removal
+ * happens only if the entry is unused. The function returns NULL in case the
+ * entry was successfully removed or there's no entry in cache. Otherwise the
+ * function grabs reference of the entry that we failed to delete because it
+ * still has users and return it.
+ */
+struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+ u32 key, u64 value)
+{
+ struct mb_cache_entry *entry;
+
+ entry = mb_cache_entry_get(cache, key, value);
+ if (!entry)
+ return NULL;
+
+ /*
+ * Drop the ref we got from mb_cache_entry_get() and the initial hash
+ * ref if we are the last user
+ */
+ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
+ return entry;
+
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list))
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ spin_unlock(&cache->c_list_lock);
+ __mb_cache_entry_free(cache, entry);
+ return NULL;
+}
+EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+
/* mb_cache_entry_touch - cache entry got used
* @cache - cache the entry belongs to
* @entry - entry that got used
@@ -262,7 +322,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete);
void mb_cache_entry_touch(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
- entry->e_referenced = 1;
+ set_bit(MBE_REFERENCED_B, &entry->e_flags);
}
EXPORT_SYMBOL(mb_cache_entry_touch);
@@ -280,34 +340,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
unsigned long nr_to_scan)
{
struct mb_cache_entry *entry;
- struct hlist_bl_head *head;
unsigned long shrunk = 0;
spin_lock(&cache->c_list_lock);
while (nr_to_scan-- && !list_empty(&cache->c_list)) {
entry = list_first_entry(&cache->c_list,
struct mb_cache_entry, e_list);
- if (entry->e_referenced) {
- entry->e_referenced = 0;
+ /* Drop initial hash reference if there is no user */
+ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+ clear_bit(MBE_REFERENCED_B, &entry->e_flags);
list_move_tail(&entry->e_list, &cache->c_list);
continue;
}
list_del_init(&entry->e_list);
cache->c_entry_count--;
- /*
- * We keep LRU list reference so that entry doesn't go away
- * from under us.
- */
spin_unlock(&cache->c_list_lock);
- head = mb_cache_entry_head(cache, entry->e_key);
- hlist_bl_lock(head);
- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
- hlist_bl_del_init(&entry->e_hash_list);
- atomic_dec(&entry->e_refcnt);
- }
- hlist_bl_unlock(head);
- if (mb_cache_entry_put(cache, entry))
- shrunk++;
+ __mb_cache_entry_free(cache, entry);
+ shrunk++;
cond_resched();
spin_lock(&cache->c_list_lock);
}
@@ -399,11 +449,6 @@ void mb_cache_destroy(struct mb_cache *cache)
* point.
*/
list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
- if (!hlist_bl_unhashed(&entry->e_hash_list)) {
- hlist_bl_del_init(&entry->e_hash_list);
- atomic_dec(&entry->e_refcnt);
- } else
- WARN_ON(1);
list_del(&entry->e_list);
WARN_ON(atomic_read(&entry->e_refcnt) != 1);
mb_cache_entry_put(cache, entry);
diff --git a/fs/namei.c b/fs/namei.c
index 9e8fca598acc..60b57e0bc174 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2575,6 +2575,26 @@ struct dentry *lookup_one_len_unlocked(const char *name,
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
+/*
+ * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
+ * on negatives. Returns known positive or ERR_PTR(); that's what
+ * most of the users want. Note that pinned negative with unlocked parent
+ * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
+ * need to be very careful; pinned positives have ->d_inode stable, so
+ * this one avoids such problems.
+ */
+struct dentry *lookup_positive_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ struct dentry *ret = lookup_one_len_unlocked(name, base, len);
+ if (!IS_ERR(ret) && d_is_negative(ret)) {
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(lookup_positive_unlocked);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
@@ -2593,7 +2613,7 @@ int path_pts(struct path *path)
this.name = "pts";
this.len = 3;
child = d_hash_and_lookup(parent, &this);
- if (!child)
+ if (IS_ERR_OR_NULL(child))
return -ENOENT;
path->dentry = child;
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 06cb0c1d9aee..a2bca78b80ab 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -604,6 +604,8 @@ retry:
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
}
+
+ nfs4_put_deviceid_node(node);
return ERR_PTR(-ENODEV);
}
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index dec5880ac6de..6e3a14fdff9c 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -422,7 +422,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->concat.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
@@ -451,7 +451,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
int ret, i;
d->children = kcalloc(v->stripe.volumes_count,
- sizeof(struct pnfs_block_dev), GFP_KERNEL);
+ sizeof(struct pnfs_block_dev), gfp_mask);
if (!d->children)
return -ENOMEM;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index e8e825497cbd..015d39ac2c8f 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -837,6 +837,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
return &fl->generic_hdr;
}
+static bool
+filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg)
+{
+ return flseg->num_fh > 1;
+}
+
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
@@ -857,6 +863,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
size = pnfs_generic_pg_test(pgio, prev, req);
if (!size)
return 0;
+ else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg)))
+ return size;
/* see if req and prev are in the same stripe */
if (prev) {
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index fee421da2197..bb10f2b21cc1 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1189,6 +1189,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -EPFNOSUPPORT:
case -EPROTONOSUPPORT:
case -EOPNOTSUPP:
+ case -EINVAL:
case -ECONNREFUSED:
case -ECONNRESET:
case -EHOSTDOWN:
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5ac7bf24c507..2d438318681a 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -190,7 +190,7 @@ struct nfs4_state {
unsigned int n_wronly; /* Number of write-only references */
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
- atomic_t count;
+ refcount_t count;
wait_queue_head_t waitq;
};
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9a0f48f7f2b8..c9db9a0fc733 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -163,6 +163,7 @@ static int nfs4_map_errors(int err)
case -NFS4ERR_RESOURCE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
return -EREMOTEIO;
case -NFS4ERR_WRONGSEC:
case -NFS4ERR_WRONG_CRED:
@@ -509,6 +510,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
case -NFS4ERR_GRACE:
case -NFS4ERR_LAYOUTTRYLATER:
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
exception->delay = 1;
return 0;
@@ -1792,7 +1794,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
out:
return ERR_PTR(ret);
out_return_state:
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return state;
}
@@ -1851,8 +1853,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (!data->rpc_done) {
if (data->rpc_status)
return ERR_PTR(data->rpc_status);
- /* cached opens have already been processed */
- goto update;
+ return nfs4_try_open_cached(data);
}
ret = nfs_refresh_inode(inode, &data->f_attr);
@@ -1861,10 +1862,11 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
-update:
- update_open_stateid(state, &data->o_res.stateid, NULL,
- data->o_arg.fmode);
- atomic_inc(&state->count);
+
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode))
+ return ERR_PTR(-EAGAIN);
+ refcount_inc(&state->count);
return state;
}
@@ -1902,7 +1904,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data)
return ERR_CAST(inode);
if (data->state != NULL && data->state->inode == inode) {
state = data->state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
} else
state = nfs4_get_open_state(inode, data->owner);
iput(inode);
@@ -1928,8 +1930,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
- update_open_stateid(state, &data->o_res.stateid, NULL,
- data->o_arg.fmode);
+ if (!update_open_stateid(state, &data->o_res.stateid,
+ NULL, data->o_arg.fmode)) {
+ nfs4_put_open_state(state);
+ state = ERR_PTR(-EAGAIN);
+ }
out:
nfs_release_seqid(data->o_arg.seqid);
return state;
@@ -1975,23 +1980,23 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return opendata;
}
static int nfs4_open_recover_helper(struct nfs4_opendata *opendata,
- fmode_t fmode)
+ fmode_t fmode)
{
struct nfs4_state *newstate;
+ struct nfs_server *server = NFS_SB(opendata->dentry->d_sb);
+ int openflags = opendata->o_arg.open_flags;
int ret;
if (!nfs4_mode_match_open_stateid(opendata->state, fmode))
return 0;
- opendata->o_arg.open_flags = 0;
opendata->o_arg.fmode = fmode;
- opendata->o_arg.share_access = nfs4_map_atomic_open_share(
- NFS_SB(opendata->dentry->d_sb),
- fmode, 0);
+ opendata->o_arg.share_access =
+ nfs4_map_atomic_open_share(server, fmode, openflags);
memset(&opendata->o_res, 0, sizeof(opendata->o_res));
memset(&opendata->c_res, 0, sizeof(opendata->c_res));
nfs4_init_opendata_res(opendata);
@@ -2569,10 +2574,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
struct nfs4_opendata *opendata;
int ret;
- opendata = nfs4_open_recoverdata_alloc(ctx, state,
- NFS4_OPEN_CLAIM_FH);
+ opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH);
if (IS_ERR(opendata))
return PTR_ERR(opendata);
+ /*
+ * We're not recovering a delegation, so ask for no delegation.
+ * Otherwise the recovery thread could deadlock with an outstanding
+ * delegation return.
+ */
+ opendata->o_arg.open_flags = O_DIRECT;
ret = nfs4_open_recover(opendata, state);
if (ret == -ESTALE)
d_drop(ctx->dentry);
@@ -5135,7 +5145,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
- nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
+ nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -5176,7 +5186,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
- nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
+ nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client,
+ NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
}
static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args,
@@ -8867,6 +8878,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
status = -EBUSY;
break;
case -NFS4ERR_RECALLCONFLICT:
+ case -NFS4ERR_RETURNCONFLICT:
status = -ERECALLCONFLICT;
break;
case -NFS4ERR_DELEG_REVOKED:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5ab021f87ecf..f0f0fb7499e3 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -66,6 +66,8 @@
#define OPENOWNER_POOL_SIZE 8
+static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp);
+
const nfs4_stateid zero_stateid = {
{ .data = { 0 } },
.type = NFS4_SPECIAL_STATEID_TYPE,
@@ -338,6 +340,8 @@ do_confirm:
status = nfs4_proc_create_session(clp, cred);
if (status != 0)
goto out;
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R))
+ nfs4_state_start_reclaim_reboot(clp);
nfs41_finish_session_reset(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -675,7 +679,7 @@ nfs4_alloc_open_state(void)
state = kzalloc(sizeof(*state), GFP_NOFS);
if (!state)
return NULL;
- atomic_set(&state->count, 1);
+ refcount_set(&state->count, 1);
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
seqlock_init(&state->seqlock);
@@ -709,7 +713,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
continue;
if (!nfs4_valid_open_stateid(state))
continue;
- if (atomic_inc_not_zero(&state->count))
+ if (refcount_inc_not_zero(&state->count))
return state;
}
return NULL;
@@ -763,7 +767,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
- if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
+ if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
return;
spin_lock(&inode->i_lock);
list_del(&state->inode_states);
@@ -1247,6 +1251,8 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
if (IS_ERR(task)) {
printk(KERN_ERR "%s: kthread_run: %ld\n",
__func__, PTR_ERR(task));
+ if (!nfs_client_init_is_complete(clp))
+ nfs_mark_client_ready(clp, PTR_ERR(task));
nfs4_clear_state_manager_bit(clp);
nfs_put_client(clp);
module_put(THIS_MODULE);
@@ -1594,7 +1600,7 @@ restart:
continue;
if (state->state == 0)
continue;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
status = ops->recover_open(sp, state);
if (status >= 0) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 56e48642c43e..f0021e3b8efd 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4277,12 +4277,10 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
if (unlikely(!p))
goto out_overflow;
if (len < NFS4_MAXLABELLEN) {
- if (label) {
- if (label->len) {
- if (label->len < len)
- return -ERANGE;
- memcpy(label->label, p, len);
- }
+ if (label && label->len) {
+ if (label->len < len)
+ return -ERANGE;
+ memcpy(label->label, p, len);
label->len = len;
label->pi = pi;
label->lfs = lfs;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index e8a07b3f9aaa..ba67906d6b2c 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -152,7 +152,7 @@ nfs4_get_device_info(struct nfs_server *server,
set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
out_free_pages:
- for (i = 0; i < max_pages; i++)
+ while (--i >= 0)
__free_page(pages[i]);
kfree(pages);
out_free_pdev:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ec0fd6b3d185..65aaa6eaad2c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -598,9 +598,8 @@ release_request:
static void nfs_write_error_remove_page(struct nfs_page *req)
{
+ SetPageError(req->wb_page);
nfs_end_page_writeback(req);
- generic_error_remove_page(page_file_mapping(req->wb_page),
- req->wb_page);
nfs_release_request(req);
}
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 442543304930..2455dc8be18a 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -82,6 +82,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..bb205328e043 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index b90bea1c434e..9f537decdd9c 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -855,13 +855,11 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
} else
dchild = dget(dparent);
} else
- dchild = lookup_one_len_unlocked(name, dparent, namlen);
+ dchild = lookup_positive_unlocked(name, dparent, namlen);
if (IS_ERR(dchild))
return rv;
if (d_mountpoint(dchild))
goto out;
- if (d_really_is_negative(dchild))
- goto out;
if (dchild->d_inode->i_ino != ino)
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7ee417b685e9..519d994c0c4c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -800,7 +800,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
} else {
if (!conn->cb_xprt)
return -EINVAL;
- clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
@@ -820,6 +819,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
rpc_shutdown_client(client);
return PTR_ERR(cred);
}
+
+ if (clp->cl_minorversion != 0)
+ clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
return 0;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index f4cf1c0793c6..cf81b5bc3e15 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -322,11 +322,11 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
if (ls->ls_recalled)
goto out_unlock;
- ls->ls_recalled = true;
- atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
if (list_empty(&ls->ls_layouts))
goto out_unlock;
+ ls->ls_recalled = true;
+ atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls);
trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid);
refcount_inc(&ls->ls_stid.sc_count);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a696a9b90786..5ec90b252b6a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -870,8 +870,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
rename->rn_tname, rename->rn_tnamelen);
if (status)
return status;
- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_sinfo, &cstate->save_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->current_fh);
return nfs_ok;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 78191320f8e2..5c241e510888 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1019,9 +1019,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru));
if (clp->cl_minorversion) {
+ spin_lock(&clp->cl_lock);
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
- spin_lock(&clp->cl_lock);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
}
@@ -4998,15 +4998,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return status;
- /* Client debugging aid. */
- if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
- char addr_str[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
- sizeof(addr_str));
- pr_warn_ratelimited("NFSD: client %s testing state ID "
- "with incorrect client ID\n", addr_str);
- return status;
- }
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
if (!s)
@@ -6401,12 +6392,16 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
if (sop->so_is_open_owner || !same_owner_str(sop, owner))
continue;
- if (atomic_read(&sop->so_count) != 1) {
- spin_unlock(&clp->cl_lock);
- return nfserr_locks_held;
+ /* see if there are still any locks associated with it */
+ lo = lockowner(sop);
+ list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
+ if (check_for_locks(stp->st_stid.sc_file, lo)) {
+ status = nfserr_locks_held;
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
}
- lo = lockowner(sop);
nfs4_get_stateowner(sop);
break;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 96b79bd90631..33827cdd8066 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2984,18 +2984,9 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
__be32 nfserr;
int ignore_crossmnt = 0;
- dentry = lookup_one_len_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
+ dentry = lookup_positive_unlocked(name, cd->rd_fhp->fh_dentry, namlen);
if (IS_ERR(dentry))
return nfserrno(PTR_ERR(dentry));
- if (d_really_is_negative(dentry)) {
- /*
- * we're not holding the i_mutex here, so there's
- * a window where this directory entry could have gone
- * away.
- */
- dput(dentry);
- return nfserr_noent;
- }
exp_get(exp);
/*
@@ -3102,6 +3093,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
case nfserr_noent:
xdr_truncate_encode(xdr, start_offset);
goto skip_entry;
+ case nfserr_jukebox:
+ /*
+ * The pseudoroot should only display dentries that lead to
+ * exports. If we get EJUKEBOX here, then we can't tell whether
+ * this entry should be included. Just fail the whole READDIR
+ * with NFS4ERR_DELAY in that case, and hope that the situation
+ * will resolve itself by the client's next attempt.
+ */
+ if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT)
+ goto fail;
+ /* fallthrough */
default:
/*
* If the client requested the RDATTR_ERROR attribute,
@@ -3392,7 +3394,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
p = xdr_reserve_space(xdr, 32);
if (!p)
return nfserr_resource;
- *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(open->op_recall);
/*
* TODO: space_limit's in delegations
@@ -4124,20 +4126,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(gdev->gd_layout_type);
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+ if (nfserr) {
+ /*
+ * We don't bother to burden the layout drivers with
+ * enforcing gd_maxcount, just tell the client to
+ * come back with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + 4 > gdev->gd_maxcount)
+ goto toosmall;
+ return nfserr;
}
if (gdev->gd_notify_types) {
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ff9899cc9913..7af48d306f20 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -347,7 +347,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
{
char *dname, *path;
- int uninitialized_var(maxsize);
+ int maxsize;
char *mesg = buf;
int len;
struct auth_domain *dom;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 28e7f86c8c94..a7231d17e359 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1691,6 +1691,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+ err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+ if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
+ goto out;
+ if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
+ goto out;
+
host_err = fh_want_write(ffhp);
if (host_err) {
err = nfserrno(host_err);
@@ -1724,12 +1730,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (ndentry == trap)
goto out_dput_new;
- host_err = -EXDEV;
- if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
- goto out_dput_new;
- if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
- goto out_dput_new;
-
host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
if (!host_err) {
host_err = commit_metadata(tfhp);
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 235b959fc2b3..bbd82f650e93 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -205,7 +205,8 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff,
int ret;
spin_lock(lock);
- if (prev->bh && blkoff == prev->blkoff) {
+ if (prev->bh && blkoff == prev->blkoff &&
+ likely(buffer_uptodate(prev->bh))) {
get_bh(prev->bh);
*bhp = prev->bh;
spin_unlock(lock);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index fb5a9a8a13cf..2ba57e4b4f0a 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -67,20 +67,28 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
- if (ret < 0) {
- ret = nilfs_bmap_convert_error(bmap, __func__, ret);
+ if (ret < 0)
goto out;
- }
+
if (NILFS_BMAP_USE_VBN(bmap)) {
ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
&blocknr);
if (!ret)
*ptrp = blocknr;
+ else if (ret == -ENOENT) {
+ /*
+ * If there was no valid entry in DAT for the block
+ * address obtained by b_ops->bop_lookup, then pass
+ * internal code -EINVAL to nilfs_bmap_convert_error
+ * to treat it as metadata corruption.
+ */
+ ret = -EINVAL;
+ }
}
out:
up_read(&bmap->b_sem);
- return ret;
+ return nilfs_bmap_convert_error(bmap, __func__, ret);
}
int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 138ebbb7a1ee..677ff78d54fb 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -295,6 +295,14 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
radix_tree_delete(&btnc->i_pages, newkey);
xa_unlock_irq(&btnc->i_pages);
unlock_page(ctxt->bh->b_page);
- } else
- brelse(nbh);
+ } else {
+ /*
+ * When canceling a buffer that a prepare operation has
+ * allocated to copy a node block to another location, use
+ * nilfs_btnode_delete() to initialize and release the buffer
+ * so that the buffer flags will not be in an inconsistent
+ * state when it is reallocated.
+ */
+ nilfs_btnode_delete(nbh);
+ }
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 919d1238ce45..a0e37530dcf3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
&submit_ptr);
if (ret) {
- if (ret != -EEXIST)
- return ret;
- goto out_check;
+ if (likely(ret == -EEXIST))
+ goto out_check;
+ if (ret == -ENOENT) {
+ /*
+ * Block address translation failed due to invalid
+ * value of 'ptr'. In this case, return internal code
+ * -EINVAL (broken bmap) to notify bmap layer of fatal
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
if (ra) {
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 114774ac2185..cef46650102e 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -73,10 +73,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
- if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
- brelse(bh);
+ if (unlikely(err)) /* -EIO, -ENOMEM, -ENOENT */
goto failed;
- }
}
lock_buffer(bh);
@@ -102,6 +100,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
failed:
unlock_page(bh->b_page);
put_page(bh->b_page);
+ if (unlikely(err))
+ brelse(bh);
return err;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index cf01aa55dd44..ea94dc21af0c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -930,6 +930,7 @@ void nilfs_evict_inode(struct inode *inode)
struct nilfs_transaction_info ti;
struct super_block *sb = inode->i_sb;
struct nilfs_inode_info *ii = NILFS_I(inode);
+ struct the_nilfs *nilfs;
int ret;
if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
@@ -942,6 +943,23 @@ void nilfs_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
+ nilfs = sb->s_fs_info;
+ if (unlikely(sb_rdonly(sb) || !nilfs->ns_writer)) {
+ /*
+ * If this inode is about to be disposed after the file system
+ * has been degraded to read-only due to file system corruption
+ * or after the writer has been detached, do not make any
+ * changes that cause writes, just clear it.
+ * Do this check after read-locking ns_segctor_sem by
+ * nilfs_transaction_begin() in order to avoid a race with
+ * the writer detach operation.
+ */
+ clear_inode(inode);
+ nilfs_clear_inode(inode);
+ nilfs_transaction_abort(sb);
+ return;
+ }
+
/* TODO: some of the following operations may fail. */
nilfs_truncate_bmap(ii, 0);
nilfs_mark_inode_dirty(inode);
@@ -1018,7 +1036,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
int err;
spin_lock(&nilfs->ns_inode_lock);
- if (ii->i_bh == NULL) {
+ if (ii->i_bh == NULL || unlikely(!buffer_uptodate(ii->i_bh))) {
spin_unlock(&nilfs->ns_inode_lock);
err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
inode->i_ino, pbh);
@@ -1027,7 +1045,10 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
spin_lock(&nilfs->ns_inode_lock);
if (ii->i_bh == NULL)
ii->i_bh = *pbh;
- else {
+ else if (unlikely(!buffer_uptodate(ii->i_bh))) {
+ __brelse(ii->i_bh);
+ ii->i_bh = *pbh;
+ } else {
brelse(*pbh);
*pbh = ii->i_bh;
}
@@ -1094,9 +1115,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
+ /*
+ * Do not dirty inodes after the log writer has been detached
+ * and its nilfs_root struct has been freed.
+ */
+ if (unlikely(nilfs_purging(nilfs)))
+ return 0;
+
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_msg(inode->i_sb, KERN_WARNING,
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9b96d79eea6c..dfb2083b8ce1 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -70,7 +70,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
return -EINVAL;
- buf = (void *)__get_free_pages(GFP_NOFS, 0);
+ buf = (void *)get_zeroed_page(GFP_NOFS);
if (unlikely(!buf))
return -ENOMEM;
maxmembs = PAGE_SIZE / argv->v_size;
@@ -1135,7 +1135,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
minseg = range[0] + segbytes - 1;
do_div(minseg, segbytes);
+
+ if (range[1] < 4096)
+ goto out;
+
maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
+ if (maxseg < segbytes)
+ goto out;
+
do_div(maxseg, segbytes);
maxseg--;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index c726b42ca92d..e5fee7fac915 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -372,7 +372,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
struct page *page = pvec.pages[i];
lock_page(page);
- nilfs_clear_dirty_page(page, silent);
+
+ /*
+ * This page may have been removed from the address
+ * space by truncation or invalidation when the lock
+ * was acquired. Skip processing in that case.
+ */
+ if (likely(page->mapping == mapping))
+ nilfs_clear_dirty_page(page, silent);
+
unlock_page(page);
}
pagevec_release(&pvec);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 20c479b5e41b..e72466fc8ca9 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 11914b3585b3..fdcbed6ee832 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -435,6 +435,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -443,6 +460,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
* The current segment is filled up
* (internal code)
*/
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -547,6 +565,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -711,6 +730,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
struct page *page = pvec.pages[i];
lock_page(page);
+ if (unlikely(page->mapping != mapping)) {
+ /* Exclude pages removed from the address space */
+ unlock_page(page);
+ continue;
+ }
if (!page_has_buffers(page))
create_empty_buffers(page, i_blocksize(inode), 0);
unlock_page(page);
@@ -965,10 +989,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
+
+ lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
+ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
@@ -982,6 +1009,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
+ set_buffer_uptodate(bh_sr);
+ unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -1531,6 +1560,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
@@ -1758,6 +1788,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1769,6 +1800,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
@@ -2019,6 +2051,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
int err;
+ if (sb_rdonly(sci->sc_super))
+ return -EROFS;
+
nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
sci->sc_cno = nilfs->ns_cno;
@@ -2609,11 +2644,10 @@ static int nilfs_segctor_thread(void *arg)
goto loop;
end_thread:
- spin_unlock(&sci->sc_state_lock);
-
/* end sync. */
sci->sc_task = NULL;
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
+ spin_unlock(&sci->sc_state_lock);
return 0;
}
@@ -2705,7 +2739,7 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
flush_work(&sci->sc_iput_work);
- } while (ret && retrycount-- > 0);
+ } while (ret && ret != -EROFS && retrycount-- > 0);
}
/**
@@ -2816,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
+ set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
@@ -2828,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
+ clear_nilfs_purging(nilfs);
}
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 150845a43225..4626540008c1 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -504,15 +504,38 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
down_write(&NILFS_MDT(sufile)->mi_sem);
ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
- if (!ret) {
- mark_buffer_dirty(bh);
- nilfs_mdt_mark_dirty(sufile);
- kaddr = kmap_atomic(bh->b_page);
- su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (ret)
+ goto out_sem;
+
+ kaddr = kmap_atomic(bh->b_page);
+ su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
+ if (unlikely(nilfs_segment_usage_error(su))) {
+ struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
+
+ kunmap_atomic(kaddr);
+ brelse(bh);
+ if (nilfs_segment_is_active(nilfs, segnum)) {
+ nilfs_error(sufile->i_sb,
+ "active segment %llu is erroneous",
+ (unsigned long long)segnum);
+ } else {
+ /*
+ * Segments marked erroneous are never allocated by
+ * nilfs_sufile_alloc(); only active segments, ie,
+ * the segments indexed by ns_segnum or ns_nextnum,
+ * can be erroneous here.
+ */
+ WARN_ON_ONCE(1);
+ }
+ ret = -EIO;
+ } else {
nilfs_segment_usage_set_dirty(su);
kunmap_atomic(kaddr);
+ mark_buffer_dirty(bh);
+ nilfs_mdt_mark_dirty(sufile);
brelse(bh);
}
+out_sem:
up_write(&NILFS_MDT(sufile)->mi_sem);
return ret;
}
@@ -539,9 +562,14 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
kaddr = kmap_atomic(bh->b_page);
su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr);
- WARN_ON(nilfs_segment_usage_error(su));
- if (modtime)
+ if (modtime) {
+ /*
+ * Check segusage error and set su_lastmod only when updating
+ * this entry with a valid timestamp, not for cancellation.
+ */
+ WARN_ON_ONCE(nilfs_segment_usage_error(su));
su->su_lastmod = cpu_to_le64(modtime);
+ }
su->su_nblocks = cpu_to_le32(nblocks);
kunmap_atomic(kaddr);
@@ -782,6 +810,15 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
goto out_header;
sui->ncleansegs -= nsegs - newnsegs;
+
+ /*
+ * If the sufile is successfully truncated, immediately adjust
+ * the segment allocation space while locking the semaphore
+ * "mi_sem" so that nilfs_sufile_alloc() never allocates
+ * segments in the truncated space.
+ */
+ sui->allocmax = newnsegs - 1;
+ sui->allocmin = 0;
}
kaddr = kmap_atomic(header_bh->b_page);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5e4d7d19102c..99bcb4ab47a6 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -374,10 +374,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
- memset(nsbp, 0, nilfs->ns_blocksize);
+ lock_buffer(nsbh);
if (sb2i >= 0) {
+ /*
+ * The position of the second superblock only changes by 4KiB,
+ * which is larger than the maximum superblock data size
+ * (= 1KiB), so there is no need to use memmove() to allow
+ * overlap between source and destination.
+ */
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+
+ /*
+ * Zero fill after copy to avoid overwriting in case of move
+ * within the same block.
+ */
+ memset(nsbh->b_data, 0, offset);
+ memset((void *)nsbp + nilfs->ns_sbsize, 0,
+ nsbh->b_size - offset - nilfs->ns_sbsize);
+ } else {
+ memset(nsbh->b_data, 0, nsbh->b_size);
+ }
+ set_buffer_uptodate(nsbh);
+ unlock_buffer(nsbh);
+
+ if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
@@ -411,6 +432,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
goto out;
/*
+ * Prevent underflow in second superblock position calculation.
+ * The exact minimum size check is done in nilfs_sufile_resize().
+ */
+ if (newsize < 4096) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ /*
* Write lock is required to protect some functions depending
* on the number of segments, the number of reserved segments,
* and so forth.
@@ -475,6 +505,7 @@ static void nilfs_put_super(struct super_block *sb)
up_write(&nilfs->ns_sem);
}
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@@ -1101,6 +1132,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index fb61c33c6004..c8d869bc25b0 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/random.h>
+#include <linux/log2.h>
#include <linux/crc32.h>
#include "nilfs.h"
#include "segment.h"
@@ -86,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
- nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@@ -274,6 +274,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
}
+ err = nilfs_sysfs_create_device_group(sb);
+ if (unlikely(err))
+ goto sysfs_error;
+
if (valid_fs)
goto skip_recovery;
@@ -335,6 +339,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
failed_unload:
+ nilfs_sysfs_delete_device_group(nilfs);
+
+ sysfs_error:
iput(nilfs->ns_cpfile);
iput(nilfs->ns_sufile);
iput(nilfs->ns_dat);
@@ -368,6 +375,18 @@ unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
100));
}
+/**
+ * nilfs_max_segment_count - calculate the maximum number of segments
+ * @nilfs: nilfs object
+ */
+static u64 nilfs_max_segment_count(struct the_nilfs *nilfs)
+{
+ u64 max_count = U64_MAX;
+
+ do_div(max_count, nilfs->ns_blocks_per_segment);
+ return min_t(u64, max_count, ULONG_MAX);
+}
+
void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
{
nilfs->ns_nsegments = nsegs;
@@ -377,6 +396,8 @@ void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
struct nilfs_super_block *sbp)
{
+ u64 nsegments, nblocks;
+
if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) {
nilfs_msg(nilfs->ns_sb, KERN_ERR,
"unsupported revision (superblock rev.=%d.%d, current rev.=%d.%d). Please check the version of mkfs.nilfs(2).",
@@ -423,7 +444,35 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
return -EINVAL;
}
- nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
+ nsegments = le64_to_cpu(sbp->s_nsegments);
+ if (nsegments > nilfs_max_segment_count(nilfs)) {
+ nilfs_msg(nilfs->ns_sb, KERN_ERR,
+ "segment count %llu exceeds upper limit (%llu segments)",
+ (unsigned long long)nsegments,
+ (unsigned long long)nilfs_max_segment_count(nilfs));
+ return -EINVAL;
+ }
+
+ nblocks = (u64)i_size_read(nilfs->ns_sb->s_bdev->bd_inode) >>
+ nilfs->ns_sb->s_blocksize_bits;
+ if (nblocks) {
+ u64 min_block_count = nsegments * nilfs->ns_blocks_per_segment;
+ /*
+ * To avoid failing to mount early device images without a
+ * second superblock, exclude that block count from the
+ * "min_block_count" calculation.
+ */
+
+ if (nblocks < min_block_count) {
+ nilfs_msg(nilfs->ns_sb, KERN_ERR,
+ "total number of segment blocks %llu exceeds device size (%llu blocks)",
+ (unsigned long long)min_block_count,
+ (unsigned long long)nblocks);
+ return -EINVAL;
+ }
+ }
+
+ nilfs_set_nsegments(nilfs, nsegments);
nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
return 0;
}
@@ -448,11 +497,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
return crc == le32_to_cpu(sbp->s_sum);
}
-static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
+/**
+ * nilfs_sb2_bad_offset - check the location of the second superblock
+ * @sbp: superblock raw data buffer
+ * @offset: byte offset of second superblock calculated from device size
+ *
+ * nilfs_sb2_bad_offset() checks if the position on the second
+ * superblock is valid or not based on the filesystem parameters
+ * stored in @sbp. If @offset points to a location within the segment
+ * area, or if the parameters themselves are not normal, it is
+ * determined to be invalid.
+ *
+ * Return Value: true if invalid, false if valid.
+ */
+static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset)
{
- return offset < ((le64_to_cpu(sbp->s_nsegments) *
- le32_to_cpu(sbp->s_blocks_per_segment)) <<
- (le32_to_cpu(sbp->s_log_block_size) + 10));
+ unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size);
+ u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment);
+ u64 nsegments = le64_to_cpu(sbp->s_nsegments);
+ u64 index;
+
+ if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS ||
+ shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS)
+ return true;
+
+ index = offset >> (shift_bits + BLOCK_SIZE_BITS);
+ do_div(index, blocks_per_segment);
+ return index < nsegments;
}
static void nilfs_release_super_block(struct the_nilfs *nilfs)
@@ -494,9 +565,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
{
struct nilfs_super_block **sbp = nilfs->ns_sbp;
struct buffer_head **sbh = nilfs->ns_sbh;
- u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size);
+ u64 sb2off, devsize = nilfs->ns_bdev->bd_inode->i_size;
int valid[2], swp = 0;
+ if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) {
+ nilfs_msg(sb, KERN_ERR, "device size too small");
+ return -EINVAL;
+ }
+ sb2off = NILFS_SB2_OFFSET_BYTES(devsize);
+
sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize,
&sbh[0]);
sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]);
@@ -611,7 +688,11 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
goto failed_sbh;
}
nilfs_release_super_block(nilfs);
- sb_set_blocksize(sb, blocksize);
+ if (!sb_set_blocksize(sb, blocksize)) {
+ nilfs_msg(sb, KERN_ERR, "bad blocksize %d", blocksize);
+ err = -EINVAL;
+ goto out;
+ }
err = nilfs_load_super_block(nilfs, sb, blocksize, &sbp);
if (err)
@@ -639,10 +720,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
- err = nilfs_sysfs_create_device_group(sb);
- if (err)
- goto failed_sbh;
-
set_nilfs_init(nilfs);
err = 0;
out:
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 380a543c5b19..de6e24d80eb6 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -29,6 +29,7 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 52ccd34b1e79..a026dbd3593f 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls)
return -EINVAL;
}
-static struct nls_table *find_nls(char *charset)
+static struct nls_table *find_nls(const char *charset)
{
struct nls_table *nls;
spin_lock(&nls_lock);
@@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset)
return nls;
}
-struct nls_table *load_nls(char *charset)
+struct nls_table *load_nls(const char *charset)
{
return try_then_request_module(find_nls(charset), "nls_%s", charset);
}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 046f5e3c9622..c7cf0913229c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4722,7 +4722,7 @@ int ocfs2_insert_extent(handle_t *handle,
struct ocfs2_alloc_context *meta_ac)
{
int status;
- int uninitialized_var(free_records);
+ int free_records;
struct buffer_head *last_eb_bh = NULL;
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
@@ -7052,7 +7052,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
- u64 uninitialized_var(block);
+ u64 block;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b6948813eb06..1353db3f7f48 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2003,11 +2003,25 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (unlikely(copied < len) && wc->w_target_page) {
+ loff_t new_isize;
+
if (!PageUptodate(wc->w_target_page))
copied = 0;
- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
- start+len);
+ new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
+ if (new_isize > page_offset(wc->w_target_page))
+ ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+ start+len);
+ else {
+ /*
+ * When page is fully beyond new isize (data copy
+ * failed), do not bother zeroing the page. Invalidate
+ * it instead so that writeback does not get confused
+ * put page & buffer dirty bits into inconsistent
+ * state.
+ */
+ block_invalidatepage(wc->w_target_page, 0, PAGE_SIZE);
+ }
}
if (wc->w_target_page)
flush_dcache_page(wc->w_target_page);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c121abbdfc7d..13f4bb4e174c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -866,9 +866,9 @@ static int ocfs2_dx_dir_lookup(struct inode *inode,
u64 *ret_phys_blkno)
{
int ret = 0;
- unsigned int cend, uninitialized_var(clen);
- u32 uninitialized_var(cpos);
- u64 uninitialized_var(blkno);
+ unsigned int cend, clen;
+ u32 cpos;
+ u64 blkno;
u32 name_hash = hinfo->major_hash;
ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno,
@@ -912,7 +912,7 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
struct ocfs2_dir_lookup_result *res)
{
int ret, i, found;
- u64 uninitialized_var(phys);
+ u64 phys;
struct buffer_head *dx_leaf_bh = NULL;
struct ocfs2_dx_leaf *dx_leaf;
struct ocfs2_dx_entry *dx_entry = NULL;
@@ -4420,9 +4420,9 @@ out:
int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
{
int ret;
- unsigned int uninitialized_var(clen);
- u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos);
- u64 uninitialized_var(blkno);
+ unsigned int clen;
+ u32 major_hash = UINT_MAX, p_cpos, cpos;
+ u64 blkno;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct buffer_head *dx_root_bh = NULL;
struct ocfs2_dx_root_block *dx_root;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index d06e27ec4be4..fb181f6d6c06 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -704,10 +704,6 @@ struct dlm_begin_reco
__be32 pad2;
};
-
-#define BITS_PER_BYTE 8
-#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
-
struct dlm_query_join_request
{
u8 node_idx;
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 06cb96462bf9..1f41171c1468 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -416,7 +416,7 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
{
int i, ret, tree_height, len;
struct ocfs2_dinode *di;
- struct ocfs2_extent_block *uninitialized_var(eb);
+ struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
struct ocfs2_extent_rec *rec;
struct buffer_head *eb_bh = NULL;
@@ -613,7 +613,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
unsigned int *extent_flags)
{
int ret;
- unsigned int uninitialized_var(hole_len), flags = 0;
+ unsigned int hole_len, flags = 0;
struct buffer_head *di_bh = NULL;
struct ocfs2_extent_rec rec;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0141298bb2e5..c1780b14d23d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2111,14 +2111,20 @@ static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
struct ocfs2_space_resv sr;
int change_size = 1;
int cmd = OCFS2_IOC_RESVSP64;
+ int ret = 0;
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
if (!ocfs2_writes_unwritten_extents(osb))
return -EOPNOTSUPP;
- if (mode & FALLOC_FL_KEEP_SIZE)
+ if (mode & FALLOC_FL_KEEP_SIZE) {
change_size = 0;
+ } else {
+ ret = inode_newsize_ok(inode, offset + len);
+ if (ret)
+ return ret;
+ }
if (mode & FALLOC_FL_PUNCH_HOLE)
cmd = OCFS2_IOC_UNRESVSP64;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 1565dd8e8856..fbbc30f20173 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -115,14 +115,6 @@ static int __ocfs2_move_extent(handle_t *handle,
*/
replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
- context->et.et_root_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_split_extent(handle, &context->et, path, index,
&replace_rec, context->meta_ac,
&context->dealloc);
@@ -131,8 +123,6 @@ static int __ocfs2_move_extent(handle_t *handle,
goto out;
}
- ocfs2_journal_dirty(handle, context->et.et_root_bh);
-
context->new_phys_cpos = new_p_cpos;
/*
@@ -454,7 +444,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
if (vict_blkno < (le64_to_cpu(bg->bg_blkno) +
- le16_to_cpu(bg->bg_bits))) {
+ (le16_to_cpu(bg->bg_bits) << bits_per_unit))) {
*ret_bh = gd_bh;
*vict_bit = (vict_blkno - blkno) >>
@@ -569,6 +559,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
last_free_bits++;
if (last_free_bits == move_len) {
+ i -= move_len;
*goal_bit = i;
*phys_cpos = base_cpos + i;
break;
@@ -1040,18 +1031,19 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
context->range = &range;
+ /*
+ * ok, the default theshold for the defragmentation
+ * is 1M, since our maximum clustersize was 1M also.
+ * any thought?
+ */
+ if (!range.me_threshold)
+ range.me_threshold = 1024 * 1024;
+
+ if (range.me_threshold > i_size_read(inode))
+ range.me_threshold = i_size_read(inode);
+
if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
context->auto_defrag = 1;
- /*
- * ok, the default theshold for the defragmentation
- * is 1M, since our maximum clustersize was 1M also.
- * any thought?
- */
- if (!range.me_threshold)
- range.me_threshold = 1024 * 1024;
-
- if (range.me_threshold > i_size_read(inode))
- range.me_threshold = i_size_read(inode);
if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG)
context->partial = 1;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d437b2192522..bb8483510327 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1538,6 +1538,10 @@ static int ocfs2_rename(struct inode *old_dir,
status = ocfs2_add_entry(handle, new_dentry, old_inode,
OCFS2_I(old_inode)->ip_blkno,
new_dir_bh, &target_insert);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
}
old_inode->i_ctime = current_time(old_inode);
@@ -2506,7 +2510,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
struct buffer_head *new_di_bh = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
- u64 uninitialized_var(di_blkno), suballoc_loc;
+ u64 di_blkno, suballoc_loc;
u16 suballoc_bit;
status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index fc197e599e8c..e184b36f8dd3 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1069,7 +1069,7 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
struct buffer_head **ret_bh)
{
int ret = 0, i, found;
- u32 low_cpos, uninitialized_var(cpos_end);
+ u32 low_cpos, cpos_end;
struct ocfs2_extent_list *el;
struct ocfs2_extent_rec *rec = NULL;
struct ocfs2_extent_block *eb = NULL;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index e7eb08ac4215..10d691530d83 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -715,6 +715,8 @@ static struct ctl_table_header *ocfs2_table_header;
static int __init ocfs2_stack_glue_init(void)
{
+ int ret;
+
strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB);
ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
@@ -724,7 +726,11 @@ static int __init ocfs2_stack_glue_init(void)
return -ENOMEM; /* or something. */
}
- return ocfs2_sysfs_init();
+ ret = ocfs2_sysfs_init();
+ if (ret)
+ unregister_sysctl_table(ocfs2_table_header);
+
+ return ret;
}
static void __exit ocfs2_stack_glue_exit(void)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7a08053d95c2..4fd99ef7f334 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -985,8 +985,10 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (!sb_has_quota_loaded(sb, type))
continue;
- oinfo = sb_dqinfo(sb, type)->dqi_priv;
- cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ if (!sb_has_quota_suspended(sb, type)) {
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ }
inode = igrab(sb->s_dquot.files[type]);
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index c146e12a8601..54d881c9ac81 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1219,7 +1219,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
struct ocfs2_xattr_value_root *xv;
size_t size;
int ret = -ENODATA, name_offset, name_len, i;
- int uninitialized_var(block_off);
+ int block_off;
xs->bucket = ocfs2_xattr_bucket_new(inode);
if (!xs->bucket) {
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index bf83e6644333..ce59b2fb50c7 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -220,7 +220,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
struct buffer_head *bh;
sector_t next, offset;
int ret;
- u64 uninitialized_var(new_block);
+ u64 new_block;
u32 max_extents;
int extent_count;
struct omfs_extent *oe;
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index e24738c691f6..f79c015fa7cb 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -254,6 +254,8 @@ out:
void orangefs_debugfs_cleanup(void)
{
debugfs_remove_recursive(debug_dir);
+ kfree(debug_help_string);
+ debug_help_string = NULL;
}
/* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
@@ -709,6 +711,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot)
memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE);
strlcat(debug_help_string, new, string_size);
mutex_unlock(&orangefs_help_file_lock);
+ kfree(new);
}
rc = 0;
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 85ef87245a87..c8818163e392 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -141,7 +141,7 @@ static int __init orangefs_init(void)
gossip_err("%s: could not initialize device subsystem %d!\n",
__func__,
ret);
- goto cleanup_device;
+ goto cleanup_sysfs;
}
ret = register_filesystem(&orangefs_fs_type);
@@ -153,11 +153,11 @@ static int __init orangefs_init(void)
goto out;
}
- orangefs_sysfs_exit();
-
-cleanup_device:
orangefs_dev_cleanup();
+cleanup_sysfs:
+ orangefs_sysfs_exit();
+
sysfs_init_failed:
debugfs_init_failed:
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 30abafcd4ecc..debcac35a51d 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -195,7 +195,7 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
{
struct iattr attr = {
.ia_valid =
- ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+ ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_CTIME,
.ia_atime = stat->atime,
.ia_mtime = stat->mtime,
};
@@ -713,7 +713,7 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
struct path upperpath, datapath;
int err;
char *capability = NULL;
- ssize_t uninitialized_var(cap_size);
+ ssize_t cap_size;
ovl_path_upper(c->dentry, &upperpath);
if (WARN_ON(upperpath.dentry == NULL))
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 06dc649629c7..9fa64dbde97a 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -561,28 +561,42 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
goto out_revert_creds;
}
- err = -ENOMEM;
- override_cred = prepare_creds();
- if (override_cred) {
+ if (!attr->hardlink) {
+ err = -ENOMEM;
+ override_cred = prepare_creds();
+ if (!override_cred)
+ goto out_revert_creds;
+ /*
+ * In the creation cases(create, mkdir, mknod, symlink),
+ * ovl should transfer current's fs{u,g}id to underlying
+ * fs. Because underlying fs want to initialize its new
+ * inode owner using current's fs{u,g}id. And in this
+ * case, the @inode is a new inode that is initialized
+ * in inode_init_owner() to current's fs{u,g}id. So use
+ * the inode's i_{u,g}id to override the cred's fs{u,g}id.
+ *
+ * But in the other hardlink case, ovl_link() does not
+ * create a new inode, so just use the ovl mounter's
+ * fs{u,g}id.
+ */
override_cred->fsuid = inode->i_uid;
override_cred->fsgid = inode->i_gid;
- if (!attr->hardlink) {
- err = security_dentry_create_files_as(dentry,
- attr->mode, &dentry->d_name, old_cred,
- override_cred);
- if (err) {
- put_cred(override_cred);
- goto out_revert_creds;
- }
+ err = security_dentry_create_files_as(dentry,
+ attr->mode, &dentry->d_name, old_cred,
+ override_cred);
+ if (err) {
+ put_cred(override_cred);
+ goto out_revert_creds;
}
put_cred(override_creds(override_cred));
put_cred(override_cred);
-
- if (!ovl_dentry_is_whiteout(dentry))
- err = ovl_create_upper(dentry, inode, attr);
- else
- err = ovl_create_over_whiteout(dentry, inode, attr);
}
+
+ if (!ovl_dentry_is_whiteout(dentry))
+ err = ovl_create_upper(dentry, inode, attr);
+ else
+ err = ovl_create_over_whiteout(dentry, inode, attr);
+
out_revert_creds:
revert_creds(old_cred);
return err;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index badf039267a2..e51dc7f16596 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -203,7 +203,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
int err;
bool last_element = !post[0];
- this = lookup_one_len_unlocked(name, base, namelen);
+ this = lookup_positive_unlocked(name, base, namelen);
if (IS_ERR(this)) {
err = PTR_ERR(this);
this = NULL;
@@ -211,8 +211,6 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
goto out;
goto out_err;
}
- if (!this->d_inode)
- goto put_and_out;
if (ovl_dentry_weird(this)) {
/* Don't support traversing automounts and other weirdness */
@@ -654,7 +652,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
kfree(name.name);
if (IS_ERR(index)) {
if (PTR_ERR(index) == -ENOENT)
@@ -662,9 +660,7 @@ struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
return index;
}
- if (d_is_negative(index))
- err = 0;
- else if (ovl_is_whiteout(index))
+ if (ovl_is_whiteout(index))
err = -ESTALE;
else if (ovl_dentry_weird(index))
err = -EIO;
@@ -688,7 +684,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
if (err)
return ERR_PTR(err);
- index = lookup_one_len_unlocked(name.name, ofs->indexdir, name.len);
+ index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
if (IS_ERR(index)) {
err = PTR_ERR(index);
if (err == -ENOENT) {
@@ -703,9 +699,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
}
inode = d_inode(index);
- if (d_is_negative(index)) {
- goto out_dput;
- } else if (ovl_is_whiteout(index) && !verify) {
+ if (ovl_is_whiteout(index) && !verify) {
/*
* When index lookup is called with !verify for decoding an
* overlay file handle, a whiteout index implies that decode
@@ -1134,7 +1128,7 @@ bool ovl_lower_positive(struct dentry *dentry)
struct dentry *this;
struct dentry *lowerdir = poe->lowerstack[i].dentry;
- this = lookup_one_len_unlocked(name->name, lowerdir,
+ this = lookup_positive_unlocked(name->name, lowerdir,
name->len);
if (IS_ERR(this)) {
switch (PTR_ERR(this)) {
@@ -1151,10 +1145,8 @@ bool ovl_lower_positive(struct dentry *dentry)
break;
}
} else {
- if (this->d_inode) {
- positive = !ovl_is_whiteout(this);
- done = true;
- }
+ positive = !ovl_is_whiteout(this);
+ done = true;
dput(this);
}
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 1a7a1e298885..1c1eb873e6ec 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -268,8 +268,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
return 0;
/*
- * If this is a sync(2) call or an emergency sync, all the super blocks
- * will be iterated, including upper_sb, so no need to do anything.
+ * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
+ * All the super blocks will be iterated, including upper_sb.
*
* If this is a syncfs(2) call, then we do need to call
* sync_filesystem() on upper_sb, but enough if we do it when being
@@ -1664,6 +1664,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_xattr = ovl_xattr_handlers;
sb->s_fs_info = ofs;
sb->s_flags |= SB_POSIXACL;
+ sb->s_iflags |= SB_I_SKIP_SYNC;
err = -ENOMEM;
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
diff --git a/fs/pnode.c b/fs/pnode.c
index 7910ae91f17e..d27b7b97c4c1 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -245,7 +245,7 @@ static int propagate_one(struct mount *m)
}
do {
struct mount *parent = last_source->mnt_parent;
- if (last_source == first_source)
+ if (peers(last_source, first_source))
break;
done = parent->mnt_master == p;
if (done && peers(n, parent))
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e3f10c110b74..69f48794b550 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3338,7 +3338,8 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
}
static const struct inode_operations proc_tid_comm_inode_operations = {
- .permission = proc_tid_comm_permission,
+ .setattr = proc_setattr,
+ .permission = proc_tid_comm_permission,
};
/*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c95f32b83a94..7c62a526506c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -13,6 +13,7 @@
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <linux/kmemleak.h>
#include "internal.h"
static const struct dentry_operations proc_sys_dentry_operations;
@@ -1376,6 +1377,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab
}
EXPORT_SYMBOL(register_sysctl);
+/**
+ * __register_sysctl_init() - register sysctl table to path
+ * @path: path name for sysctl base
+ * @table: This is the sysctl table that needs to be registered to the path
+ * @table_name: The name of sysctl table, only used for log printing when
+ * registration fails
+ *
+ * The sysctl interface is used by userspace to query or modify at runtime
+ * a predefined value set on a variable. These variables however have default
+ * values pre-set. Code which depends on these variables will always work even
+ * if register_sysctl() fails. If register_sysctl() fails you'd just loose the
+ * ability to query or modify the sysctls dynamically at run time. Chances of
+ * register_sysctl() failing on init are extremely low, and so for both reasons
+ * this function does not return any error as it is used by initialization code.
+ *
+ * Context: Can only be called after your respective sysctl base path has been
+ * registered. So for instance, most base directories are registered early on
+ * init before init levels are processed through proc_sys_init() and
+ * sysctl_init().
+ */
+void __init __register_sysctl_init(const char *path, struct ctl_table *table,
+ const char *table_name)
+{
+ struct ctl_table_header *hdr = register_sysctl(path, table);
+
+ if (unlikely(!hdr)) {
+ pr_err("failed when register_sysctl %s to %s\n", table_name, path);
+ return;
+ }
+ kmemleak_not_leak(hdr);
+}
+
static char *append_path(const char *path, char *pos, const char *name)
{
int namelen;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dc63d4c6029e..2f2afc3c6fc6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -701,9 +701,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
page = device_private_entry_to_page(swpent);
}
if (page) {
- int mapcount = page_mapcount(page);
-
- if (mapcount >= 2)
+ if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 503086f7f7c1..d5fb6d95d4d4 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -117,6 +117,7 @@ config PSTORE_CONSOLE
config PSTORE_PMSG
bool "Log user space messages"
depends on PSTORE
+ select RT_MUTEXES
help
When the option is enabled, pstore will export a character
interface /dev/pmsg0 to log user space messages. On reboot
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index bafbab2dd039..33294dee7d7f 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -753,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev)
/* Make sure we didn't get bogus platform data pointer. */
if (!pdata) {
pr_err("NULL platform data\n");
+ err = -EINVAL;
goto fail_out;
}
@@ -760,6 +761,7 @@ static int ramoops_probe(struct platform_device *pdev)
!pdata->ftrace_size && !pdata->pmsg_size)) {
pr_err("The memory size and the record/console size must be "
"non-zero\n");
+ err = -EINVAL;
goto fail_out;
}
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3c777ec80d47..a6e5022469ab 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -189,7 +189,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
{
int numerr;
struct persistent_ram_buffer *buffer = prz->buffer;
- int ecc_blocks;
+ size_t ecc_blocks;
size_t ecc_total;
if (!ecc_info || !ecc_info->ecc_size)
@@ -426,7 +426,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
phys_addr_t addr = page_start + i * PAGE_SIZE;
pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
}
- vaddr = vmap(pages, page_count, VM_MAP, prot);
+ /*
+ * VM_IOREMAP used here to bypass this region during vread()
+ * and kmap_atomic() (i.e. kcore) to avoid __va() failures.
+ */
+ vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot);
kfree(pages);
/*
@@ -496,7 +500,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
- if (buffer_size(prz) == 0) {
+ if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
pr_debug("found existing empty buffer\n");
return 0;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ddb379abd919..868936076f41 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -223,18 +223,31 @@ static void put_quota_format(struct quota_format_type *fmt)
/*
* Dquot List Management:
- * The quota code uses three lists for dquot management: the inuse_list,
- * free_dquots, and dquot_hash[] array. A single dquot structure may be
- * on all three lists, depending on its current state.
+ * The quota code uses five lists for dquot management: the inuse_list,
+ * releasing_dquots, free_dquots, dqi_dirty_list, and dquot_hash[] array.
+ * A single dquot structure may be on some of those lists, depending on
+ * its current state.
*
* All dquots are placed to the end of inuse_list when first created, and this
* list is used for invalidate operation, which must look at every dquot.
*
- * Unused dquots (dq_count == 0) are added to the free_dquots list when freed,
- * and this list is searched whenever we need an available dquot. Dquots are
- * removed from the list as soon as they are used again, and
- * dqstats.free_dquots gives the number of dquots on the list. When
- * dquot is invalidated it's completely released from memory.
+ * When the last reference of a dquot is dropped, the dquot is added to
+ * releasing_dquots. We'll then queue work item which will call
+ * synchronize_srcu() and after that perform the final cleanup of all the
+ * dquots on the list. Each cleaned up dquot is moved to free_dquots list.
+ * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot
+ * struct.
+ *
+ * Unused and cleaned up dquots are in the free_dquots list and this list is
+ * searched whenever we need an available dquot. Dquots are removed from the
+ * list as soon as they are used again and dqstats.free_dquots gives the number
+ * of dquots on the list. When dquot is invalidated it's completely released
+ * from memory.
+ *
+ * Dirty dquots are added to the dqi_dirty_list of quota_info when mark
+ * dirtied, and this list is searched when writing dirty dquots back to
+ * quota file. Note that some filesystems do dirty dquot tracking on their
+ * own (e.g. in a journal) and thus don't use dqi_dirty_list.
*
* Dquots with a specific identity (device, type and id) are placed on
* one of the dquot_hash[] hash chains. The provides an efficient search
@@ -243,6 +256,7 @@ static void put_quota_format(struct quota_format_type *fmt)
static LIST_HEAD(inuse_list);
static LIST_HEAD(free_dquots);
+static LIST_HEAD(releasing_dquots);
static unsigned int dq_hash_bits, dq_hash_mask;
static struct hlist_head *dquot_hash;
@@ -253,6 +267,9 @@ static qsize_t inode_get_rsv_space(struct inode *inode);
static qsize_t __inode_get_rsv_space(struct inode *inode);
static int __dquot_initialize(struct inode *inode, int type);
+static void quota_release_workfn(struct work_struct *work);
+static DECLARE_DELAYED_WORK(quota_release_work, quota_release_workfn);
+
static inline unsigned int
hashfn(const struct super_block *sb, struct kqid qid)
{
@@ -300,12 +317,21 @@ static inline void put_dquot_last(struct dquot *dquot)
dqstats_inc(DQST_FREE_DQUOTS);
}
+static inline void put_releasing_dquots(struct dquot *dquot)
+{
+ list_add_tail(&dquot->dq_free, &releasing_dquots);
+ set_bit(DQ_RELEASING_B, &dquot->dq_flags);
+}
+
static inline void remove_free_dquot(struct dquot *dquot)
{
if (list_empty(&dquot->dq_free))
return;
list_del_init(&dquot->dq_free);
- dqstats_dec(DQST_FREE_DQUOTS);
+ if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags))
+ dqstats_dec(DQST_FREE_DQUOTS);
+ else
+ clear_bit(DQ_RELEASING_B, &dquot->dq_flags);
}
static inline void put_inuse(struct dquot *dquot)
@@ -331,6 +357,11 @@ static void wait_on_dquot(struct dquot *dquot)
mutex_unlock(&dquot->dq_lock);
}
+static inline int dquot_active(struct dquot *dquot)
+{
+ return test_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+}
+
static inline int dquot_dirty(struct dquot *dquot)
{
return test_bit(DQ_MOD_B, &dquot->dq_flags);
@@ -346,14 +377,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
{
int ret = 1;
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (!dquot_active(dquot))
return 0;
if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NOLIST_DIRTY)
return test_and_set_bit(DQ_MOD_B, &dquot->dq_flags);
/* If quota is dirty already, we don't have to acquire dq_list_lock */
- if (test_bit(DQ_MOD_B, &dquot->dq_flags))
+ if (dquot_dirty(dquot))
return 1;
spin_lock(&dq_list_lock);
@@ -432,7 +463,7 @@ int dquot_acquire(struct dquot *dquot)
smp_mb__before_atomic();
set_bit(DQ_READ_B, &dquot->dq_flags);
/* Instantiate dquot if needed */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
+ if (!dquot_active(dquot) && !dquot->dq_off) {
ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
/* Write the info if needed */
if (info_dirty(&dqopt->info[dquot->dq_id.type])) {
@@ -471,7 +502,7 @@ int dquot_commit(struct dquot *dquot)
goto out_lock;
/* Inactive dquot can be only if there was error during read/init
* => we have better not writing it */
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (dquot_active(dquot))
ret = dqopt->ops[dquot->dq_id.type]->commit_dqblk(dquot);
else
ret = -EIO;
@@ -532,6 +563,8 @@ static void invalidate_dquots(struct super_block *sb, int type)
struct dquot *dquot, *tmp;
restart:
+ flush_delayed_work(&quota_release_work);
+
spin_lock(&dq_list_lock);
list_for_each_entry_safe(dquot, tmp, &inuse_list, dq_inuse) {
if (dquot->dq_sb != sb)
@@ -540,7 +573,7 @@ restart:
continue;
/* Wait for dquot users */
if (atomic_read(&dquot->dq_count)) {
- dqgrab(dquot);
+ atomic_inc(&dquot->dq_count);
spin_unlock(&dq_list_lock);
/*
* Once dqput() wakes us up, we know it's time to free
@@ -559,6 +592,15 @@ restart:
goto restart;
}
/*
+ * The last user already dropped its reference but dquot didn't
+ * get fully cleaned up yet. Restart the scan which flushes the
+ * work cleaning up released dquots.
+ */
+ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
+ goto restart;
+ }
+ /*
* Quota now has no users and it has been written on last
* dqput()
*/
@@ -582,14 +624,13 @@ int dquot_scan_active(struct super_block *sb,
spin_lock(&dq_list_lock);
list_for_each_entry(dquot, &inuse_list, dq_inuse) {
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ if (!dquot_active(dquot))
continue;
if (dquot->dq_sb != sb)
continue;
/* Now we have active dquot so we can just increase use count */
atomic_inc(&dquot->dq_count);
spin_unlock(&dq_list_lock);
- dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
/*
@@ -598,7 +639,7 @@ int dquot_scan_active(struct super_block *sb,
* outstanding call and recheck the DQ_ACTIVE_B after that.
*/
wait_on_dquot(dquot);
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ if (dquot_active(dquot)) {
ret = fn(dquot, priv);
if (ret < 0)
goto out;
@@ -614,6 +655,18 @@ out:
}
EXPORT_SYMBOL(dquot_scan_active);
+static inline int dquot_write_dquot(struct dquot *dquot)
+{
+ int ret = dquot->dq_sb->dq_op->write_dquot(dquot);
+ if (ret < 0) {
+ quota_error(dquot->dq_sb, "Can't write quota structure "
+ "(error %d). Quota may get out of sync!", ret);
+ /* Clear dirty bit anyway to avoid infinite loop. */
+ clear_dquot_dirty(dquot);
+ }
+ return ret;
+}
+
/* Write all dquot structures to quota files */
int dquot_writeback_dquots(struct super_block *sb, int type)
{
@@ -637,24 +690,23 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dquot = list_first_entry(&dirty, struct dquot,
dq_dirty);
- WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
+ WARN_ON(!dquot_active(dquot));
+ /* If the dquot is releasing we should not touch it */
+ if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) {
+ spin_unlock(&dq_list_lock);
+ flush_delayed_work(&quota_release_work);
+ spin_lock(&dq_list_lock);
+ continue;
+ }
/* Now we have active dquot from which someone is
* holding reference so we can safely just increase
* use count */
dqgrab(dquot);
spin_unlock(&dq_list_lock);
- dqstats_inc(DQST_LOOKUPS);
- err = sb->dq_op->write_dquot(dquot);
- if (err) {
- /*
- * Clear dirty bit anyway to avoid infinite
- * loop here.
- */
- clear_dquot_dirty(dquot);
- if (!ret)
- ret = err;
- }
+ err = dquot_write_dquot(dquot);
+ if (err && !ret)
+ ret = err;
dqput(dquot);
spin_lock(&dq_list_lock);
}
@@ -748,12 +800,52 @@ static struct shrinker dqcache_shrinker = {
};
/*
+ * Safely release dquot and put reference to dquot.
+ */
+static void quota_release_workfn(struct work_struct *work)
+{
+ struct dquot *dquot;
+ struct list_head rls_head;
+
+ spin_lock(&dq_list_lock);
+ /* Exchange the list head to avoid livelock. */
+ list_replace_init(&releasing_dquots, &rls_head);
+ spin_unlock(&dq_list_lock);
+ synchronize_srcu(&dquot_srcu);
+
+restart:
+ spin_lock(&dq_list_lock);
+ while (!list_empty(&rls_head)) {
+ dquot = list_first_entry(&rls_head, struct dquot, dq_free);
+ WARN_ON_ONCE(atomic_read(&dquot->dq_count));
+ /*
+ * Note that DQ_RELEASING_B protects us from racing with
+ * invalidate_dquots() calls so we are safe to work with the
+ * dquot even after we drop dq_list_lock.
+ */
+ if (dquot_dirty(dquot)) {
+ spin_unlock(&dq_list_lock);
+ /* Commit dquot before releasing */
+ dquot_write_dquot(dquot);
+ goto restart;
+ }
+ if (dquot_active(dquot)) {
+ spin_unlock(&dq_list_lock);
+ dquot->dq_sb->dq_op->release_dquot(dquot);
+ goto restart;
+ }
+ /* Dquot is inactive and clean, now move it to free list */
+ remove_free_dquot(dquot);
+ put_dquot_last(dquot);
+ }
+ spin_unlock(&dq_list_lock);
+}
+
+/*
* Put reference to dquot
*/
void dqput(struct dquot *dquot)
{
- int ret;
-
if (!dquot)
return;
#ifdef CONFIG_QUOTA_DEBUG
@@ -765,7 +857,7 @@ void dqput(struct dquot *dquot)
}
#endif
dqstats_inc(DQST_DROPS);
-we_slept:
+
spin_lock(&dq_list_lock);
if (atomic_read(&dquot->dq_count) > 1) {
/* We have more than one user... nothing to do */
@@ -777,35 +869,16 @@ we_slept:
spin_unlock(&dq_list_lock);
return;
}
+
/* Need to release dquot? */
- if (dquot_dirty(dquot)) {
- spin_unlock(&dq_list_lock);
- /* Commit dquot before releasing */
- ret = dquot->dq_sb->dq_op->write_dquot(dquot);
- if (ret < 0) {
- quota_error(dquot->dq_sb, "Can't write quota structure"
- " (error %d). Quota may get out of sync!",
- ret);
- /*
- * We clear dirty bit anyway, so that we avoid
- * infinite loop here
- */
- clear_dquot_dirty(dquot);
- }
- goto we_slept;
- }
- if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
- spin_unlock(&dq_list_lock);
- dquot->dq_sb->dq_op->release_dquot(dquot);
- goto we_slept;
- }
- atomic_dec(&dquot->dq_count);
#ifdef CONFIG_QUOTA_DEBUG
/* sanity check */
BUG_ON(!list_empty(&dquot->dq_free));
#endif
- put_dquot_last(dquot);
+ put_releasing_dquots(dquot);
+ atomic_dec(&dquot->dq_count);
spin_unlock(&dq_list_lock);
+ queue_delayed_work(system_unbound_wq, &quota_release_work, 1);
}
EXPORT_SYMBOL(dqput);
@@ -892,10 +965,10 @@ we_slept:
dqstats_inc(DQST_LOOKUPS);
}
/* Wait for dq_lock - after this we know that either dquot_release() is
- * already finished or it will be canceled due to dq_count > 1 test */
+ * already finished or it will be canceled due to dq_count > 0 test */
wait_on_dquot(dquot);
/* Read the dquot / allocate space in quota file */
- if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ if (!dquot_active(dquot)) {
int err;
err = sb->dq_op->acquire_dquot(dquot);
@@ -1408,7 +1481,7 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
return QUOTA_NL_NOWARN;
}
-static int dquot_active(const struct inode *inode)
+static int inode_quota_active(const struct inode *inode)
{
struct super_block *sb = inode->i_sb;
@@ -1431,7 +1504,7 @@ static int __dquot_initialize(struct inode *inode, int type)
qsize_t rsv;
int ret = 0;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
dquots = i_dquot(inode);
@@ -1539,7 +1612,7 @@ bool dquot_initialize_needed(struct inode *inode)
struct dquot **dquots;
int i;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return false;
dquots = i_dquot(inode);
@@ -1650,7 +1723,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
int reserve = flags & DQUOT_SPACE_RESERVE;
struct dquot **dquots;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
if (reserve) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number;
@@ -1722,7 +1795,7 @@ int dquot_alloc_inode(struct inode *inode)
struct dquot_warn warn[MAXQUOTAS];
struct dquot * const *dquots;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warn[cnt].w_type = QUOTA_NL_NOWARN;
@@ -1765,7 +1838,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
struct dquot **dquots;
int cnt, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number;
__inode_add_bytes(inode, number);
@@ -1807,7 +1880,7 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
struct dquot **dquots;
int cnt, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number;
__inode_sub_bytes(inode, number);
@@ -1851,7 +1924,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
struct dquot **dquots;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
- if (!dquot_active(inode)) {
+ if (!inode_quota_active(inode)) {
if (reserve) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number;
@@ -1906,7 +1979,7 @@ void dquot_free_inode(struct inode *inode)
struct dquot * const *dquots;
int index;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return;
dquots = i_dquot(inode);
@@ -2077,7 +2150,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
struct super_block *sb = inode->i_sb;
int ret;
- if (!dquot_active(inode))
+ if (!inode_quota_active(inode))
return 0;
if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){
@@ -2298,28 +2371,76 @@ EXPORT_SYMBOL(dquot_quota_off);
* Turn quotas on on a device
*/
-/*
- * Helper function to turn quotas on when we already have the inode of
- * quota file and no quota information is loaded.
- */
-static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+static int vfs_setup_quota_inode(struct inode *inode, int type)
+{
+ struct super_block *sb = inode->i_sb;
+ struct quota_info *dqopt = sb_dqopt(sb);
+
+ if (is_bad_inode(inode))
+ return -EUCLEAN;
+ if (!S_ISREG(inode->i_mode))
+ return -EACCES;
+ if (IS_RDONLY(inode))
+ return -EROFS;
+ if (sb_has_quota_loaded(sb, type))
+ return -EBUSY;
+
+ /*
+ * Quota files should never be encrypted. They should be thought of as
+ * filesystem metadata, not user data. New-style internal quota files
+ * cannot be encrypted by users anyway, but old-style external quota
+ * files could potentially be incorrectly created in an encrypted
+ * directory, hence this explicit check. Some reasons why encrypted
+ * quota files don't work include: (1) some filesystems that support
+ * encryption don't handle it in their quota_read and quota_write, and
+ * (2) cleaning up encrypted quota files at unmount would need special
+ * consideration, as quota files are cleaned up later than user files.
+ */
+ if (IS_ENCRYPTED(inode))
+ return -EINVAL;
+
+ dqopt->files[type] = igrab(inode);
+ if (!dqopt->files[type])
+ return -EIO;
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ /* We don't want quota and atime on quota files (deadlocks
+ * possible) Also nobody should write to the file - we use
+ * special IO operations which ignore the immutable bit. */
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ /*
+ * When S_NOQUOTA is set, remove dquot references as no more
+ * references can be added
+ */
+ __dquot_drop(inode);
+ }
+ return 0;
+}
+
+static void vfs_cleanup_quota_inode(struct super_block *sb, int type)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct inode *inode = dqopt->files[type];
+
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ inode_lock(inode);
+ inode->i_flags &= ~S_NOQUOTA;
+ inode_unlock(inode);
+ }
+ dqopt->files[type] = NULL;
+ iput(inode);
+}
+
+int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
struct quota_format_type *fmt = find_quota_format(format_id);
- struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
int error;
if (!fmt)
return -ESRCH;
- if (!S_ISREG(inode->i_mode)) {
- error = -EACCES;
- goto out_fmt;
- }
- if (IS_RDONLY(inode)) {
- error = -EROFS;
- goto out_fmt;
- }
if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
(type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
error = -EINVAL;
@@ -2351,27 +2472,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
invalidate_bdev(sb->s_bdev);
}
- if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
- /* We don't want quota and atime on quota files (deadlocks
- * possible) Also nobody should write to the file - we use
- * special IO operations which ignore the immutable bit. */
- inode_lock(inode);
- inode->i_flags |= S_NOQUOTA;
- inode_unlock(inode);
- /*
- * When S_NOQUOTA is set, remove dquot references as no more
- * references can be added
- */
- __dquot_drop(inode);
- }
-
- error = -EIO;
- dqopt->files[type] = igrab(inode);
- if (!dqopt->files[type])
- goto out_file_flags;
error = -EINVAL;
if (!fmt->qf_ops->check_quota_file(sb, type))
- goto out_file_init;
+ goto out_fmt;
dqopt->ops[type] = fmt->qf_ops;
dqopt->info[type].dqi_format = fmt;
@@ -2379,7 +2482,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list);
error = dqopt->ops[type]->read_file_info(sb, type);
if (error < 0)
- goto out_file_init;
+ goto out_fmt;
if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) {
spin_lock(&dq_data_lock);
dqopt->info[type].dqi_flags |= DQF_SYS_FILE;
@@ -2391,21 +2494,34 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
error = add_dquot_ref(sb, type);
if (error)
- dquot_disable(sb, type, flags);
+ dquot_disable(sb, type,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
return error;
-out_file_init:
- dqopt->files[type] = NULL;
- iput(inode);
-out_file_flags:
- inode_lock(inode);
- inode->i_flags &= ~S_NOQUOTA;
- inode_unlock(inode);
out_fmt:
put_quota_format(fmt);
return error;
}
+EXPORT_SYMBOL(dquot_load_quota_sb);
+
+/*
+ * Helper function to turn quotas on when we already have the inode of
+ * quota file and no quota information is loaded.
+ */
+static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+ unsigned int flags)
+{
+ int err;
+
+ err = vfs_setup_quota_inode(inode, type);
+ if (err < 0)
+ return err;
+ err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags);
+ if (err < 0)
+ vfs_cleanup_quota_inode(inode->i_sb, type);
+ return err;
+}
/* Reenable quotas on remount RW */
int dquot_resume(struct super_block *sb, int type)
@@ -2506,21 +2622,15 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
- dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name));
+ dentry = lookup_positive_unlocked(qf_name, sb->s_root, strlen(qf_name));
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (d_really_is_negative(dentry)) {
- error = -ENOENT;
- goto out;
- }
-
error = security_quota_on(dentry);
if (!error)
error = vfs_load_quota_inode(d_inode(dentry), type, format_id,
DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
-out:
dput(dentry);
return error;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 78be6dbcd762..3425a04bc8a0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2336,7 +2336,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
int i, j;
bh = __getblk(dev, block, bufsize);
- if (buffer_uptodate(bh))
+ if (!bh || buffer_uptodate(bh))
return (bh);
if (block + BUFNR > max_block) {
@@ -2346,6 +2346,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
j = 1;
for (i = 1; i < blocks; i++) {
bh = __getblk(dev, block + i, bufsize);
+ if (!bh)
+ break;
if (buffer_uptodate(bh)) {
brelse(bh);
break;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 959a066b7bb0..2843b7cf4d7a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -695,6 +695,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
out_failed:
reiserfs_write_unlock(dir->i_sb);
+ reiserfs_security_free(&security);
return retval;
}
@@ -778,6 +779,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
out_failed:
reiserfs_write_unlock(dir->i_sb);
+ reiserfs_security_free(&security);
return retval;
}
@@ -876,6 +878,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(dir->i_sb);
+ reiserfs_security_free(&security);
return retval;
}
@@ -1191,6 +1194,7 @@ static int reiserfs_symlink(struct inode *parent_dir,
retval = journal_end(&th);
out_failed:
reiserfs_write_unlock(parent_dir->i_sb);
+ reiserfs_security_free(&security);
return retval;
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 831a542c22c6..e5be1d747c03 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1443,7 +1443,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
unsigned long safe_mask = 0;
unsigned int commit_max_age = (unsigned int)-1;
struct reiserfs_journal *journal = SB_JOURNAL(s);
- char *new_opts;
int err;
char *qf_names[REISERFS_MAXQUOTAS];
unsigned int qfmt = 0;
@@ -1451,10 +1450,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
int i;
#endif
- new_opts = kstrdup(arg, GFP_KERNEL);
- if (arg && !new_opts)
- return -ENOMEM;
-
sync_filesystem(s);
reiserfs_write_lock(s);
@@ -1605,7 +1600,6 @@ out_ok_unlocked:
out_err_unlock:
reiserfs_write_unlock(s);
out_err:
- kfree(new_opts);
return err;
}
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 20be9a0e5870..159af6c26f4b 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -49,6 +49,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
int error;
sec->name = NULL;
+ sec->value = NULL;
/* Don't add selinux attributes on xattrs - they'll never get used */
if (IS_PRIVATE(dir))
@@ -80,11 +81,15 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
struct inode *inode,
struct reiserfs_security_handle *sec)
{
+ char xattr_name[XATTR_NAME_MAX + 1] = XATTR_SECURITY_PREFIX;
int error;
- if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX))
+
+ if (XATTR_SECURITY_PREFIX_LEN + strlen(sec->name) > XATTR_NAME_MAX)
return -EINVAL;
- error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value,
+ strlcat(xattr_name, sec->name, sizeof(xattr_name));
+
+ error = reiserfs_xattr_set_handle(th, inode, xattr_name, sec->value,
sec->length, XATTR_CREATE);
if (error == -ENODATA || error == -EOPNOTSUPP)
error = 0;
@@ -94,7 +99,6 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
void reiserfs_security_free(struct reiserfs_security_handle *sec)
{
- kfree(sec->name);
kfree(sec->value);
sec->name = NULL;
sec->value = NULL;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 10e93345b615..75886e6d9c97 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -196,7 +196,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
-#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
+#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 5234c19a0eab..7ec30a11273e 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -76,7 +76,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
- int xattr_ids;
+ unsigned int xattr_ids;
unsigned int ids;
};
#endif
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 86b0a0073e51..f360f27e38f3 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -23,12 +23,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
- u64 *, int *);
+ u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
- u64 start, u64 *xattr_table_start, int *xattr_ids)
+ u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index 7f718d2bf357..fe266b3e95f8 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -69,7 +69,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
- u64 *xattr_table_start, int *xattr_ids)
+ u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
diff --git a/fs/statfs.c b/fs/statfs.c
index 56f655f757ff..29786598c2b5 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -114,6 +114,7 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
if (sizeof buf.f_blocks == 4) {
if ((st->f_blocks | st->f_bfree | st->f_bavail |
st->f_bsize | st->f_frsize) &
@@ -142,7 +143,6 @@ static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
@@ -155,6 +155,7 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
if (sizeof(buf) == sizeof(*st))
memcpy(&buf, st, sizeof(*st));
else {
+ memset(&buf, 0, sizeof(buf));
buf.f_type = st->f_type;
buf.f_bsize = st->f_bsize;
buf.f_blocks = st->f_blocks;
@@ -166,7 +167,6 @@ static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
buf.f_namelen = st->f_namelen;
buf.f_frsize = st->f_frsize;
buf.f_flags = st->f_flags;
- memset(buf.f_spare, 0, sizeof(buf.f_spare));
}
if (copy_to_user(p, &buf, sizeof(buf)))
return -EFAULT;
diff --git a/fs/sync.c b/fs/sync.c
index b54e0541ad89..917ebd12c251 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -76,7 +76,8 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg)
static void sync_fs_one_sb(struct super_block *sb, void *arg)
{
- if (!sb_rdonly(sb) && sb->s_op->sync_fs)
+ if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
+ sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, *(int *)arg);
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 011e391497f4..cd70dbeeab22 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -599,7 +599,7 @@ int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
va_list args;
int len;
- if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE,
+ if (WARN(!buf || at < 0 || at >= PAGE_SIZE,
"invalid sysfs_emit_at: buf:%p at:%d\n", buf, at))
return 0;
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index bcb67b0cabe7..e3d1673b8ec9 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -145,6 +145,10 @@ static int alloc_branch(struct inode *inode,
*/
parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ sysv_free_block(inode->i_sb, branch[n].key);
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, blocksize);
branch[n].bh = bh;
@@ -438,7 +442,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
res += blocks;
direct = 1;
}
- return blocks;
+ return res;
}
int sysv_getattr(const struct path *path, struct kstat *stat,
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 7ef22baf9d15..30c7bd63c2ad 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -224,11 +224,10 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
subtract_lebs += 1;
/*
- * The GC journal head LEB is not really accessible. And since
- * different write types go to different heads, we may count only on
- * one head's space.
+ * Since different write types go to different heads, we should
+ * reserve one leb for each head.
*/
- subtract_lebs += c->jhead_cnt - 1;
+ subtract_lebs += c->jhead_cnt;
/* We also reserve one LEB for deletions, which bypass budgeting */
subtract_lebs += 1;
@@ -415,7 +414,7 @@ static int calc_dd_growth(const struct ubifs_info *c,
dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
if (req->dirtied_ino)
- dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
+ dd_growth += c->bi.inode_budget * req->dirtied_ino;
if (req->mod_dent)
dd_growth += c->bi.dent_budget;
dd_growth += req->dirtied_ino_d;
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 591f2c7a48f0..583e20787689 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -564,11 +564,11 @@ out:
*/
int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
{
- int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
+ int lnum, offs, len, err = 0, last_level, child_cnt;
int first = 1, iip;
struct ubifs_debug_info *d = c->dbg;
- union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
- unsigned long long uninitialized_var(last_sqnum);
+ union ubifs_key lower_key, upper_key, l_key, u_key;
+ unsigned long long last_sqnum;
struct ubifs_idx_node *idx;
struct list_head list;
struct idx_node *i;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 111905ddbfc2..39296e801242 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -445,6 +445,7 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry,
mutex_unlock(&dir_ui->ui_mutex);
ubifs_release_budget(c, &req);
+ fscrypt_free_filename(&nm);
return 0;
@@ -1141,7 +1142,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
int err, sz_change, len = strlen(symname);
struct fscrypt_str disk_link;
struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
- .new_ino_d = ALIGN(len, 8),
.dirtied_ino = 1 };
struct fscrypt_name nm;
@@ -1157,6 +1157,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
* Budget request settings: new inode, new direntry and changing parent
* directory inode.
*/
+ req.new_ino_d = ALIGN(disk_link.len - 1, 8);
err = ubifs_budget_space(c, &req);
if (err)
return err;
@@ -1293,7 +1294,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
struct timespec64 time;
- unsigned int uninitialized_var(saved_nlink);
+ unsigned int saved_nlink;
struct fscrypt_name old_nm, new_nm;
/*
@@ -1309,9 +1310,13 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dentry, old_inode->i_ino, old_dir->i_ino,
new_dentry, new_dir->i_ino, flags);
- if (unlink)
+ if (unlink) {
ubifs_assert(c, inode_is_locked(new_inode));
+ /* Budget for old inode's data when its nlink > 1. */
+ req.dirtied_ino_d = ALIGN(ubifs_inode(new_inode)->data_len, 8);
+ }
+
if (unlink && is_dir) {
err = ubifs_check_dir_empty(new_inode);
if (err)
@@ -1549,6 +1554,10 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
+ err = ubifs_budget_space(c, &req);
+ if (err)
+ goto out;
+
lock_4_inodes(old_dir, new_dir, NULL, NULL);
time = current_time(old_dir);
@@ -1574,6 +1583,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
unlock_4_inodes(old_dir, new_dir, NULL, NULL);
ubifs_release_budget(c, &req);
+out:
fscrypt_free_filename(&fst_nm);
fscrypt_free_filename(&snd_nm);
return err;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 3dbb5ac630e4..fca3b7f483c7 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -234,7 +234,7 @@ static int write_begin_slow(struct address_space *mapping,
struct ubifs_info *c = inode->i_sb->s_fs_info;
pgoff_t index = pos >> PAGE_SHIFT;
struct ubifs_budget_req req = { .new_page = 1 };
- int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+ int err, appending = !!(pos + len > inode->i_size);
struct page *page;
dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
@@ -438,7 +438,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
pgoff_t index = pos >> PAGE_SHIFT;
- int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+ int err, appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
struct page *page;
@@ -1043,7 +1043,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
if (page->index >= synced_i_size >> PAGE_SHIFT) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
- goto out_unlock;
+ goto out_redirty;
/*
* The inode has been written, but the write-buffer has
* not been synchronized, so in case of an unclean
@@ -1071,11 +1071,17 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
if (i_size > synced_i_size) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
- goto out_unlock;
+ goto out_redirty;
}
return do_writepage(page, len);
-
+out_redirty:
+ /*
+ * redirty_page_for_writepage() won't call ubifs_dirty_inode() because
+ * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so
+ * there is no need to do space budget for dirty inode.
+ */
+ redirty_page_for_writepage(wbc, page);
out_unlock:
unlock_page(page);
return err;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 802565a17733..0a60a065c7e8 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1355,7 +1355,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
union ubifs_key key, to_key;
struct ubifs_ino_node *ino;
struct ubifs_trun_node *trun;
- struct ubifs_data_node *uninitialized_var(dn);
+ struct ubifs_data_node *dn;
int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
struct ubifs_inode *ui = ubifs_inode(inode);
ino_t inum = inode->i_ino;
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 31393370e334..433bfcddc497 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -287,7 +287,7 @@ uint32_t ubifs_unpack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos,
const int k = 32 - nrbits;
uint8_t *p = *addr;
int b = *pos;
- uint32_t uninitialized_var(val);
+ uint32_t val;
const int bytes = (nrbits + b + 7) >> 3;
ubifs_assert(c, nrbits > 0);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f15ac37956e7..330ccf72745b 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -56,6 +56,33 @@ enum {
NOT_ON_MEDIA = 3,
};
+static void do_insert_old_idx(struct ubifs_info *c,
+ struct ubifs_old_idx *old_idx)
+{
+ struct ubifs_old_idx *o;
+ struct rb_node **p, *parent = NULL;
+
+ p = &c->old_idx.rb_node;
+ while (*p) {
+ parent = *p;
+ o = rb_entry(parent, struct ubifs_old_idx, rb);
+ if (old_idx->lnum < o->lnum)
+ p = &(*p)->rb_left;
+ else if (old_idx->lnum > o->lnum)
+ p = &(*p)->rb_right;
+ else if (old_idx->offs < o->offs)
+ p = &(*p)->rb_left;
+ else if (old_idx->offs > o->offs)
+ p = &(*p)->rb_right;
+ else {
+ ubifs_err(c, "old idx added twice!");
+ kfree(old_idx);
+ }
+ }
+ rb_link_node(&old_idx->rb, parent, p);
+ rb_insert_color(&old_idx->rb, &c->old_idx);
+}
+
/**
* insert_old_idx - record an index node obsoleted since the last commit start.
* @c: UBIFS file-system description object
@@ -81,35 +108,15 @@ enum {
*/
static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
{
- struct ubifs_old_idx *old_idx, *o;
- struct rb_node **p, *parent = NULL;
+ struct ubifs_old_idx *old_idx;
old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
if (unlikely(!old_idx))
return -ENOMEM;
old_idx->lnum = lnum;
old_idx->offs = offs;
+ do_insert_old_idx(c, old_idx);
- p = &c->old_idx.rb_node;
- while (*p) {
- parent = *p;
- o = rb_entry(parent, struct ubifs_old_idx, rb);
- if (lnum < o->lnum)
- p = &(*p)->rb_left;
- else if (lnum > o->lnum)
- p = &(*p)->rb_right;
- else if (offs < o->offs)
- p = &(*p)->rb_left;
- else if (offs > o->offs)
- p = &(*p)->rb_right;
- else {
- ubifs_err(c, "old idx added twice!");
- kfree(old_idx);
- return 0;
- }
- }
- rb_link_node(&old_idx->rb, parent, p);
- rb_insert_color(&old_idx->rb, &c->old_idx);
return 0;
}
@@ -211,23 +218,6 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
__set_bit(DIRTY_ZNODE, &zn->flags);
__clear_bit(COW_ZNODE, &zn->flags);
- ubifs_assert(c, !ubifs_zn_obsolete(znode));
- __set_bit(OBSOLETE_ZNODE, &znode->flags);
-
- if (znode->level != 0) {
- int i;
- const int n = zn->child_cnt;
-
- /* The children now have new parent */
- for (i = 0; i < n; i++) {
- struct ubifs_zbranch *zbr = &zn->zbranch[i];
-
- if (zbr->znode)
- zbr->znode->parent = zn;
- }
- }
-
- atomic_long_inc(&c->dirty_zn_cnt);
return zn;
}
@@ -246,6 +236,42 @@ static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
}
/**
+ * replace_znode - replace old znode with new znode.
+ * @c: UBIFS file-system description object
+ * @new_zn: new znode
+ * @old_zn: old znode
+ * @zbr: the branch of parent znode
+ *
+ * Replace old znode with new znode in TNC.
+ */
+static void replace_znode(struct ubifs_info *c, struct ubifs_znode *new_zn,
+ struct ubifs_znode *old_zn, struct ubifs_zbranch *zbr)
+{
+ ubifs_assert(c, !ubifs_zn_obsolete(old_zn));
+ __set_bit(OBSOLETE_ZNODE, &old_zn->flags);
+
+ if (old_zn->level != 0) {
+ int i;
+ const int n = new_zn->child_cnt;
+
+ /* The children now have new parent */
+ for (i = 0; i < n; i++) {
+ struct ubifs_zbranch *child = &new_zn->zbranch[i];
+
+ if (child->znode)
+ child->znode->parent = new_zn;
+ }
+ }
+
+ zbr->znode = new_zn;
+ zbr->lnum = 0;
+ zbr->offs = 0;
+ zbr->len = 0;
+
+ atomic_long_inc(&c->dirty_zn_cnt);
+}
+
+/**
* dirty_cow_znode - ensure a znode is not being committed.
* @c: UBIFS file-system description object
* @zbr: branch of znode to check
@@ -277,21 +303,32 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
return zn;
if (zbr->len) {
- err = insert_old_idx(c, zbr->lnum, zbr->offs);
- if (unlikely(err))
- return ERR_PTR(err);
+ struct ubifs_old_idx *old_idx;
+
+ old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
+ if (unlikely(!old_idx)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ old_idx->lnum = zbr->lnum;
+ old_idx->offs = zbr->offs;
+
err = add_idx_dirt(c, zbr->lnum, zbr->len);
- } else
- err = 0;
+ if (err) {
+ kfree(old_idx);
+ goto out;
+ }
- zbr->znode = zn;
- zbr->lnum = 0;
- zbr->offs = 0;
- zbr->len = 0;
+ do_insert_old_idx(c, old_idx);
+ }
+
+ replace_znode(c, zn, znode, zbr);
- if (unlikely(err))
- return ERR_PTR(err);
return zn;
+
+out:
+ kfree(zn);
+ return ERR_PTR(err);
}
/**
@@ -899,7 +936,7 @@ static int fallible_resolve_collision(struct ubifs_info *c,
int adding)
{
struct ubifs_znode *o_znode = NULL, *znode = *zn;
- int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
+ int o_n, err, cmp, unsure = 0, nn = *n;
cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
if (unlikely(cmp < 0))
@@ -1521,8 +1558,8 @@ out:
*/
int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
{
- int n, err = 0, lnum = -1, uninitialized_var(offs);
- int uninitialized_var(len);
+ int n, err = 0, lnum = -1, offs;
+ int len;
unsigned int block = key_block(c, &bu->key);
struct ubifs_znode *znode;
@@ -3046,6 +3083,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
cnext = cnext->cnext;
if (ubifs_zn_obsolete(znode))
kfree(znode);
+ else if (!ubifs_zn_cow(znode)) {
+ /*
+ * Don't forget to update clean znode count after
+ * committing failed, because ubifs will check this
+ * count while closing tnc. Non-obsolete znode could
+ * be re-dirtied during committing process, so dirty
+ * flag is untrustable. The flag 'COW_ZNODE' is set
+ * for each dirty znode before committing, and it is
+ * cleared as long as the znode become clean, so we
+ * can statistic clean znode count according to this
+ * flag.
+ */
+ atomic_long_inc(&c->clean_zn_cnt);
+ atomic_long_inc(&ubifs_clean_zn_cnt);
+ }
} while (cnext && cnext != c->cnext);
}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index d90ee01076a9..fe3b52d2749b 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -138,8 +138,8 @@ int ubifs_search_zbranch(const struct ubifs_info *c,
const struct ubifs_znode *znode,
const union ubifs_key *key, int *n)
{
- int beg = 0, end = znode->child_cnt, uninitialized_var(mid);
- int uninitialized_var(cmp);
+ int beg = 0, end = znode->child_cnt, mid;
+ int cmp;
const struct ubifs_zbranch *zbr = &znode->zbranch[0];
ubifs_assert(c, end > beg);
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index fcda0fc97b90..0dc98bbad9c4 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -36,18 +36,41 @@ static int read_block_bitmap(struct super_block *sb,
unsigned long bitmap_nr)
{
struct buffer_head *bh = NULL;
- int retval = 0;
+ int i;
+ int max_bits, off, count;
struct kernel_lb_addr loc;
loc.logicalBlockNum = bitmap->s_extPosition;
loc.partitionReferenceNum = UDF_SB(sb)->s_partition;
bh = udf_tread(sb, udf_get_lb_pblock(sb, &loc, block));
+ bitmap->s_block_bitmap[bitmap_nr] = bh;
if (!bh)
- retval = -EIO;
+ return -EIO;
- bitmap->s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ /* Check consistency of Space Bitmap buffer. */
+ max_bits = sb->s_blocksize * 8;
+ if (!bitmap_nr) {
+ off = sizeof(struct spaceBitmapDesc) << 3;
+ count = min(max_bits - off, bitmap->s_nr_groups);
+ } else {
+ /*
+ * Rough check if bitmap number is too big to have any bitmap
+ * blocks reserved.
+ */
+ if (bitmap_nr >
+ (bitmap->s_nr_groups >> (sb->s_blocksize_bits + 3)) + 2)
+ return 0;
+ off = 0;
+ count = bitmap->s_nr_groups - bitmap_nr * max_bits +
+ (sizeof(struct spaceBitmapDesc) << 3);
+ count = min(count, max_bits);
+ }
+
+ for (i = 0; i < count; i++)
+ if (udf_test_bit(i + off, bh->b_data))
+ return -EFSCORRUPTED;
+ return 0;
}
static int __load_block_bitmap(struct super_block *sb,
@@ -555,7 +578,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
udf_pblk_t newblock = 0;
uint32_t adsize;
uint32_t elen, goal_elen = 0;
- struct kernel_lb_addr eloc, uninitialized_var(goal_eloc);
+ struct kernel_lb_addr eloc, goal_eloc;
struct extent_position epos, goal_epos;
int8_t etype;
struct udf_inode_info *iinfo = UDF_I(table);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index d9523013096f..73720320f0ab 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -34,7 +34,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
fibh->soffset = fibh->eoffset;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- fi = udf_get_fileident(iinfo->i_ext.i_data -
+ fi = udf_get_fileident(iinfo->i_data -
(iinfo->i_efe ?
sizeof(struct extendedFileEntry) :
sizeof(struct fileEntry)),
diff --git a/fs/udf/file.c b/fs/udf/file.c
index cd31e4f6d6da..8fff7ffc33a8 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -50,7 +50,7 @@ static void __udf_adinicb_readpage(struct page *page)
* So just sample it once and use the same value everywhere.
*/
kaddr = kmap_atomic(page);
- memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, isize);
+ memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, isize);
memset(kaddr + isize, 0, PAGE_SIZE - isize);
flush_dcache_page(page);
SetPageUptodate(page);
@@ -76,8 +76,7 @@ static int udf_adinicb_writepage(struct page *page,
BUG_ON(!PageLocked(page));
kaddr = kmap_atomic(page);
- memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
- i_size_read(inode));
+ memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, i_size_read(inode));
SetPageUptodate(page);
kunmap_atomic(kaddr);
mark_inode_dirty(inode);
@@ -148,26 +147,24 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
down_write(&iinfo->i_data_sem);
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- loff_t end = iocb->ki_pos + iov_iter_count(from);
-
- if (inode->i_sb->s_blocksize <
- (udf_file_entry_alloc_offset(inode) + end)) {
- err = udf_expand_file_adinicb(inode);
- if (err) {
- inode_unlock(inode);
- udf_debug("udf_expand_adinicb: err=%d\n", err);
- return err;
- }
- } else {
- iinfo->i_lenAlloc = max(end, inode->i_size);
- up_write(&iinfo->i_data_sem);
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB &&
+ inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) +
+ iocb->ki_pos + iov_iter_count(from))) {
+ err = udf_expand_file_adinicb(inode);
+ if (err) {
+ inode_unlock(inode);
+ udf_debug("udf_expand_adinicb: err=%d\n", err);
+ return err;
}
} else
up_write(&iinfo->i_data_sem);
retval = __generic_file_write_iter(iocb, from);
out:
+ down_write(&iinfo->i_data_sem);
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && retval > 0)
+ iinfo->i_lenAlloc = inode->i_size;
+ up_write(&iinfo->i_data_sem);
inode_unlock(inode);
if (retval > 0) {
@@ -215,7 +212,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg);
case UDF_GETEABLOCK:
return copy_to_user((char __user *)arg,
- UDF_I(inode)->i_ext.i_data,
+ UDF_I(inode)->i_data,
UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0;
default:
return -ENOIOCTLCMD;
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index f8e5872f7cc2..cdaa86e077b2 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -67,16 +67,16 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
iinfo->i_efe = 1;
if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev)
sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE;
- iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
- sizeof(struct extendedFileEntry),
- GFP_KERNEL);
+ iinfo->i_data = kzalloc(inode->i_sb->s_blocksize -
+ sizeof(struct extendedFileEntry),
+ GFP_KERNEL);
} else {
iinfo->i_efe = 0;
- iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize -
- sizeof(struct fileEntry),
- GFP_KERNEL);
+ iinfo->i_data = kzalloc(inode->i_sb->s_blocksize -
+ sizeof(struct fileEntry),
+ GFP_KERNEL);
}
- if (!iinfo->i_ext.i_data) {
+ if (!iinfo->i_data) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ec8089a31390..08d7208eb7b7 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -50,15 +50,15 @@ static int udf_update_inode(struct inode *, int);
static int udf_sync_inode(struct inode *inode);
static int udf_alloc_i_data(struct inode *inode, size_t size);
static sector_t inode_getblk(struct inode *, sector_t, int *, int *);
-static int8_t udf_insert_aext(struct inode *, struct extent_position,
- struct kernel_lb_addr, uint32_t);
+static int udf_insert_aext(struct inode *, struct extent_position,
+ struct kernel_lb_addr, uint32_t);
static void udf_split_extents(struct inode *, int *, int, udf_pblk_t,
struct kernel_long_ad *, int *);
static void udf_prealloc_extents(struct inode *, int, int,
struct kernel_long_ad *, int *);
static void udf_merge_extents(struct inode *, struct kernel_long_ad *, int *);
-static void udf_update_extents(struct inode *, struct kernel_long_ad *, int,
- int, struct extent_position *);
+static int udf_update_extents(struct inode *, struct kernel_long_ad *, int,
+ int, struct extent_position *);
static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
static void __udf_clear_extent_cache(struct inode *inode)
@@ -150,8 +150,8 @@ void udf_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
clear_inode(inode);
- kfree(iinfo->i_ext.i_data);
- iinfo->i_ext.i_data = NULL;
+ kfree(iinfo->i_data);
+ iinfo->i_data = NULL;
udf_clear_extent_cache(inode);
if (want_delete) {
udf_free_inode(inode);
@@ -278,14 +278,14 @@ int udf_expand_file_adinicb(struct inode *inode)
kaddr = kmap_atomic(page);
memset(kaddr + iinfo->i_lenAlloc, 0x00,
PAGE_SIZE - iinfo->i_lenAlloc);
- memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr,
+ memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr,
iinfo->i_lenAlloc);
flush_dcache_page(page);
SetPageUptodate(page);
kunmap_atomic(kaddr);
}
down_write(&iinfo->i_data_sem);
- memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
+ memset(iinfo->i_data + iinfo->i_lenEAttr, 0x00,
iinfo->i_lenAlloc);
iinfo->i_lenAlloc = 0;
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
@@ -303,8 +303,7 @@ int udf_expand_file_adinicb(struct inode *inode)
lock_page(page);
down_write(&iinfo->i_data_sem);
kaddr = kmap_atomic(page);
- memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
- inode->i_size);
+ memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
kunmap_atomic(kaddr);
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
@@ -392,8 +391,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode,
}
mark_buffer_dirty_inode(dbh, inode);
- memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0,
- iinfo->i_lenAlloc);
+ memset(iinfo->i_data + iinfo->i_lenEAttr, 0, iinfo->i_lenAlloc);
iinfo->i_lenAlloc = 0;
eloc.logicalBlockNum = *block;
eloc.partitionReferenceNum =
@@ -434,6 +432,12 @@ static int udf_get_block(struct inode *inode, sector_t block,
iinfo->i_next_alloc_goal++;
}
+ /*
+ * Block beyond EOF and prealloc extents? Just discard preallocation
+ * as it is not useful and complicates things.
+ */
+ if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents)
+ udf_discard_prealloc(inode);
udf_clear_extent_cache(inode);
phys = inode_getblk(inode, block, &err, &new);
if (!phys)
@@ -483,8 +487,6 @@ static int udf_do_extend_file(struct inode *inode,
uint32_t add;
int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
struct super_block *sb = inode->i_sb;
- struct kernel_lb_addr prealloc_loc = {};
- uint32_t prealloc_len = 0;
struct udf_inode_info *iinfo;
int err;
@@ -505,19 +507,6 @@ static int udf_do_extend_file(struct inode *inode,
~(sb->s_blocksize - 1);
}
- /* Last extent are just preallocated blocks? */
- if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
- EXT_NOT_RECORDED_ALLOCATED) {
- /* Save the extent so that we can reattach it to the end */
- prealloc_loc = last_ext->extLocation;
- prealloc_len = last_ext->extLength;
- /* Mark the extent as a hole */
- last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
- last_ext->extLocation.logicalBlockNum = 0;
- last_ext->extLocation.partitionReferenceNum = 0;
- }
-
/* Can we merge with the previous extent? */
if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) ==
EXT_NOT_RECORDED_NOT_ALLOCATED) {
@@ -530,8 +519,10 @@ static int udf_do_extend_file(struct inode *inode,
}
if (fake) {
- udf_add_aext(inode, last_pos, &last_ext->extLocation,
- last_ext->extLength, 1);
+ err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
+ last_ext->extLength, 1);
+ if (err < 0)
+ goto out_err;
count++;
} else {
struct kernel_lb_addr tmploc;
@@ -545,7 +536,7 @@ static int udf_do_extend_file(struct inode *inode,
* more extents, we may need to enter possible following
* empty indirect extent.
*/
- if (new_block_bytes || prealloc_len)
+ if (new_block_bytes)
udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
}
@@ -565,7 +556,7 @@ static int udf_do_extend_file(struct inode *inode,
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
- return err;
+ goto out_err;
count++;
}
if (new_block_bytes) {
@@ -574,22 +565,11 @@ static int udf_do_extend_file(struct inode *inode,
err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
if (err)
- return err;
+ goto out_err;
count++;
}
out:
- /* Do we have some preallocated blocks saved? */
- if (prealloc_len) {
- err = udf_add_aext(inode, last_pos, &prealloc_loc,
- prealloc_len, 1);
- if (err)
- return err;
- last_ext->extLocation = prealloc_loc;
- last_ext->extLength = prealloc_len;
- count++;
- }
-
/* last_pos should point to the last written extent... */
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
last_pos->offset -= sizeof(struct short_ad);
@@ -599,19 +579,28 @@ out:
return -EIO;
return count;
+out_err:
+ /* Remove extents we've created so far */
+ udf_clear_extent_cache(inode);
+ udf_truncate_extents(inode);
+ return err;
}
/* Extend the final block of the file to final_block_len bytes */
static void udf_do_extend_final_block(struct inode *inode,
struct extent_position *last_pos,
struct kernel_long_ad *last_ext,
- uint32_t final_block_len)
+ uint32_t new_elen)
{
- struct super_block *sb = inode->i_sb;
uint32_t added_bytes;
- added_bytes = final_block_len -
- (last_ext->extLength & (sb->s_blocksize - 1));
+ /*
+ * Extent already large enough? It may be already rounded up to block
+ * size...
+ */
+ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
+ return;
+ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
last_ext->extLength += added_bytes;
UDF_I(inode)->i_lenExtents += added_bytes;
@@ -628,12 +617,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
int8_t etype;
struct super_block *sb = inode->i_sb;
sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
- unsigned long partial_final_block;
+ loff_t new_elen;
int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
struct kernel_long_ad extent;
int err = 0;
- int within_final_block;
+ bool within_last_ext;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
adsize = sizeof(struct short_ad);
@@ -642,8 +631,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
else
BUG();
+ /*
+ * When creating hole in file, just don't bother with preserving
+ * preallocation. It likely won't be very useful anyway.
+ */
+ udf_discard_prealloc(inode);
+
etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
- within_final_block = (etype != -1);
+ within_last_ext = (etype != -1);
+ /* We don't expect extents past EOF... */
+ WARN_ON_ONCE(within_last_ext &&
+ elen > ((loff_t)offset + 1) << inode->i_blkbits);
if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
(epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
@@ -659,19 +657,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize)
extent.extLength |= etype << 30;
}
- partial_final_block = newsize & (sb->s_blocksize - 1);
+ new_elen = ((loff_t)offset << inode->i_blkbits) |
+ (newsize & (sb->s_blocksize - 1));
/* File has extent covering the new size (could happen when extending
* inside a block)?
*/
- if (within_final_block) {
+ if (within_last_ext) {
/* Extending file within the last file block */
- udf_do_extend_final_block(inode, &epos, &extent,
- partial_final_block);
+ udf_do_extend_final_block(inode, &epos, &extent, new_elen);
} else {
- loff_t add = ((loff_t)offset << sb->s_blocksize_bits) |
- partial_final_block;
- err = udf_do_extend_file(inode, &epos, &extent, add);
+ err = udf_do_extend_file(inode, &epos, &extent, new_elen);
}
if (err < 0)
@@ -693,7 +689,7 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
struct kernel_lb_addr eloc, tmpeloc;
int c = 1;
loff_t lbcount = 0, b_off = 0;
- udf_pblk_t newblocknum, newblock;
+ udf_pblk_t newblocknum, newblock = 0;
sector_t offset = 0;
int8_t etype;
struct udf_inode_info *iinfo = UDF_I(inode);
@@ -772,10 +768,11 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
goto out_free;
}
- /* Are we beyond EOF? */
+ /* Are we beyond EOF and preallocated extent? */
if (etype == -1) {
int ret;
loff_t hole_len;
+
isBeyondEOF = true;
if (count) {
if (c)
@@ -795,25 +792,22 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
ret = udf_do_extend_file(inode, &prev_epos, laarr, hole_len);
if (ret < 0) {
*err = ret;
- newblock = 0;
goto out_free;
}
c = 0;
offset = 0;
count += ret;
- /* We are not covered by a preallocated extent? */
- if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) !=
- EXT_NOT_RECORDED_ALLOCATED) {
- /* Is there any real extent? - otherwise we overwrite
- * the fake one... */
- if (count)
- c = !c;
- laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
- inode->i_sb->s_blocksize;
- memset(&laarr[c].extLocation, 0x00,
- sizeof(struct kernel_lb_addr));
- count++;
- }
+ /*
+ * Is there any real extent? - otherwise we overwrite the fake
+ * one...
+ */
+ if (count)
+ c = !c;
+ laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
+ inode->i_sb->s_blocksize;
+ memset(&laarr[c].extLocation, 0x00,
+ sizeof(struct kernel_lb_addr));
+ count++;
endnum = c + 1;
lastblock = 1;
} else {
@@ -860,7 +854,6 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
goal, err);
if (!newblocknum) {
*err = -ENOSPC;
- newblock = 0;
goto out_free;
}
if (isBeyondEOF)
@@ -886,7 +879,9 @@ static sector_t inode_getblk(struct inode *inode, sector_t block,
/* write back the new extents, inserting new extents if the new number
* of extents is greater than the old number, and deleting extents if
* the new number of extents is less than the old number */
- udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ *err = udf_update_extents(inode, laarr, startnum, endnum, &prev_epos);
+ if (*err < 0)
+ goto out_free;
newblock = udf_get_pblock(inode->i_sb, newblocknum,
iinfo->i_location.partitionReferenceNum, 0);
@@ -1090,23 +1085,8 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
blocksize - 1) >> blocksize_bits)))) {
if (((li->extLength & UDF_EXTENT_LENGTH_MASK) +
- (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
- blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) {
- lip1->extLength = (lip1->extLength -
- (li->extLength &
- UDF_EXTENT_LENGTH_MASK) +
- UDF_EXTENT_LENGTH_MASK) &
- ~(blocksize - 1);
- li->extLength = (li->extLength &
- UDF_EXTENT_FLAG_MASK) +
- (UDF_EXTENT_LENGTH_MASK + 1) -
- blocksize;
- lip1->extLocation.logicalBlockNum =
- li->extLocation.logicalBlockNum +
- ((li->extLength &
- UDF_EXTENT_LENGTH_MASK) >>
- blocksize_bits);
- } else {
+ (lip1->extLength & UDF_EXTENT_LENGTH_MASK) +
+ blocksize - 1) <= UDF_EXTENT_LENGTH_MASK) {
li->extLength = lip1->extLength +
(((li->extLength &
UDF_EXTENT_LENGTH_MASK) +
@@ -1169,21 +1149,30 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr,
}
}
-static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
- int startnum, int endnum,
- struct extent_position *epos)
+static int udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr,
+ int startnum, int endnum,
+ struct extent_position *epos)
{
int start = 0, i;
struct kernel_lb_addr tmploc;
uint32_t tmplen;
+ int err;
if (startnum > endnum) {
for (i = 0; i < (startnum - endnum); i++)
udf_delete_aext(inode, *epos);
} else if (startnum < endnum) {
for (i = 0; i < (endnum - startnum); i++) {
- udf_insert_aext(inode, *epos, laarr[i].extLocation,
- laarr[i].extLength);
+ err = udf_insert_aext(inode, *epos,
+ laarr[i].extLocation,
+ laarr[i].extLength);
+ /*
+ * If we fail here, we are likely corrupting the extent
+ * list and leaking blocks. At least stop early to
+ * limit the damage.
+ */
+ if (err < 0)
+ return err;
udf_next_aext(inode, epos, &laarr[i].extLocation,
&laarr[i].extLength, 1);
start++;
@@ -1195,6 +1184,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
udf_write_aext(inode, epos, &laarr[i].extLocation,
laarr[i].extLength, 1);
}
+ return 0;
}
struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block,
@@ -1259,7 +1249,7 @@ set_size:
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
down_write(&iinfo->i_data_sem);
udf_clear_extent_cache(inode);
- memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize,
+ memset(iinfo->i_data + iinfo->i_lenEAttr + newsize,
0x00, bsize - newsize -
udf_file_entry_alloc_offset(inode));
iinfo->i_lenAlloc = newsize;
@@ -1395,6 +1385,7 @@ reread:
ret = -EIO;
goto out;
}
+ iinfo->i_hidden = hidden_inode;
iinfo->i_unique = 0;
iinfo->i_lenEAttr = 0;
iinfo->i_lenExtents = 0;
@@ -1408,7 +1399,7 @@ reread:
sizeof(struct extendedFileEntry));
if (ret)
goto out;
- memcpy(iinfo->i_ext.i_data,
+ memcpy(iinfo->i_data,
bh->b_data + sizeof(struct extendedFileEntry),
bs - sizeof(struct extendedFileEntry));
} else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) {
@@ -1417,7 +1408,7 @@ reread:
ret = udf_alloc_i_data(inode, bs - sizeof(struct fileEntry));
if (ret)
goto out;
- memcpy(iinfo->i_ext.i_data,
+ memcpy(iinfo->i_data,
bh->b_data + sizeof(struct fileEntry),
bs - sizeof(struct fileEntry));
} else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) {
@@ -1430,7 +1421,7 @@ reread:
sizeof(struct unallocSpaceEntry));
if (ret)
goto out;
- memcpy(iinfo->i_ext.i_data,
+ memcpy(iinfo->i_data,
bh->b_data + sizeof(struct unallocSpaceEntry),
bs - sizeof(struct unallocSpaceEntry));
return 0;
@@ -1488,6 +1479,8 @@ reread:
iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
+ iinfo->i_streamdir = 0;
+ iinfo->i_lenStreams = 0;
} else {
inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
(inode->i_sb->s_blocksize_bits - 9);
@@ -1501,6 +1494,16 @@ reread:
iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
+
+ /* Named streams */
+ iinfo->i_streamdir = (efe->streamDirectoryICB.extLength != 0);
+ iinfo->i_locStreamdir =
+ lelb_to_cpu(efe->streamDirectoryICB.extLocation);
+ iinfo->i_lenStreams = le64_to_cpu(efe->objectSize);
+ if (iinfo->i_lenStreams >= inode->i_size)
+ iinfo->i_lenStreams -= inode->i_size;
+ else
+ iinfo->i_lenStreams = 0;
}
inode->i_generation = iinfo->i_unique;
@@ -1597,8 +1600,8 @@ out:
static int udf_alloc_i_data(struct inode *inode, size_t size)
{
struct udf_inode_info *iinfo = UDF_I(inode);
- iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL);
- if (!iinfo->i_ext.i_data)
+ iinfo->i_data = kmalloc(size, GFP_KERNEL);
+ if (!iinfo->i_data)
return -ENOMEM;
return 0;
}
@@ -1672,7 +1675,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
use->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc);
memcpy(bh->b_data + sizeof(struct unallocSpaceEntry),
- iinfo->i_ext.i_data, inode->i_sb->s_blocksize -
+ iinfo->i_data, inode->i_sb->s_blocksize -
sizeof(struct unallocSpaceEntry));
use->descTag.tagIdent = cpu_to_le16(TAG_IDENT_USE);
crclen = sizeof(struct unallocSpaceEntry);
@@ -1702,8 +1705,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0)
fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1);
- else
- fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
+ else {
+ if (iinfo->i_hidden)
+ fe->fileLinkCount = cpu_to_le16(0);
+ else
+ fe->fileLinkCount = cpu_to_le16(inode->i_nlink);
+ }
fe->informationLength = cpu_to_le64(inode->i_size);
@@ -1741,7 +1748,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
if (iinfo->i_efe == 0) {
memcpy(bh->b_data + sizeof(struct fileEntry),
- iinfo->i_ext.i_data,
+ iinfo->i_data,
inode->i_sb->s_blocksize - sizeof(struct fileEntry));
fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
@@ -1760,12 +1767,22 @@ static int udf_update_inode(struct inode *inode, int do_sync)
crclen = sizeof(struct fileEntry);
} else {
memcpy(bh->b_data + sizeof(struct extendedFileEntry),
- iinfo->i_ext.i_data,
+ iinfo->i_data,
inode->i_sb->s_blocksize -
sizeof(struct extendedFileEntry));
- efe->objectSize = cpu_to_le64(inode->i_size);
+ efe->objectSize =
+ cpu_to_le64(inode->i_size + iinfo->i_lenStreams);
efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded);
+ if (iinfo->i_streamdir) {
+ struct long_ad *icb_lad = &efe->streamDirectoryICB;
+
+ icb_lad->extLocation =
+ cpu_to_lelb(iinfo->i_locStreamdir);
+ icb_lad->extLength =
+ cpu_to_le32(inode->i_sb->s_blocksize);
+ }
+
udf_adjust_time(iinfo, inode->i_atime);
udf_adjust_time(iinfo, inode->i_mtime);
udf_adjust_time(iinfo, inode->i_ctime);
@@ -1864,8 +1881,13 @@ struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino,
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode->i_state & I_NEW)) {
+ if (UDF_I(inode)->i_hidden != hidden_inode) {
+ iput(inode);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
return inode;
+ }
memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr));
err = udf_read_inode(inode, hidden_inode);
@@ -2046,7 +2068,7 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos,
struct udf_inode_info *iinfo = UDF_I(inode);
if (!epos->bh)
- ptr = iinfo->i_ext.i_data + epos->offset -
+ ptr = iinfo->i_data + epos->offset -
udf_file_entry_alloc_offset(inode) +
iinfo->i_lenEAttr;
else
@@ -2138,7 +2160,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
if (!epos->bh) {
if (!epos->offset)
epos->offset = udf_file_entry_alloc_offset(inode);
- ptr = iinfo->i_ext.i_data + epos->offset -
+ ptr = iinfo->i_data + epos->offset -
udf_file_entry_alloc_offset(inode) +
iinfo->i_lenEAttr;
alen = udf_file_entry_alloc_offset(inode) +
@@ -2179,12 +2201,13 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos,
return etype;
}
-static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
- struct kernel_lb_addr neloc, uint32_t nelen)
+static int udf_insert_aext(struct inode *inode, struct extent_position epos,
+ struct kernel_lb_addr neloc, uint32_t nelen)
{
struct kernel_lb_addr oeloc;
uint32_t oelen;
int8_t etype;
+ int err;
if (epos.bh)
get_bh(epos.bh);
@@ -2194,10 +2217,10 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
neloc = oeloc;
nelen = (etype << 30) | oelen;
}
- udf_add_aext(inode, &epos, &neloc, nelen, 1);
+ err = udf_add_aext(inode, &epos, &neloc, nelen, 1);
brelse(epos.bh);
- return (nelen >> 30);
+ return err;
}
int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 853bcff51043..1614d308d0f0 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -52,9 +52,9 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size,
uint16_t crclen;
struct udf_inode_info *iinfo = UDF_I(inode);
- ea = iinfo->i_ext.i_data;
+ ea = iinfo->i_data;
if (iinfo->i_lenEAttr) {
- ad = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
+ ad = iinfo->i_data + iinfo->i_lenEAttr;
} else {
ad = ea;
size += sizeof(struct extendedAttrHeaderDesc);
@@ -153,7 +153,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
uint32_t offset;
struct udf_inode_info *iinfo = UDF_I(inode);
- ea = iinfo->i_ext.i_data;
+ ea = iinfo->i_data;
if (iinfo->i_lenEAttr) {
struct extendedAttrHeaderDesc *eahd;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index d13ded8e2c30..05dd1f45ba90 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -478,8 +478,7 @@ add:
if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
block = dinfo->i_location.logicalBlockNum;
fi = (struct fileIdentDesc *)
- (dinfo->i_ext.i_data +
- fibh->soffset -
+ (dinfo->i_data + fibh->soffset -
udf_ext0_offset(dir) +
dinfo->i_lenEAttr);
} else {
@@ -962,7 +961,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
mark_buffer_dirty_inode(epos.bh, inode);
ea = epos.bh->b_data + udf_ext0_offset(inode);
} else
- ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
+ ea = iinfo->i_data + iinfo->i_lenEAttr;
eoffset = sb->s_blocksize - udf_ext0_offset(inode);
pc = (struct pathComponent *)ea;
@@ -1106,8 +1105,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
return -EINVAL;
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
- if (IS_ERR(ofi)) {
- retval = PTR_ERR(ofi);
+ if (!ofi || IS_ERR(ofi)) {
+ if (IS_ERR(ofi))
+ retval = PTR_ERR(ofi);
goto end_rename;
}
@@ -1116,8 +1116,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
brelse(ofibh.sbh);
tloc = lelb_to_cpu(ocfi.icb.extLocation);
- if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0)
- != old_inode->i_ino)
+ if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino)
goto end_rename;
nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
@@ -1142,7 +1141,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = -EIO;
if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
dir_fi = udf_get_fileident(
- old_iinfo->i_ext.i_data -
+ old_iinfo->i_data -
(old_iinfo->i_efe ?
sizeof(struct extendedFileEntry) :
sizeof(struct fileEntry)),
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 090baff83990..4cbf40575965 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -65,7 +65,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block,
}
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- loc = le32_to_cpu(((__le32 *)(iinfo->i_ext.i_data +
+ loc = le32_to_cpu(((__le32 *)(iinfo->i_data +
vdata->s_start_offset))[block]);
goto translate;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b7fb7cd35d89..bce48a07790c 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -146,9 +146,12 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
ei->i_unique = 0;
ei->i_lenExtents = 0;
+ ei->i_lenStreams = 0;
ei->i_next_alloc_block = 0;
ei->i_next_alloc_goal = 0;
ei->i_strat4096 = 0;
+ ei->i_streamdir = 0;
+ ei->i_hidden = 0;
init_rwsem(&ei->i_data_sem);
ei->cached_extent.lstart = -1;
spin_lock_init(&ei->i_extent_cache_lock);
@@ -172,7 +175,7 @@ static void init_once(void *foo)
{
struct udf_inode_info *ei = (struct udf_inode_info *)foo;
- ei->i_ext.i_data = NULL;
+ ei->i_data = NULL;
inode_init_once(&ei->vfs_inode);
}
@@ -572,6 +575,11 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
if (!remount) {
if (uopt->nls_map)
unload_nls(uopt->nls_map);
+ /*
+ * load_nls() failure is handled later in
+ * udf_fill_super() after all options are
+ * parsed.
+ */
uopt->nls_map = load_nls(args[0].from);
uopt->flags |= (1 << UDF_FLAG_NLS_MAP);
}
@@ -1200,7 +1208,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
vat20 = (struct virtualAllocationTable20 *)bh->b_data;
} else {
vat20 = (struct virtualAllocationTable20 *)
- vati->i_ext.i_data;
+ vati->i_data;
}
map->s_type_specific.s_virtual.s_start_offset =
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 6023c97c6da2..aef3e4d9014d 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -122,7 +122,7 @@ static int udf_symlink_filler(struct file *file, struct page *page)
down_read(&iinfo->i_data_sem);
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
- symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr;
+ symlink = iinfo->i_data + iinfo->i_lenEAttr;
} else {
bh = sb_bread(inode->i_sb, pos);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 94220ba85628..b0c71edc83f7 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -120,60 +120,42 @@ void udf_truncate_tail_extent(struct inode *inode)
void udf_discard_prealloc(struct inode *inode)
{
- struct extent_position epos = { NULL, 0, {0, 0} };
+ struct extent_position epos = {};
+ struct extent_position prev_epos = {};
struct kernel_lb_addr eloc;
uint32_t elen;
uint64_t lbcount = 0;
int8_t etype = -1, netype;
- int adsize;
struct udf_inode_info *iinfo = UDF_I(inode);
+ int bsize = 1 << inode->i_blkbits;
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB ||
- inode->i_size == iinfo->i_lenExtents)
+ ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize))
return;
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
- adsize = sizeof(struct short_ad);
- else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
- adsize = sizeof(struct long_ad);
- else
- adsize = 0;
-
epos.block = iinfo->i_location;
/* Find the last extent in the file */
- while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) {
- etype = netype;
+ while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 0)) != -1) {
+ brelse(prev_epos.bh);
+ prev_epos = epos;
+ if (prev_epos.bh)
+ get_bh(prev_epos.bh);
+
+ etype = udf_next_aext(inode, &epos, &eloc, &elen, 1);
lbcount += elen;
}
if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) {
- epos.offset -= adsize;
lbcount -= elen;
- extent_trunc(inode, &epos, &eloc, etype, elen, 0);
- if (!epos.bh) {
- iinfo->i_lenAlloc =
- epos.offset -
- udf_file_entry_alloc_offset(inode);
- mark_inode_dirty(inode);
- } else {
- struct allocExtDesc *aed =
- (struct allocExtDesc *)(epos.bh->b_data);
- aed->lengthAllocDescs =
- cpu_to_le32(epos.offset -
- sizeof(struct allocExtDesc));
- if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) ||
- UDF_SB(inode->i_sb)->s_udfrev >= 0x0201)
- udf_update_tag(epos.bh->b_data, epos.offset);
- else
- udf_update_tag(epos.bh->b_data,
- sizeof(struct allocExtDesc));
- mark_buffer_dirty_inode(epos.bh, inode);
- }
+ udf_delete_aext(inode, prev_epos);
+ udf_free_blocks(inode->i_sb, inode, &eloc, 0,
+ DIV_ROUND_UP(elen, 1 << inode->i_blkbits));
}
/* This inode entry is in-memory only and thus we don't have to mark
* the inode dirty */
iinfo->i_lenExtents = lbcount;
brelse(epos.bh);
+ brelse(prev_epos.bh);
}
static void udf_update_alloc_ext_desc(struct inode *inode,
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index 2ef0e212f08a..b77bf713a1b6 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -42,12 +42,12 @@ struct udf_inode_info {
unsigned i_efe : 1; /* extendedFileEntry */
unsigned i_use : 1; /* unallocSpaceEntry */
unsigned i_strat4096 : 1;
- unsigned reserved : 26;
- union {
- struct short_ad *i_sad;
- struct long_ad *i_lad;
- __u8 *i_data;
- } i_ext;
+ unsigned i_streamdir : 1;
+ unsigned i_hidden : 1; /* hidden system inode */
+ unsigned reserved : 24;
+ __u8 *i_data;
+ struct kernel_lb_addr i_locStreamdir;
+ __u64 i_lenStreams;
struct rw_semaphore i_data_sem;
struct udf_ext_cache cached_extent;
/* Spinlock for protecting extent cache */
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index d12e507e9eb2..aa58173b468f 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -57,6 +57,8 @@
#define MF_DUPLICATE_MD 0x01
#define MF_MIRROR_FE_LOADED 0x02
+#define EFSCORRUPTED EUCLEAN
+
struct udf_meta_data {
__u32 s_meta_file_loc;
__u32 s_mirror_file_loc;
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5fcfa96463eb..85521d6b0237 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -247,7 +247,7 @@ static int udf_name_from_CS0(struct super_block *sb,
}
if (translate) {
- if (str_o_len <= 2 && str_o[0] == '.' &&
+ if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
(str_o_len == 1 || str_o[1] == '.'))
needsCRC = 1;
if (needsCRC) {
diff --git a/fs/xattr.c b/fs/xattr.c
index 470ee0af3200..5c3407e18e15 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1012,7 +1012,7 @@ static int xattr_list_one(char **buffer, ssize_t *remaining_size,
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size)
{
- bool trusted = capable(CAP_SYS_ADMIN);
+ bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
struct simple_xattr *xattr;
ssize_t remaining_size = size;
int err = 0;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3e1dd66bd676..734b80a4220c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -130,7 +130,7 @@ xfs_bmap_rtalloc(
* pick an extent that will space things out in the rt area.
*/
if (ap->eof && ap->offset == 0) {
- xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
+ xfs_rtblock_t rtx; /* realtime extent no */
error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
if (error)