aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c5
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/affs/amigaffs.c27
-rw-r--r--fs/affs/file.c26
-rw-r--r--fs/affs/namei.c4
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_misc.c29
-rw-r--r--fs/block_dev.c42
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/async-thread.c14
-rw-r--r--fs/btrfs/backref.c1
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c43
-rw-r--r--fs/btrfs/ctree.h3
-rw-r--r--fs/btrfs/delayed-inode.c18
-rw-r--r--fs/btrfs/disk-io.c29
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/export.h5
-rw-r--r--fs/btrfs/extent-tree.c40
-rw-r--r--fs/btrfs/extent_io.c57
-rw-r--r--fs/btrfs/extent_io.h6
-rw-r--r--fs/btrfs/file-item.c16
-rw-r--r--fs/btrfs/file.c60
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/inode.c192
-rw-r--r--fs/btrfs/ioctl.c64
-rw-r--r--fs/btrfs/print-tree.c12
-rw-r--r--fs/btrfs/qgroup.c15
-rw-r--r--fs/btrfs/raid56.c58
-rw-r--r--fs/btrfs/reada.c2
-rw-r--r--fs/btrfs/relocation.c10
-rw-r--r--fs/btrfs/send.c152
-rw-r--r--fs/btrfs/super.c26
-rw-r--r--fs/btrfs/sysfs.c4
-rw-r--r--fs/btrfs/tests/btrfs-tests.c8
-rw-r--r--fs/btrfs/tests/inode-tests.c1
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c125
-rw-r--r--fs/btrfs/volumes.c35
-rw-r--r--fs/buffer.c25
-rw-r--r--fs/cachefiles/rdwr.c5
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/caps.c29
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/ceph/inode.c1
-rw-r--r--fs/ceph/mds_client.c14
-rw-r--r--fs/cifs/asn1.c16
-rw-r--r--fs/cifs/cifs_unicode.c17
-rw-r--r--fs/cifs/cifsfs.c2
-rw-r--r--fs/cifs/cifsglob.h9
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/dir.c22
-rw-r--r--fs/cifs/file.c24
-rw-r--r--fs/cifs/inode.c9
-rw-r--r--fs/cifs/misc.c17
-rw-r--r--fs/cifs/sess.c2
-rw-r--r--fs/cifs/smb1ops.c8
-rw-r--r--fs/cifs/smb2misc.c36
-rw-r--r--fs/cifs/smb2ops.c64
-rw-r--r--fs/cifs/smb2pdu.c8
-rw-r--r--fs/cifs/smb2pdu.h4
-rw-r--r--fs/configfs/dir.c1
-rw-r--r--fs/configfs/file.c16
-rw-r--r--fs/crypto/fname.c9
-rw-r--r--fs/crypto/policy.c3
-rw-r--r--fs/direct-io.c5
-rw-r--r--fs/dlm/config.c12
-rw-r--r--fs/dlm/debug_fs.c1
-rw-r--r--fs/dlm/dlm_internal.h1
-rw-r--r--fs/dlm/lockspace.c6
-rw-r--r--fs/dlm/lowcomms.c2
-rw-r--r--fs/ecryptfs/crypto.c6
-rw-r--r--fs/ecryptfs/main.c6
-rw-r--r--fs/efivarfs/inode.c2
-rw-r--r--fs/efivarfs/super.c3
-rw-r--r--fs/eventpoll.c94
-rw-r--r--fs/exec.c17
-rw-r--r--fs/ext2/balloc.c14
-rw-r--r--fs/ext2/ialloc.c3
-rw-r--r--fs/ext4/block_validity.c72
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/ext4_extents.h9
-rw-r--r--fs/ext4/extents.c64
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/fsmap.c3
-rw-r--r--fs/ext4/fsync.c28
-rw-r--r--fs/ext4/ialloc.c59
-rw-r--r--fs/ext4/indirect.c6
-rw-r--r--fs/ext4/inline.c7
-rw-r--r--fs/ext4/inode.c62
-rw-r--r--fs/ext4/ioctl.c3
-rw-r--r--fs/ext4/mballoc.c14
-rw-r--r--fs/ext4/namei.c94
-rw-r--r--fs/ext4/resize.c4
-rw-r--r--fs/ext4/super.c114
-rw-r--r--fs/ext4/xattr.c13
-rw-r--r--fs/f2fs/checkpoint.c8
-rw-r--r--fs/f2fs/dir.c12
-rw-r--r--fs/f2fs/file.c3
-rw-r--r--fs/f2fs/inline.c3
-rw-r--r--fs/f2fs/namei.c6
-rw-r--r--fs/f2fs/node.c3
-rw-r--r--fs/f2fs/segment.c11
-rw-r--r--fs/f2fs/segment.h4
-rw-r--r--fs/f2fs/super.c9
-rw-r--r--fs/f2fs/sysfs.c1
-rw-r--r--fs/f2fs/xattr.c9
-rw-r--r--fs/fat/inode.c6
-rw-r--r--fs/file.c21
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/fs-writeback.c155
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c54
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/file.c12
-rw-r--r--fs/gfs2/glock.c8
-rw-r--r--fs/gfs2/glops.c2
-rw-r--r--fs/gfs2/inode.c13
-rw-r--r--fs/gfs2/lock_dlm.c13
-rw-r--r--fs/gfs2/log.c13
-rw-r--r--fs/gfs2/ops_fstype.c32
-rw-r--r--fs/gfs2/quota.c3
-rw-r--r--fs/gfs2/quota.h3
-rw-r--r--fs/gfs2/rgrp.c9
-rw-r--r--fs/gfs2/super.c13
-rw-r--r--fs/gfs2/trans.c2
-rw-r--r--fs/hfs/bfind.c14
-rw-r--r--fs/hfs/bnode.c25
-rw-r--r--fs/hfs/btree.h7
-rw-r--r--fs/hfs/super.c10
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/isofs/dir.c1
-rw-r--r--fs/isofs/inode.c29
-rw-r--r--fs/isofs/isofs.h1
-rw-r--r--fs/isofs/joliet.c4
-rw-r--r--fs/isofs/namei.c1
-rw-r--r--fs/jbd2/journal.c4
-rw-r--r--fs/jbd2/transaction.c26
-rw-r--r--fs/jffs2/compr_rtime.c3
-rw-r--r--fs/jffs2/dir.c6
-rw-r--r--fs/jffs2/readinode.c16
-rw-r--r--fs/jffs2/scan.c2
-rw-r--r--fs/jffs2/summary.c3
-rw-r--r--fs/jfs/inode.c3
-rw-r--r--fs/jfs/jfs_dmap.c2
-rw-r--r--fs/jfs/jfs_dmap.h2
-rw-r--r--fs/jfs/jfs_filsys.h1
-rw-r--r--fs/jfs/jfs_logmgr.c1
-rw-r--r--fs/jfs/jfs_mount.c61
-rw-r--r--fs/kernfs/mount.c1
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/lockd/host.c20
-rw-r--r--fs/minix/inode.c36
-rw-r--r--fs/minix/itree_common.c8
-rw-r--r--fs/namespace.c57
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c13
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c4
-rw-r--r--fs/nfs/inode.c14
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/namespace.c12
-rw-r--r--fs/nfs/nfs3proc.c4
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfs/nfs42proc.c26
-rw-r--r--fs/nfs/nfs42xdr.c3
-rw-r--r--fs/nfs/nfs4client.c2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c45
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/nfs/pagelist.c79
-rw-r--r--fs/nfs/pnfs.c29
-rw-r--r--fs/nfs/pnfs_nfs.c56
-rw-r--r--fs/nfs/write.c27
-rw-r--r--fs/nfs_common/grace.c6
-rw-r--r--fs/nfsd/nfs3xdr.c7
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c9
-rw-r--r--fs/nfsd/nfs4xdr.c19
-rw-r--r--fs/nfsd/nfsctl.c5
-rw-r--r--fs/nfsd/nfsproc.c16
-rw-r--r--fs/nfsd/nfssvc.c3
-rw-r--r--fs/nfsd/vfs.c6
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/sysfs.c27
-rw-r--r--fs/ntfs/inode.c14
-rw-r--r--fs/ocfs2/alloc.c46
-rw-r--r--fs/ocfs2/aops.c11
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dlmglue.c3
-rw-r--r--fs/ocfs2/file.c90
-rw-r--r--fs/ocfs2/filecheck.c6
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/ocfs2_fs.h4
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/suballoc.c13
-rw-r--r--fs/ocfs2/super.c19
-rw-r--r--fs/orangefs/dcache.c4
-rw-r--r--fs/orangefs/super.c2
-rw-r--r--fs/overlayfs/copy_up.c17
-rw-r--r--fs/overlayfs/dir.c12
-rw-r--r--fs/overlayfs/inode.c2
-rw-r--r--fs/pipe.c19
-rw-r--r--fs/proc/base.c18
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/self.c9
-rw-r--r--fs/proc/thread_self.c2
-rw-r--r--fs/proc/vmcore.c15
-rw-r--r--fs/qnx4/dir.c69
-rw-r--r--fs/quota/quota_tree.c23
-rw-r--r--fs/quota/quota_v2.c25
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/readdir.c6
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/reiserfs/journal.c14
-rw-r--r--fs/reiserfs/stree.c27
-rw-r--r--fs/reiserfs/super.c16
-rw-r--r--fs/reiserfs/xattr.c7
-rw-r--r--fs/reiserfs/xattr.h2
-rw-r--r--fs/romfs/storage.c4
-rw-r--r--fs/select.c10
-rw-r--r--fs/seq_file.c3
-rw-r--r--fs/signalfd.c12
-rw-r--r--fs/squashfs/export.c45
-rw-r--r--fs/squashfs/file.c6
-rw-r--r--fs/squashfs/id.c42
-rw-r--r--fs/squashfs/squashfs_fs.h1
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c6
-rw-r--r--fs/squashfs/xattr.h10
-rw-r--r--fs/squashfs/xattr_id.c68
-rw-r--r--fs/super.c33
-rw-r--r--fs/sysfs/file.c55
-rw-r--r--fs/sysfs/mount.c6
-rw-r--r--fs/tracefs/inode.c79
-rw-r--r--fs/ubifs/debug.c1
-rw-r--r--fs/ubifs/dir.c13
-rw-r--r--fs/ubifs/io.c29
-rw-r--r--fs/udf/inode.c34
-rw-r--r--fs/udf/misc.c13
-rw-r--r--fs/udf/namei.c4
-rw-r--r--fs/udf/super.c31
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/xattr.c84
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c16
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c2
-rw-r--r--fs/xfs/xfs_fsmap.c3
-rw-r--r--fs/xfs/xfs_ioctl.c3
-rw-r--r--fs/xfs/xfs_iops.c12
-rw-r--r--fs/xfs/xfs_log.c9
-rw-r--r--fs/xfs/xfs_pnfs.c2
-rw-r--r--fs/xfs/xfs_reflink.c21
-rw-r--r--fs/xfs/xfs_rtalloc.c21
-rw-r--r--fs/xfs/xfs_sysfs.h6
-rw-r--r--fs/xfs/xfs_trans_dquot.c2
261 files changed, 3215 insertions, 1533 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c52f10efdc9c..b4dbe7b71151 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -513,10 +513,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
}
#ifdef CONFIG_9P_FSCACHE
- if (v9ses->fscache) {
+ if (v9ses->fscache)
v9fs_cache_session_put_cookie(v9ses);
- kfree(v9ses->cachetag);
- }
+ kfree(v9ses->cachetag);
#endif
kfree(v9ses->uname);
kfree(v9ses->aname);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2651192f0166..13e126f57851 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -624,9 +624,9 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = WB_SYNC_ALL,
- .range_start = vma->vm_pgoff * PAGE_SIZE,
+ .range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
/* absolute end, byte at end included */
- .range_end = vma->vm_pgoff * PAGE_SIZE +
+ .range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
(vma->vm_end - vma->vm_start - 1),
};
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 185d5ab7e986..a80ce75b5bad 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -419,24 +419,51 @@ affs_mode_to_prot(struct inode *inode)
u32 prot = AFFS_I(inode)->i_protect;
umode_t mode = inode->i_mode;
+ /*
+ * First, clear all RWED bits for owner, group, other.
+ * Then, recalculate them afresh.
+ *
+ * We'll always clear the delete-inhibit bit for the owner, as that is
+ * the classic single-user mode AmigaOS protection bit and we need to
+ * stay compatible with all scenarios.
+ *
+ * Since multi-user AmigaOS is an extension, we'll only set the
+ * delete-allow bit if any of the other bits in the same user class
+ * (group/other) are used.
+ */
+ prot &= ~(FIBF_NOEXECUTE | FIBF_NOREAD
+ | FIBF_NOWRITE | FIBF_NODELETE
+ | FIBF_GRP_EXECUTE | FIBF_GRP_READ
+ | FIBF_GRP_WRITE | FIBF_GRP_DELETE
+ | FIBF_OTR_EXECUTE | FIBF_OTR_READ
+ | FIBF_OTR_WRITE | FIBF_OTR_DELETE);
+
+ /* Classic single-user AmigaOS flags. These are inverted. */
if (!(mode & 0100))
prot |= FIBF_NOEXECUTE;
if (!(mode & 0400))
prot |= FIBF_NOREAD;
if (!(mode & 0200))
prot |= FIBF_NOWRITE;
+
+ /* Multi-user extended flags. Not inverted. */
if (mode & 0010)
prot |= FIBF_GRP_EXECUTE;
if (mode & 0040)
prot |= FIBF_GRP_READ;
if (mode & 0020)
prot |= FIBF_GRP_WRITE;
+ if (mode & 0070)
+ prot |= FIBF_GRP_DELETE;
+
if (mode & 0001)
prot |= FIBF_OTR_EXECUTE;
if (mode & 0004)
prot |= FIBF_OTR_READ;
if (mode & 0002)
prot |= FIBF_OTR_WRITE;
+ if (mode & 0007)
+ prot |= FIBF_OTR_DELETE;
AFFS_I(inode)->i_protect = prot;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index a85817f54483..ba084b0b214b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -428,6 +428,24 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
+static int affs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned int len, unsigned int copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
+ return ret;
+}
+
static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,affs_get_block);
@@ -437,7 +455,7 @@ const struct address_space_operations affs_aops = {
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
- .write_end = generic_write_end,
+ .write_end = affs_write_end,
.direct_IO = affs_direct_IO,
.bmap = _affs_bmap
};
@@ -794,6 +812,12 @@ done:
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
+ /* Clear Archived bit on file writes, as AmigaOS would do */
+ if (AFFS_I(inode)->i_protect & FIBF_ARCHIVED) {
+ AFFS_I(inode)->i_protect &= ~FIBF_ARCHIVED;
+ mark_inode_dirty(inode);
+ }
+
err_first_bh:
unlock_page(page);
put_page(page);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 1ed0fa4c4d48..5863e2157ef5 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -461,8 +461,10 @@ affs_xrename(struct inode *old_dir, struct dentry *old_dentry,
return -EIO;
bh_new = affs_bread(sb, d_inode(new_dentry)->i_ino);
- if (!bh_new)
+ if (!bh_new) {
+ affs_brelse(bh_old);
return -EIO;
+ }
/* Remove old header from its parent directory. */
affs_lock_dir(old_dir);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 166846a40078..2c433c95adb5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1740,7 +1740,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
(!regset->active || regset->active(t->task, regset) > 0)) {
int ret;
size_t size = regset->n * regset->size;
- void *data = kmalloc(size, GFP_KERNEL);
+ void *data = kzalloc(size, GFP_KERNEL);
if (unlikely(!data))
return 0;
ret = regset->get(t->task, regset,
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 8311e8ed76de..566296ce7ea8 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -694,12 +694,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
struct super_block *sb = file_inode(file)->i_sb;
struct dentry *root = sb->s_root, *dentry;
int err = 0;
+ struct file *f = NULL;
e = create_entry(buffer, count);
if (IS_ERR(e))
return PTR_ERR(e);
+ if (e->flags & MISC_FMT_OPEN_FILE) {
+ f = open_exec(e->interpreter);
+ if (IS_ERR(f)) {
+ pr_notice("register: failed to install interpreter file %s\n",
+ e->interpreter);
+ kfree(e);
+ return PTR_ERR(f);
+ }
+ e->interp_file = f;
+ }
+
inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
@@ -723,21 +735,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
goto out2;
}
- if (e->flags & MISC_FMT_OPEN_FILE) {
- struct file *f;
-
- f = open_exec(e->interpreter);
- if (IS_ERR(f)) {
- err = PTR_ERR(f);
- pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
- simple_release_fs(&bm_mnt, &entry_count);
- iput(inode);
- inode = NULL;
- goto out2;
- }
- e->interp_file = f;
- }
-
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = &bm_entry_operations;
@@ -754,6 +751,8 @@ out:
inode_unlock(d_inode(root));
if (err) {
+ if (f)
+ filp_close(f, NULL);
kfree(e);
return err;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 61949e3446e5..a56974d04010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1439,10 +1439,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
*/
if (!for_part) {
ret = devcgroup_inode_permission(bdev->bd_inode, perm);
- if (ret != 0) {
- bdput(bdev);
+ if (ret != 0)
return ret;
- }
}
restart:
@@ -1515,8 +1513,10 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
BUG_ON(for_part);
ret = __blkdev_get(whole, mode, 1);
- if (ret)
+ if (ret) {
+ bdput(whole);
goto out_clear;
+ }
bdev->bd_contains = whole;
bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
@@ -1570,7 +1570,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
put_disk(disk);
module_put(owner);
out:
- bdput(bdev);
return ret;
}
@@ -1656,6 +1655,9 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
bdput(whole);
}
+ if (res)
+ bdput(bdev);
+
return res;
}
EXPORT_SYMBOL(blkdev_get);
@@ -1775,6 +1777,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minute)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part)
bdev->bd_part_count--;
@@ -1894,6 +1906,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
+ size_t shorted = 0;
ssize_t ret;
if (bdev_read_only(I_BDEV(bd_inode)))
@@ -1908,12 +1921,17 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
return -EOPNOTSUPP;
- iov_iter_truncate(from, size - iocb->ki_pos);
+ size -= iocb->ki_pos;
+ if (iov_iter_count(from) > size) {
+ shorted = iov_iter_count(from) - size;
+ iov_iter_truncate(from, size);
+ }
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
blk_finish_plug(&plug);
return ret;
}
@@ -1925,13 +1943,21 @@ ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+ ssize_t ret;
if (pos >= size)
return 0;
size -= pos;
- iov_iter_truncate(to, size);
- return generic_file_read_iter(iocb, to);
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
+
+ ret = generic_file_read_iter(iocb, to);
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+ return ret;
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index a26c63b4ad68..9dd07eb88455 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -11,6 +11,8 @@ config BTRFS_FS
select RAID6_PQ
select XOR_BLOCKS
select SRCU
+ depends on !PPC_256K_PAGES # powerpc
+ depends on !PAGE_SIZE_256KB # hexagon
help
Btrfs is a general purpose copy-on-write filesystem with extents,
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 92615badc173..3dfe8d35235e 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -283,6 +283,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
ordered_list);
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
+ /*
+ * Orders all subsequent loads after reading WORK_DONE_BIT,
+ * paired with the smp_mb__before_atomic in btrfs_work_helper
+ * this guarantees that the ordered function will see all
+ * updates from ordinary work function.
+ */
+ smp_rmb();
/*
* we are going to call the ordered done function, but
@@ -368,6 +375,13 @@ static void normal_work_helper(struct btrfs_work *work)
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
+ /*
+ * Ensures all memory accesses done in the work function are
+ * ordered before setting the WORK_DONE_BIT. Ensuring the thread
+ * which is going to executed the ordered work sees them.
+ * Pairs with the smp_rmb in run_ordered_work.
+ */
+ smp_mb__before_atomic();
set_bit(WORK_DONE_BIT, &work->flags);
run_ordered_work(wq, work);
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e4d5e6eae409..1cf75d1032e1 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1420,6 +1420,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
ulist_free(*roots);
+ *roots = NULL;
return ret;
}
node = ulist_next(tmp, &uiter);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ccd9c709375e..24341c97c13f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -286,7 +286,7 @@ static void end_compressed_bio_write(struct bio *bio)
cb->start,
cb->start + cb->len - 1,
NULL,
- bio->bi_status ? 0 : 1);
+ !cb->errors);
cb->compressed_pages[0]->mapping = NULL;
end_compressed_writeback(inode, cb);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f5a8c0d26cf3..5f7eea3fa1c6 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -282,9 +282,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
-
- if (ret)
+ if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
+ btrfs_abort_transaction(trans, ret);
return ret;
+ }
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
@@ -1130,6 +1133,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1137,6 +1142,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1168,6 +1175,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (last_ref) {
ret = tree_mod_log_free_eb(fs_info, buf);
if (ret) {
+ btrfs_tree_unlock(cow);
+ free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
@@ -1367,7 +1376,8 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
- extent_buffer_get(eb_rewin);
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
+ eb_rewin, btrfs_header_level(eb_rewin));
btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
@@ -1421,8 +1431,30 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
"failed to read tree block %llu from get_old_root",
logical);
} else {
+ struct tree_mod_elem *tm2;
+
+ btrfs_tree_read_lock(old);
eb = btrfs_clone_extent_buffer(old);
+ /*
+ * After the lookup for the most recent tree mod operation
+ * above and before we locked and cloned the extent buffer
+ * 'old', a new tree mod log operation may have been added.
+ * So lookup for a more recent one to make sure the number
+ * of mod log operations we replay is consistent with the
+ * number of items we have in the cloned extent buffer,
+ * otherwise we can hit a BUG_ON when rewinding the extent
+ * buffer.
+ */
+ tm2 = tree_mod_log_search(fs_info, logical, time_seq);
+ btrfs_tree_read_unlock(old);
free_extent_buffer(old);
+ ASSERT(tm2);
+ ASSERT(tm2 == tm || tm2->seq > tm->seq);
+ if (!tm2 || tm2->seq < tm->seq) {
+ free_extent_buffer(eb);
+ return NULL;
+ }
+ tm = tm2;
}
} else if (old_root) {
eb_root_owner = btrfs_header_owner(eb_root);
@@ -1438,8 +1470,6 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (!eb)
return NULL;
- extent_buffer_get(eb);
- btrfs_tree_read_lock(eb);
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
@@ -1447,6 +1477,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
+ btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
+ btrfs_header_level(eb));
+ btrfs_tree_read_lock(eb);
if (tm)
__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
else
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5412b12491cb..19a668e9164b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1257,6 +1257,7 @@ struct btrfs_root {
int send_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshotted;
+ atomic_t snapshot_force_cow;
/* For qgroup metadata space reserve */
atomic64_t qgroup_meta_rsv;
@@ -3262,6 +3263,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid);
static inline __printf(2, 3)
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 416fb50a5378..9f276d1dd29c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1064,12 +1064,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
nofs_flag = memalloc_nofs_save();
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
memalloc_nofs_restore(nofs_flag);
- if (ret > 0) {
- btrfs_release_path(path);
- return -ENOENT;
- } else if (ret < 0) {
- return ret;
- }
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ goto out;
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
@@ -1107,6 +1105,14 @@ err_out:
btrfs_delayed_inode_release_metadata(fs_info, node);
btrfs_release_delayed_inode(node);
+ /*
+ * If we fail to update the delayed inode we need to abort the
+ * transaction, because we could leave the inode with the improper
+ * counts behind.
+ */
+ if (ret && ret != -ENOENT)
+ btrfs_abort_transaction(trans, ret);
+
return ret;
search:
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 096c015b22a4..41ebc613ca4c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1200,6 +1200,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
refcount_set(&root->refs, 1);
atomic_set(&root->will_be_snapshotted, 0);
atomic64_set(&root->qgroup_meta_rsv, 0);
+ atomic_set(&root->snapshot_force_cow, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
@@ -1509,9 +1510,16 @@ int btrfs_init_fs_root(struct btrfs_root *root)
spin_lock_init(&root->ino_cache_lock);
init_waitqueue_head(&root->ino_cache_wait);
- ret = get_anon_bdev(&root->anon_dev);
- if (ret)
- goto fail;
+ /*
+ * Don't assign anonymous block device to roots that are not exposed to
+ * userspace, the id pool is limited to 1M
+ */
+ if (is_fstree(root->root_key.objectid) &&
+ btrfs_root_refs(&root->root_item) > 0) {
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
+ }
mutex_lock(&root->objectid_mutex);
ret = btrfs_find_highest_objectid(root,
@@ -3331,11 +3339,23 @@ static void btrfs_end_empty_barrier(struct bio *bio)
*/
static void write_dev_flush(struct btrfs_device *device)
{
- struct request_queue *q = bdev_get_queue(device->bdev);
struct bio *bio = device->flush_bio;
+#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ /*
+ * When a disk has write caching disabled, we skip submission of a bio
+ * with flush and sync requests before writing the superblock, since
+ * it's not needed. However when the integrity checker is enabled, this
+ * results in reports that there are metadata blocks referred by a
+ * superblock that were not properly flushed. So don't skip the bio
+ * submission only when the integrity checker is enabled for the sake
+ * of simplicity, since this is a debug tool and not meant for use in
+ * non-debug builds.
+ */
+ struct request_queue *q = bdev_get_queue(device->bdev);
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
return;
+#endif
bio_reset(bio);
bio->bi_end_io = btrfs_end_empty_barrier;
@@ -4337,6 +4357,7 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
cache->io_ctl.inode = NULL;
iput(inode);
}
+ ASSERT(cache->io_ctl.pages == NULL);
btrfs_put_block_group(cache);
}
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 3aeb5770f896..b6ce765aa7f3 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -56,9 +56,9 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
return type;
}
-static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
- u64 root_objectid, u32 generation,
- int check_generation)
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root;
@@ -151,7 +151,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
}
-static struct dentry *btrfs_get_parent(struct dentry *child)
+struct dentry *btrfs_get_parent(struct dentry *child)
{
struct inode *dir = d_inode(child);
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
diff --git a/fs/btrfs/export.h b/fs/btrfs/export.h
index 91b3908e7c54..15db02462141 100644
--- a/fs/btrfs/export.h
+++ b/fs/btrfs/export.h
@@ -17,4 +17,9 @@ struct btrfs_fid {
u64 parent_root_objectid;
} __attribute__ ((packed));
+struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
+ u64 root_objectid, u32 generation,
+ int check_generation);
+struct dentry *btrfs_get_parent(struct dentry *child);
+
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 51e26f90f0bb..00481cfe6cfc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1170,12 +1170,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
ASSERT(eb->fs_info);
/*
- * Every shared one has parent tree
- * block, which must be aligned to
- * nodesize.
+ * Every shared one has parent tree block,
+ * which must be aligned to sector size.
*/
if (offset &&
- IS_ALIGNED(offset, eb->fs_info->nodesize))
+ IS_ALIGNED(offset, eb->fs_info->sectorsize))
return type;
}
} else if (is_data == BTRFS_REF_TYPE_DATA) {
@@ -1184,12 +1183,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
if (type == BTRFS_SHARED_DATA_REF_KEY) {
ASSERT(eb->fs_info);
/*
- * Every shared one has parent tree
- * block, which must be aligned to
- * nodesize.
+ * Every shared one has parent tree block,
+ * which must be aligned to sector size.
*/
if (offset &&
- IS_ALIGNED(offset, eb->fs_info->nodesize))
+ IS_ALIGNED(offset, eb->fs_info->sectorsize))
return type;
}
} else {
@@ -1199,8 +1197,9 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
btrfs_print_leaf((struct extent_buffer *)eb);
- btrfs_err(eb->fs_info, "eb %llu invalid extent inline ref type %d",
- eb->start, type);
+ btrfs_err(eb->fs_info,
+ "eb %llu iref 0x%lx invalid extent inline ref type %d",
+ eb->start, (unsigned long)iref, type);
WARN_ON(1);
return BTRFS_REF_TYPE_INVALID;
@@ -9365,8 +9364,6 @@ out:
*/
if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
- if (err && err != -EAGAIN)
- btrfs_handle_fs_error(fs_info, err, NULL);
return err;
}
@@ -10554,7 +10551,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
- goto out_put_group;
+ goto out;
}
/*
@@ -10591,7 +10588,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
- goto out_put_group;
+ goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
@@ -10614,13 +10611,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
- goto out_put_group;
+ goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
- goto out_put_group;
+ goto out;
btrfs_release_path(path);
}
@@ -10629,6 +10626,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
+ /* Once for the block groups rbtree */
+ btrfs_put_block_group(block_group);
+
if (fs_info->first_logical_byte == block_group->key.objectid)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -10778,10 +10778,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = remove_block_group_free_space(trans, fs_info, block_group);
if (ret)
- goto out_put_group;
-
- /* Once for the block groups rbtree */
- btrfs_put_block_group(block_group);
+ goto out;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
@@ -10791,10 +10788,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
-out_put_group:
+out:
/* Once for the lookup reference */
btrfs_put_block_group(block_group);
-out:
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a8be9478ca3e..fd56c22c12a0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1721,7 +1721,8 @@ static int __process_pages_contig(struct address_space *mapping,
if (!PageDirty(pages[i]) ||
pages[i]->mapping != mapping) {
unlock_page(pages[i]);
- put_page(pages[i]);
+ for (; i < ret; i++)
+ put_page(pages[i]);
err = -EAGAIN;
goto out;
}
@@ -3868,6 +3869,10 @@ retry:
if (!ret) {
free_extent_buffer(eb);
continue;
+ } else if (ret < 0) {
+ done = 1;
+ free_extent_buffer(eb);
+ break;
}
ret = write_one_eb(eb, fs_info, wbc, &epd);
@@ -4322,6 +4327,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
/* once for us */
free_extent_map(em);
+
+ cond_resched(); /* Allow large-extent preemption. */
}
}
return try_release_extent_state(map, tree, page, mask);
@@ -4862,25 +4869,28 @@ struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
- /* the ref bit is tricky. We have to make sure it is set
- * if we have the buffer dirty. Otherwise the
- * code to free a buffer can end up dropping a dirty
- * page
+ /*
+ * The TREE_REF bit is first set when the extent_buffer is added
+ * to the radix tree. It is also reset, if unset, when a new reference
+ * is created by find_extent_buffer.
*
- * Once the ref bit is set, it won't go away while the
- * buffer is dirty or in writeback, and it also won't
- * go away while we have the reference count on the
- * eb bumped.
+ * It is only cleared in two cases: freeing the last non-tree
+ * reference to the extent_buffer when its STALE bit is set or
+ * calling releasepage when the tree reference is the only reference.
*
- * We can't just set the ref bit without bumping the
- * ref on the eb because free_extent_buffer might
- * see the ref bit and try to clear it. If this happens
- * free_extent_buffer might end up dropping our original
- * ref by mistake and freeing the page before we are able
- * to add one more ref.
+ * In both cases, care is taken to ensure that the extent_buffer's
+ * pages are not under io. However, releasepage can be concurrently
+ * called with creating new references, which is prone to race
+ * conditions between the calls to check_buffer_tree_ref in those
+ * codepaths and clearing TREE_REF in try_release_extent_buffer.
*
- * So bump the ref count first, then set the bit. If someone
- * beat us to it, drop the ref we added.
+ * The actual lifetime of the extent_buffer in the radix tree is
+ * adequately protected by the refcount, but the TREE_REF bit and
+ * its corresponding reference are not. To protect against this
+ * class of races, we call check_buffer_tree_ref from the codepaths
+ * which trigger io after they set eb->io_pages. Note that once io is
+ * initiated, TREE_REF can no longer be cleared, so that is the
+ * moment at which any such race is best fixed.
*/
refs = atomic_read(&eb->refs);
if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
@@ -5344,6 +5354,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
+ /*
+ * It is possible for releasepage to clear the TREE_REF bit before we
+ * set io_pages. See check_buffer_tree_ref for a more detailed comment.
+ */
+ check_buffer_tree_ref(eb);
for (i = 0; i < num_pages; i++) {
page = eb->pages[i];
@@ -5437,9 +5452,9 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
}
}
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dstv,
- unsigned long start, unsigned long len)
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dstv,
+ unsigned long start, unsigned long len)
{
size_t cur;
size_t offset;
@@ -5460,7 +5475,7 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb,
cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
- if (copy_to_user(dst, kaddr + offset, cur)) {
+ if (probe_user_write(dst, kaddr + offset, cur)) {
ret = -EFAULT;
break;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index e5535bbe6953..90c5095ae97e 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -455,9 +455,9 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
unsigned long start,
unsigned long len);
-int read_extent_buffer_to_user(const struct extent_buffer *eb,
- void __user *dst, unsigned long start,
- unsigned long len);
+int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
+ void __user *dst, unsigned long start,
+ unsigned long len);
void write_extent_buffer_fsid(struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(struct extent_buffer *eb,
const void *src);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 717d82d51bb1..684517086138 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -599,7 +599,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u64 end_byte = bytenr + len;
u64 csum_end;
struct extent_buffer *leaf;
- int ret;
+ int ret = 0;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int blocksize_bits = fs_info->sb->s_blocksize_bits;
@@ -615,6 +615,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -671,7 +672,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, root, path,
path->slots[0], del_nr);
if (ret)
- goto out;
+ break;
if (key.offset == bytenr)
break;
} else if (key.offset < bytenr && csum_end > end_byte) {
@@ -715,8 +716,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
btrfs_abort_transaction(trans, ret);
- goto out;
+ break;
}
+ ret = 0;
key.offset = end_byte - 1;
} else {
@@ -726,8 +728,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
}
- ret = 0;
-out:
btrfs_free_path(path);
return ret;
}
@@ -795,10 +795,12 @@ again:
nritems = btrfs_header_nritems(path->nodes[0]);
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
- if (ret == 1)
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
found_next = 1;
- if (ret != 0)
goto insert;
+ }
slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 725544ec9c84..2f386d8dbd0e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1100,7 +1100,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int del_nr = 0;
int del_slot = 0;
int recow;
- int ret;
+ int ret = 0;
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
@@ -1320,7 +1320,7 @@ again:
}
out:
btrfs_free_path(path);
- return 0;
+ return ret;
}
/*
@@ -2102,53 +2102,12 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
+
/*
- * We might have have had more pages made dirty after calling
- * start_ordered_ops and before acquiring the inode's i_mutex.
+ * We have to do this here to avoid the priority inversion of waiting on
+ * IO of a lower priority task while holding a transaciton open.
*/
- if (full_sync) {
- /*
- * For a full sync, we need to make sure any ordered operations
- * start and finish before we start logging the inode, so that
- * all extents are persisted and the respective file extent
- * items are in the fs/subvol btree.
- */
- ret = btrfs_wait_ordered_range(inode, start, len);
- } else {
- /*
- * Start any new ordered operations before starting to log the
- * inode. We will wait for them to finish in btrfs_sync_log().
- *
- * Right before acquiring the inode's mutex, we might have new
- * writes dirtying pages, which won't immediately start the
- * respective ordered operations - that is done through the
- * fill_delalloc callbacks invoked from the writepage and
- * writepages address space operations. So make sure we start
- * all ordered operations before starting to log our inode. Not
- * doing this means that while logging the inode, writeback
- * could start and invoke writepage/writepages, which would call
- * the fill_delalloc callbacks (cow_file_range,
- * submit_compressed_extents). These callbacks add first an
- * extent map to the modified list of extents and then create
- * the respective ordered operation, which means in
- * tree-log.c:btrfs_log_inode() we might capture all existing
- * ordered operations (with btrfs_get_logged_extents()) before
- * the fill_delalloc callback adds its ordered operation, and by
- * the time we visit the modified list of extent maps (with
- * btrfs_log_changed_extents()), we see and process the extent
- * map they created. We then use the extent map to construct a
- * file extent item for logging without waiting for the
- * respective ordered operation to finish - this file extent
- * item points to a disk location that might not have yet been
- * written to, containing random data - so after a crash a log
- * replay will make our inode have file extent items that point
- * to disk locations containing invalid data, as we returned
- * success to userspace without waiting for the respective
- * ordered operation to finish, because it wasn't captured by
- * btrfs_get_logged_extents().
- */
- ret = start_ordered_ops(inode, start, end);
- }
+ ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
@@ -2283,13 +2242,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
}
- if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start, len);
- if (ret) {
- btrfs_end_transaction(trans);
- goto out;
- }
- }
ret = btrfs_commit_transaction(trans);
} else {
ret = btrfs_end_transaction(trans);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index abeb26d48d0a..b272299afb67 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -759,8 +759,10 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
while (num_entries) {
e = kmem_cache_zalloc(btrfs_free_space_cachep,
GFP_NOFS);
- if (!e)
+ if (!e) {
+ ret = -ENOMEM;
goto free_cache;
+ }
ret = io_ctl_read_entry(&io_ctl, e, &type);
if (ret) {
@@ -769,6 +771,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
}
if (!e->bytes) {
+ ret = -1;
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
@@ -788,6 +791,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
num_bitmaps--;
e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
if (!e->bitmap) {
+ ret = -ENOMEM;
kmem_cache_free(
btrfs_free_space_cachep, e);
goto free_cache;
@@ -1169,7 +1173,6 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
ret = update_cache_item(trans, root, inode, path, offset,
io_ctl->entries, io_ctl->bitmaps);
out:
- io_ctl_free(io_ctl);
if (ret) {
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
@@ -1334,6 +1337,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
* them out later
*/
io_ctl_drop_pages(io_ctl);
+ io_ctl_free(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state, GFP_NOFS);
@@ -2169,7 +2173,7 @@ out:
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, bool update_stat)
{
- struct btrfs_free_space *left_info;
+ struct btrfs_free_space *left_info = NULL;
struct btrfs_free_space *right_info;
bool merged = false;
u64 offset = info->offset;
@@ -2184,7 +2188,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
- else
+ else if (!right_info)
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
if (right_info && !right_info->bitmap) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2a196bb134d9..692d0d71e8c5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -629,7 +629,21 @@ cont:
btrfs_free_reserved_data_space_noquota(inode,
start,
end - start + 1);
- goto free_pages_out;
+
+ /*
+ * Ensure we only free the compressed pages if we have
+ * them allocated, as we can still reach here with
+ * inode_need_compress() == false.
+ */
+ if (pages) {
+ for (i = 0; i < nr_pages; i++) {
+ WARN_ON(pages[i]->mapping);
+ put_page(pages[i]);
+ }
+ kfree(pages);
+ }
+
+ return;
}
}
@@ -708,13 +722,6 @@ cleanup_and_bail_uncompressed:
*num_added += 1;
return;
-
-free_pages_out:
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
- }
- kfree(pages);
}
static void free_async_extent_pages(struct async_extent *async_extent)
@@ -978,8 +985,8 @@ static noinline int cow_file_range(struct inode *inode,
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
- u64 disk_num_bytes;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -996,7 +1003,6 @@ static noinline int cow_file_range(struct inode *inode,
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
- disk_num_bytes = num_bytes;
inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
@@ -1023,17 +1029,32 @@ static noinline int cow_file_range(struct inode *inode,
}
}
- BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(fs_info->super_copy));
+ BUG_ON(num_bytes > btrfs_super_total_bytes(fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
- while (disk_num_bytes > 0) {
- cur_alloc_size = disk_num_bytes;
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
+ while (num_bytes > 0) {
+ cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1097,11 +1118,10 @@ static noinline int cow_file_range(struct inode *inode,
delalloc_end, locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
- if (disk_num_bytes < cur_alloc_size)
- disk_num_bytes = 0;
+ if (num_bytes < cur_alloc_size)
+ num_bytes = 0;
else
- disk_num_bytes -= cur_alloc_size;
- num_bytes -= cur_alloc_size;
+ num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
extent_reserved = false;
@@ -1139,8 +1159,8 @@ out_unlock:
*/
if (extent_reserved) {
extent_clear_unlock_delalloc(inode, start,
- start + cur_alloc_size,
- start + cur_alloc_size,
+ start + cur_alloc_size - 1,
+ start + cur_alloc_size - 1,
locked_page,
clear_bits,
page_ops);
@@ -1315,7 +1335,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 disk_num_bytes;
u64 ram_bytes;
int extent_type;
- int ret, err;
+ int ret;
int type;
int nocow;
int check_prev = 1;
@@ -1440,11 +1460,8 @@ next_slot:
* if there are pending snapshots for this root,
* we fall into common COW way.
*/
- if (!nolock) {
- err = btrfs_start_write_no_snapshotting(root);
- if (!err)
- goto out_check;
- }
+ if (!nolock && atomic_read(&root->snapshot_force_cow))
+ goto out_check;
/*
* force cow if csum exists in the range.
* this ensure that csum for a given extent are
@@ -1453,9 +1470,6 @@ next_slot:
ret = csum_exist_in_range(fs_info, disk_bytenr,
num_bytes);
if (ret) {
- if (!nolock)
- btrfs_end_write_no_snapshotting(root);
-
/*
* ret could be -EIO if the above fails to read
* metadata.
@@ -1468,11 +1482,8 @@ next_slot:
WARN_ON_ONCE(nolock);
goto out_check;
}
- if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
- if (!nolock)
- btrfs_end_write_no_snapshotting(root);
+ if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
- }
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
@@ -1485,8 +1496,6 @@ next_slot:
out_check:
if (extent_end <= start) {
path->slots[0]++;
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
goto next_slot;
@@ -1508,8 +1517,6 @@ out_check:
end, page_started, nr_written, 1,
NULL);
if (ret) {
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1529,8 +1536,6 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1569,8 +1574,6 @@ out_check:
EXTENT_CLEAR_DATA_RESV,
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
- if (!nolock && nocow)
- btrfs_end_write_no_snapshotting(root);
cur_offset = extent_end;
/*
@@ -5581,11 +5584,14 @@ no_delete:
}
/*
- * this returns the key found in the dir entry in the location pointer.
- * If no dir entries were found, location->objectid is 0.
+ * Return the key found in the dir entry in the location pointer, fill @type
+ * with BTRFS_FT_*, and return 0.
+ *
+ * If no dir entries were found, returns -ENOENT.
+ * If found a corrupted location in dir entry, returns -EUCLEAN.
*/
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
- struct btrfs_key *location)
+ struct btrfs_key *location, u8 *type)
{
const char *name = dentry->d_name.name;
int namelen = dentry->d_name.len;
@@ -5600,27 +5606,29 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
name, namelen, 0);
- if (IS_ERR(di))
+ if (!di) {
+ ret = -ENOENT;
+ goto out;
+ }
+ if (IS_ERR(di)) {
ret = PTR_ERR(di);
-
- if (IS_ERR_OR_NULL(di))
- goto out_err;
+ goto out;
+ }
btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
if (location->type != BTRFS_INODE_ITEM_KEY &&
location->type != BTRFS_ROOT_ITEM_KEY) {
+ ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
__func__, name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
- goto out_err;
}
+ if (!ret)
+ *type = btrfs_dir_type(path->nodes[0], di);
out:
btrfs_free_path(path);
return ret;
-out_err:
- location->objectid = 0;
- goto out;
}
/*
@@ -5904,6 +5912,11 @@ static struct inode *new_simple_dir(struct super_block *s,
return inode;
}
+static inline u8 btrfs_inode_type(struct inode *inode)
+{
+ return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
+}
+
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
@@ -5911,21 +5924,31 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *sub_root = root;
struct btrfs_key location;
+ u8 di_type = 0;
int index;
int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
if (ret < 0)
return ERR_PTR(ret);
- if (location.objectid == 0)
- return ERR_PTR(-ENOENT);
-
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ if (IS_ERR(inode))
+ return inode;
+
+ /* Do extra check against inode mode with di_type */
+ if (btrfs_inode_type(inode) != di_type) {
+ btrfs_crit(fs_info,
+"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
+ inode->i_mode, btrfs_inode_type(inode),
+ di_type);
+ iput(inode);
+ return ERR_PTR(-EUCLEAN);
+ }
return inode;
}
@@ -6543,11 +6566,6 @@ fail:
return ERR_PTR(ret);
}
-static inline u8 btrfs_inode_type(struct inode *inode)
-{
- return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
-}
-
/*
* utility function to add 'inode' into 'parent_inode' with
* a give name and a given sequence number.
@@ -7159,6 +7177,14 @@ again:
extent_start = found_key.offset;
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ /* Only regular file could have regular/prealloc extent */
+ if (!S_ISREG(inode->vfs_inode.i_mode)) {
+ err = -EUCLEAN;
+ btrfs_crit(fs_info,
+ "regular/prealloc extent found for non-regular inode %llu",
+ btrfs_ino(inode));
+ goto out;
+ }
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
@@ -8707,7 +8733,6 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
/* bio split */
ASSERT(map_length <= INT_MAX);
- atomic_inc(&dip->pending_bios);
do {
clone_len = min_t(int, submit_len, map_length);
@@ -8758,7 +8783,8 @@ submit:
if (!status)
return 0;
- bio_put(bio);
+ if (bio != orig_bio)
+ bio_put(bio);
out_err:
dip->errors = 1;
/*
@@ -8798,7 +8824,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
bio->bi_private = dip;
dip->orig_bio = bio;
dip->dio_bio = dio_bio;
- atomic_set(&dip->pending_bios, 0);
+ atomic_set(&dip->pending_bios, 1);
io_bio = btrfs_io_bio(bio);
io_bio->logical = file_offset;
@@ -8947,9 +8973,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
@@ -9187,20 +9210,17 @@ again:
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -9813,8 +9833,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
bool root_log_pinned = false;
bool dest_log_pinned = false;
- /* we only allow rename subvolume link between subvolumes */
- if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ /*
+ * For non-subvolumes allow exchange only within one subvolume, in the
+ * same inode namespace. Two subvolumes (represented as directory) can
+ * be exchanged as they're a logical link and have a fixed inode number.
+ */
+ if (root != dest &&
+ (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;
/* close the race window with snapshot create/destroy ioctl */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e82b4f3f490c..fa64a1127c97 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -527,8 +527,6 @@ static noinline int create_subvol(struct inode *dir,
btrfs_set_root_otransid(root_item, trans->transid);
btrfs_tree_unlock(leaf);
- free_extent_buffer(leaf);
- leaf = NULL;
btrfs_set_root_dirid(root_item, new_dirid);
@@ -537,8 +535,22 @@ static noinline int create_subvol(struct inode *dir,
key.type = BTRFS_ROOT_ITEM_KEY;
ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
root_item);
- if (ret)
+ if (ret) {
+ /*
+ * Since we don't abort the transaction in this case, free the
+ * tree block so that we don't leak space and leave the
+ * filesystem in an inconsistent state (an extent item in the
+ * extent tree without backreferences). Also no need to have
+ * the tree block locked since it is not in any tree at this
+ * point, so no other task can find it and use it.
+ */
+ btrfs_free_tree_block(trans, root, leaf, 0, 1);
+ free_extent_buffer(leaf);
goto fail;
+ }
+
+ free_extent_buffer(leaf);
+ leaf = NULL;
key.offset = (u64)-1;
new_root = btrfs_read_fs_root_no_name(fs_info, &key);
@@ -655,6 +667,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
int ret;
+ bool snapshot_force_cow = false;
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return -EINVAL;
@@ -671,6 +684,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
goto free_pending;
}
+ /*
+ * Force new buffered writes to reserve space even when NOCOW is
+ * possible. This is to avoid later writeback (running dealloc) to
+ * fallback to COW mode and unexpectedly fail with ENOSPC.
+ */
atomic_inc(&root->will_be_snapshotted);
smp_mb__after_atomic();
btrfs_wait_for_no_snapshotting_writes(root);
@@ -679,6 +697,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
goto dec_and_free;
+ /*
+ * All previous writes have started writeback in NOCOW mode, so now
+ * we force future writes to fallback to COW mode during snapshot
+ * creation.
+ */
+ atomic_inc(&root->snapshot_force_cow);
+ snapshot_force_cow = true;
+
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
@@ -744,6 +770,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
fail:
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
+ if (snapshot_force_cow)
+ atomic_dec(&root->snapshot_force_cow);
if (atomic_dec_and_test(&root->will_be_snapshotted))
wake_up_atomic_t(&root->will_be_snapshotted);
free_pending:
@@ -2021,9 +2049,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
sh.len = item_len;
sh.transid = found_transid;
- /* copy search result header */
- if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
- ret = -EFAULT;
+ /*
+ * Copy search result header. If we fault then loop again so we
+ * can fault in the pages and -EFAULT there if there's a
+ * problem. Otherwise we'll fault and then copy the buffer in
+ * properly this next time through
+ */
+ if (probe_user_write(ubuf + *sk_offset, &sh, sizeof(sh))) {
+ ret = 0;
goto out;
}
@@ -2031,10 +2064,14 @@ static noinline int copy_to_sk(struct btrfs_path *path,
if (item_len) {
char __user *up = ubuf + *sk_offset;
- /* copy the item */
- if (read_extent_buffer_to_user(leaf, up,
- item_off, item_len)) {
- ret = -EFAULT;
+ /*
+ * Copy the item, same behavior as above, but reset the
+ * * sk_offset so we copy the full thing again.
+ */
+ if (read_extent_buffer_to_user_nofault(leaf, up,
+ item_off, item_len)) {
+ ret = 0;
+ *sk_offset -= sizeof(sh);
goto out;
}
@@ -2122,6 +2159,11 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = sk->min_offset;
while (1) {
+ ret = fault_in_pages_writeable(ubuf + sk_offset,
+ *buf_size - sk_offset);
+ if (ret)
+ break;
+
ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
@@ -3833,6 +3875,8 @@ process_slot:
ret = -EINTR;
goto out;
}
+
+ cond_resched();
}
ret = 0;
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 47336d4b19d8..f96eb176166d 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -116,9 +116,10 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
* offset is supposed to be a tree block which
* must be aligned to nodesize.
*/
- if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
- pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n",
- offset, (unsigned long long)eb->fs_info->nodesize);
+ if (!IS_ALIGNED(offset, eb->fs_info->sectorsize))
+ pr_info(
+ "\t\t\t(parent %llu not aligned to sectorsize %u)\n",
+ offset, eb->fs_info->sectorsize);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -133,8 +134,9 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
* must be aligned to nodesize.
*/
if (!IS_ALIGNED(offset, eb->fs_info->nodesize))
- pr_info("\t\t\t(parent %llu is NOT ALIGNED to nodesize %llu)\n",
- offset, (unsigned long long)eb->fs_info->nodesize);
+ pr_info(
+ "\t\t\t(parent %llu not aligned to sectorsize %u)\n",
+ offset, eb->fs_info->sectorsize);
break;
default:
pr_cont("(extent %llu has INVALID ref type %d)\n",
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 39a00b57ff01..47c28983fd01 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -424,13 +424,13 @@ next2:
break;
}
out:
+ btrfs_free_path(path);
fs_info->qgroup_flags |= flags;
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
ret >= 0)
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
- btrfs_free_path(path);
if (ret < 0) {
ulist_free(fs_info->qgroup_ulist);
@@ -2614,6 +2614,12 @@ out:
return ret;
}
+static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
+{
+ return btrfs_fs_closing(fs_info) ||
+ test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
@@ -2622,13 +2628,14 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
struct btrfs_trans_handle *trans = NULL;
int err = -ENOMEM;
int ret = 0;
+ bool stopped = false;
path = btrfs_alloc_path();
if (!path)
goto out;
err = 0;
- while (!err && !btrfs_fs_closing(fs_info)) {
+ while (!err && !(stopped = rescan_should_stop(fs_info))) {
trans = btrfs_start_transaction(fs_info->fs_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2671,7 +2678,7 @@ out:
}
mutex_lock(&fs_info->qgroup_rescan_lock);
- if (!btrfs_fs_closing(fs_info))
+ if (!stopped)
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (trans) {
ret = update_qgroup_status_item(trans);
@@ -2690,7 +2697,7 @@ out:
btrfs_end_transaction(trans);
- if (btrfs_fs_closing(fs_info)) {
+ if (stopped) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 1e35a2327478..5873d4f1094f 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1190,22 +1190,19 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
- int p_stripe = -1;
- int q_stripe = -1;
+ bool has_qstripe;
struct bio_list bio_list;
struct bio *bio;
int ret;
bio_list_init(&bio_list);
- if (rbio->real_stripes - rbio->nr_data == 1) {
- p_stripe = rbio->real_stripes - 1;
- } else if (rbio->real_stripes - rbio->nr_data == 2) {
- p_stripe = rbio->real_stripes - 2;
- q_stripe = rbio->real_stripes - 1;
- } else {
+ if (rbio->real_stripes - rbio->nr_data == 1)
+ has_qstripe = false;
+ else if (rbio->real_stripes - rbio->nr_data == 2)
+ has_qstripe = true;
+ else
BUG();
- }
/* at this point we either have a full stripe,
* or we've read the full stripe from the drive.
@@ -1249,7 +1246,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
SetPageUptodate(p);
pointers[stripe++] = kmap(p);
- if (q_stripe != -1) {
+ if (has_qstripe) {
/*
* raid6, add the qstripe and call the
@@ -2325,8 +2322,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
- int p_stripe = -1;
- int q_stripe = -1;
+ bool has_qstripe;
struct page *p_page = NULL;
struct page *q_page = NULL;
struct bio_list bio_list;
@@ -2336,14 +2332,12 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
bio_list_init(&bio_list);
- if (rbio->real_stripes - rbio->nr_data == 1) {
- p_stripe = rbio->real_stripes - 1;
- } else if (rbio->real_stripes - rbio->nr_data == 2) {
- p_stripe = rbio->real_stripes - 2;
- q_stripe = rbio->real_stripes - 1;
- } else {
+ if (rbio->real_stripes - rbio->nr_data == 1)
+ has_qstripe = false;
+ else if (rbio->real_stripes - rbio->nr_data == 2)
+ has_qstripe = true;
+ else
BUG();
- }
if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
is_replace = 1;
@@ -2365,17 +2359,22 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
goto cleanup;
SetPageUptodate(p_page);
- if (q_stripe != -1) {
+ if (has_qstripe) {
+ /* RAID6, allocate and map temp space for the Q stripe */
q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!q_page) {
__free_page(p_page);
goto cleanup;
}
SetPageUptodate(q_page);
+ pointers[rbio->real_stripes - 1] = kmap(q_page);
}
atomic_set(&rbio->error, 0);
+ /* Map the parity stripe just once */
+ pointers[nr_data] = kmap(p_page);
+
for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
struct page *p;
void *parity;
@@ -2385,17 +2384,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers[stripe] = kmap(p);
}
- /* then add the parity stripe */
- pointers[stripe++] = kmap(p_page);
-
- if (q_stripe != -1) {
-
- /*
- * raid6, add the qstripe and call the
- * library function to fill in our p/q
- */
- pointers[stripe++] = kmap(q_page);
-
+ if (has_qstripe) {
+ /* RAID6, call the library function to fill in our P/Q */
raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
pointers);
} else {
@@ -2416,12 +2406,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
- kunmap(p_page);
}
+ kunmap(p_page);
__free_page(p_page);
- if (q_page)
+ if (q_page) {
+ kunmap(q_page);
__free_page(q_page);
+ }
writeback:
/*
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 440c0d5d2050..3a3ef73d877e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -456,6 +456,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
}
have_zone = 1;
}
+ if (!have_zone)
+ radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index f4397dd19583..313547442a6e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1344,9 +1344,7 @@ static void __del_reloc_root(struct btrfs_root *root)
RB_CLEAR_NODE(&node->rb_node);
}
spin_unlock(&rc->reloc_root_tree.lock);
- if (!node)
- return;
- BUG_ON((struct btrfs_root *)node->data != root);
+ ASSERT(!node || (struct btrfs_root *)node->data == root);
}
spin_lock(&fs_info->trans_lock);
@@ -1810,8 +1808,8 @@ int replace_path(struct btrfs_trans_handle *trans,
int ret;
int slot;
- BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(src->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
+ ASSERT(dest->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
last_snapshot = btrfs_root_last_snapshot(&src->root_item);
again:
@@ -1843,7 +1841,7 @@ again:
parent = eb;
while (1) {
level = btrfs_header_level(parent);
- BUG_ON(level < lowest_level);
+ ASSERT(level >= lowest_level);
ret = btrfs_bin_search(parent, &key, level, &slot);
if (ret && slot > 0)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index ca15d65a2070..ad570c3b6a0b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -35,6 +35,7 @@
#include "btrfs_inode.h"
#include "transaction.h"
#include "compression.h"
+#include "xattr.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
@@ -3820,6 +3821,72 @@ static int update_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
}
/*
+ * When processing the new references for an inode we may orphanize an existing
+ * directory inode because its old name conflicts with one of the new references
+ * of the current inode. Later, when processing another new reference of our
+ * inode, we might need to orphanize another inode, but the path we have in the
+ * reference reflects the pre-orphanization name of the directory we previously
+ * orphanized. For example:
+ *
+ * parent snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- f1 (ino 257)
+ * |----- f2 (ino 258)
+ * |----- d1/ (ino 259)
+ * |----- d2/ (ino 260)
+ *
+ * send snapshot looks like:
+ *
+ * . (ino 256)
+ * |----- d1 (ino 258)
+ * |----- f2/ (ino 259)
+ * |----- f2_link/ (ino 260)
+ * | |----- f1 (ino 257)
+ * |
+ * |----- d2 (ino 258)
+ *
+ * When processing inode 257 we compute the name for inode 259 as "d1", and we
+ * cache it in the name cache. Later when we start processing inode 258, when
+ * collecting all its new references we set a full path of "d1/d2" for its new
+ * reference with name "d2". When we start processing the new references we
+ * start by processing the new reference with name "d1", and this results in
+ * orphanizing inode 259, since its old reference causes a conflict. Then we
+ * move on the next new reference, with name "d2", and we find out we must
+ * orphanize inode 260, as its old reference conflicts with ours - but for the
+ * orphanization we use a source path corresponding to the path we stored in the
+ * new reference, which is "d1/d2" and not "o259-6-0/d2" - this makes the
+ * receiver fail since the path component "d1/" no longer exists, it was renamed
+ * to "o259-6-0/" when processing the previous new reference. So in this case we
+ * must recompute the path in the new reference and use it for the new
+ * orphanization operation.
+ */
+static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref)
+{
+ char *name;
+ int ret;
+
+ name = kmemdup(ref->name, ref->name_len, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+
+ fs_path_reset(ref->full_path);
+ ret = get_cur_path(sctx, ref->dir, ref->dir_gen, ref->full_path);
+ if (ret < 0)
+ goto out;
+
+ ret = fs_path_add(ref->full_path, name, ref->name_len);
+ if (ret < 0)
+ goto out;
+
+ /* Update the reference's base name pointer. */
+ set_ref_path(ref, ref->full_path);
+out:
+ kfree(name);
+ return ret;
+}
+
+/*
* This does all the move/link/unlink/rmdir magic.
*/
static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
@@ -3949,6 +4016,12 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
struct name_cache_entry *nce;
struct waiting_dir_move *wdm;
+ if (orphanized_dir) {
+ ret = refresh_ref_path(sctx, cur);
+ if (ret < 0)
+ goto out;
+ }
+
ret = orphanize_inode(sctx, ow_inode, ow_gen,
cur->full_path);
if (ret < 0)
@@ -4005,6 +4078,17 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
if (ret < 0)
goto out;
} else {
+ /*
+ * If we previously orphanized a directory that
+ * collided with a new reference that we already
+ * processed, recompute the current path because
+ * that directory may be part of the path.
+ */
+ if (orphanized_dir) {
+ ret = refresh_ref_path(sctx, cur);
+ if (ret < 0)
+ goto out;
+ }
ret = send_unlink(sctx, cur->full_path);
if (ret < 0)
goto out;
@@ -4554,6 +4638,10 @@ static int __process_new_xattr(int num, struct btrfs_key *di_key,
struct fs_path *p;
struct posix_acl_xattr_header dummy_acl;
+ /* Capabilities are emitted by finish_inode_if_needed */
+ if (!strncmp(name, XATTR_NAME_CAPS, name_len))
+ return 0;
+
p = fs_path_alloc();
if (!p)
return -ENOMEM;
@@ -5096,6 +5184,64 @@ static int send_extent_data(struct send_ctx *sctx,
return 0;
}
+/*
+ * Search for a capability xattr related to sctx->cur_ino. If the capability is
+ * found, call send_set_xattr function to emit it.
+ *
+ * Return 0 if there isn't a capability, or when the capability was emitted
+ * successfully, or < 0 if an error occurred.
+ */
+static int send_capabilities(struct send_ctx *sctx)
+{
+ struct fs_path *fspath = NULL;
+ struct btrfs_path *path;
+ struct btrfs_dir_item *di;
+ struct extent_buffer *leaf;
+ unsigned long data_ptr;
+ char *buf = NULL;
+ int buf_len;
+ int ret = 0;
+
+ path = alloc_path_for_send();
+ if (!path)
+ return -ENOMEM;
+
+ di = btrfs_lookup_xattr(NULL, sctx->send_root, path, sctx->cur_ino,
+ XATTR_NAME_CAPS, strlen(XATTR_NAME_CAPS), 0);
+ if (!di) {
+ /* There is no xattr for this inode */
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ buf_len = btrfs_dir_data_len(leaf, di);
+
+ fspath = fs_path_alloc();
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!fspath || !buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, fspath);
+ if (ret < 0)
+ goto out;
+
+ data_ptr = (unsigned long)(di + 1) + btrfs_dir_name_len(leaf, di);
+ read_extent_buffer(leaf, buf, data_ptr, buf_len);
+
+ ret = send_set_xattr(sctx, fspath, XATTR_NAME_CAPS,
+ strlen(XATTR_NAME_CAPS), buf, buf_len);
+out:
+ kfree(buf);
+ fs_path_free(fspath);
+ btrfs_free_path(path);
+ return ret;
+}
+
static int clone_range(struct send_ctx *sctx,
struct clone_root *clone_root,
const u64 disk_byte,
@@ -5907,6 +6053,10 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
}
+ ret = send_capabilities(sctx);
+ if (ret < 0)
+ goto out;
+
/*
* If other directory inodes depended on our current directory
* inode's move/rename, now do their move/rename operations.
@@ -6562,7 +6712,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
alloc_size = sizeof(struct clone_root) * (arg->clone_sources_count + 1);
- sctx->clone_roots = kzalloc(alloc_size, GFP_KERNEL);
+ sctx->clone_roots = kvzalloc(alloc_size, GFP_KERNEL);
if (!sctx->clone_roots) {
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 17a8463ef35c..4edfd26594ed 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -939,8 +939,8 @@ out:
return error;
}
-static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
- u64 subvol_objectid)
+char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+ u64 subvol_objectid)
{
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_root *fs_root;
@@ -1221,6 +1221,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
char *compress_type;
+ const char *subvol_name;
if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
@@ -1307,8 +1308,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid);
- seq_puts(seq, ",subvol=");
- seq_dentry(seq, dentry, " \t\n\\");
+ subvol_name = btrfs_get_subvol_name_from_objectid(info,
+ BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+ if (!IS_ERR(subvol_name)) {
+ seq_puts(seq, ",subvol=");
+ seq_escape(seq, subvol_name, " \t\n\\");
+ kfree(subvol_name);
+ }
return 0;
}
@@ -1427,8 +1433,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
goto out;
}
}
- subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
- subvol_objectid);
+ subvol_name = btrfs_get_subvol_name_from_objectid(
+ btrfs_sb(mnt->mnt_sb), subvol_objectid);
if (IS_ERR(subvol_name)) {
root = ERR_CAST(subvol_name);
subvol_name = NULL;
@@ -1778,6 +1784,14 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_scrub_cancel(fs_info);
btrfs_pause_balance(fs_info);
+ /*
+ * Pause the qgroup rescan worker if it is running. We don't want
+ * it to be still running after we are in RO mode, as after that,
+ * by the time we unmount, it might have left a transaction open,
+ * so we would leak the transaction and/or crash.
+ */
+ btrfs_qgroup_wait_for_completion(fs_info, false);
+
ret = btrfs_commit_super(fs_info);
if (ret)
goto restore;
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index f05341bda1d1..383546ff62f0 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -25,6 +25,7 @@
#include <linux/bug.h>
#include <linux/genhd.h>
#include <linux/debugfs.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "disk-io.h"
@@ -749,7 +750,9 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
{
int error = 0;
struct btrfs_device *dev;
+ unsigned int nofs_flag;
+ nofs_flag = memalloc_nofs_save();
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
struct hd_struct *disk;
struct kobject *disk_kobj;
@@ -768,6 +771,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
if (error)
break;
}
+ memalloc_nofs_restore(nofs_flag);
return error;
}
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 6c92101e8092..eb6ae2450fed 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -51,7 +51,13 @@ static struct file_system_type test_type = {
struct inode *btrfs_new_test_inode(void)
{
- return new_inode(test_mnt->mnt_sb);
+ struct inode *inode;
+
+ inode = new_inode(test_mnt->mnt_sb);
+ if (inode)
+ inode_init_owner(inode, NULL, S_IFREG);
+
+ return inode;
}
static int btrfs_init_test_fs(void)
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 8c91d03cc82d..94b139e92ff3 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -245,6 +245,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
return ret;
}
+ inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a066ad581976..da40912fe086 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1319,8 +1319,10 @@ int btrfs_defrag_root(struct btrfs_root *root)
while (1) {
trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
ret = btrfs_defrag_leaves(trans, root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index bcfb7a772c8e..8f2c7aa2e91a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -901,9 +901,11 @@ out:
}
/*
- * helper function to see if a given name and sequence number found
- * in an inode back reference are already in a directory and correctly
- * point to this inode
+ * See if a given name and sequence number found in an inode back reference are
+ * already in a directory and correctly point to this inode.
+ *
+ * Returns: < 0 on error, 0 if the directory entry does not exists and 1 if it
+ * exists.
*/
static noinline int inode_in_dir(struct btrfs_root *root,
struct btrfs_path *path,
@@ -912,29 +914,35 @@ static noinline int inode_in_dir(struct btrfs_root *root,
{
struct btrfs_dir_item *di;
struct btrfs_key location;
- int match = 0;
+ int ret = 0;
di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
index, name, name_len, 0);
- if (di && !IS_ERR(di)) {
+ if (IS_ERR(di)) {
+ if (PTR_ERR(di) != -ENOENT)
+ ret = PTR_ERR(di);
+ goto out;
+ } else if (di) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
if (location.objectid != objectid)
goto out;
- } else
+ } else {
goto out;
- btrfs_release_path(path);
+ }
+ btrfs_release_path(path);
di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
- if (di && !IS_ERR(di)) {
- btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
- if (location.objectid != objectid)
- goto out;
- } else
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
goto out;
- match = 1;
+ } else if (di) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
+ if (location.objectid == objectid)
+ ret = 1;
+ }
out:
btrfs_release_path(path);
- return match;
+ return ret;
}
/*
@@ -1161,7 +1169,10 @@ next:
/* look for a conflicting sequence number */
di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
ref_index, name, namelen, 0);
- if (di && !IS_ERR(di)) {
+ if (IS_ERR(di)) {
+ if (PTR_ERR(di) != -ENOENT)
+ return PTR_ERR(di);
+ } else if (di) {
ret = drop_one_dir_item(trans, root, path, dir, di);
if (ret)
return ret;
@@ -1171,7 +1182,9 @@ next:
/* look for a conflicing name */
di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
name, namelen, 0);
- if (di && !IS_ERR(di)) {
+ if (IS_ERR(di)) {
+ return PTR_ERR(di);
+ } else if (di) {
ret = drop_one_dir_item(trans, root, path, dir, di);
if (ret)
return ret;
@@ -1314,10 +1327,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (ret)
goto out;
- /* if we already have a perfect match, we're done */
- if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
- btrfs_ino(BTRFS_I(inode)), ref_index,
- name, namelen)) {
+ ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
+ btrfs_ino(BTRFS_I(inode)), ref_index,
+ name, namelen);
+ if (ret < 0) {
+ goto out;
+ } else if (ret == 0) {
/*
* look for a conflicting back reference in the
* metadata. if we find one we have to unlink that name
@@ -1350,6 +1365,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
btrfs_update_inode(trans, root, inode);
}
+ /* Else, ret == 1, we already have a perfect match, we're done. */
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
@@ -1558,6 +1574,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
if (ret == 1) {
+ ret = 0;
if (path->slots[0] == 0)
break;
path->slots[0]--;
@@ -1570,17 +1587,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
ret = btrfs_del_item(trans, root, path);
if (ret)
- goto out;
+ break;
btrfs_release_path(path);
inode = read_one_inode(root, key.offset);
- if (!inode)
- return -EIO;
+ if (!inode) {
+ ret = -EIO;
+ break;
+ }
ret = fixup_inode_link_count(trans, root, inode);
iput(inode);
if (ret)
- goto out;
+ break;
/*
* fixup on a directory may create new entries,
@@ -1589,8 +1608,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
*/
key.offset = (u64)-1;
}
- ret = 0;
-out:
btrfs_release_path(path);
return ret;
}
@@ -1629,8 +1646,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
- } else {
- BUG(); /* Logic Error */
}
iput(inode);
@@ -1726,8 +1741,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_key log_key;
struct inode *dir;
u8 log_type;
- int exists;
- int ret = 0;
+ bool exists;
+ int ret;
bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
bool name_added = false;
@@ -1747,12 +1762,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
name_len);
btrfs_dir_item_key_to_cpu(eb, di, &log_key);
- exists = btrfs_lookup_inode(trans, root, path, &log_key, 0);
- if (exists == 0)
- exists = 1;
- else
- exists = 0;
+ ret = btrfs_lookup_inode(trans, root, path, &log_key, 0);
btrfs_release_path(path);
+ if (ret < 0)
+ goto out;
+ exists = (ret == 0);
+ ret = 0;
if (key->type == BTRFS_DIR_ITEM_KEY) {
dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1767,7 +1782,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
ret = -EINVAL;
goto out;
}
- if (IS_ERR_OR_NULL(dst_di)) {
+
+ if (dst_di == ERR_PTR(-ENOENT))
+ dst_di = NULL;
+
+ if (IS_ERR(dst_di)) {
+ ret = PTR_ERR(dst_di);
+ goto out;
+ } else if (!dst_di) {
/* we need a sequence number to insert, so we only
* do inserts for the BTRFS_DIR_INDEX_KEY types
*/
@@ -2264,7 +2286,9 @@ again:
else {
ret = find_dir_range(log, path, dirid, key_type,
&range_start, &range_end);
- if (ret != 0)
+ if (ret < 0)
+ goto out;
+ else if (ret > 0)
break;
}
@@ -3311,11 +3335,13 @@ fail:
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
- if (ret == -ENOSPC) {
+ if (err == -ENOSPC) {
btrfs_set_log_full_commit(root->fs_info, trans);
- ret = 0;
- } else if (ret < 0)
- btrfs_abort_transaction(trans, ret);
+ err = 0;
+ } else if (err < 0 && err != -ENOENT) {
+ /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ btrfs_abort_transaction(trans, err);
+ }
btrfs_end_log_trans(root);
@@ -3476,6 +3502,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
* search and this search we'll not find the key again and can just
* bail.
*/
+search:
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
if (ret != 0)
goto done;
@@ -3495,6 +3522,13 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
if (min_key.objectid != ino || min_key.type != key_type)
goto done;
+
+ if (need_resched()) {
+ btrfs_release_path(path);
+ cond_resched();
+ goto search;
+ }
+
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
if (ret) {
@@ -3854,11 +3888,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
&ordered_sums, 0);
- if (ret) {
- btrfs_release_path(dst_path);
- kfree(ins_data);
- return ret;
- }
+ if (ret)
+ break;
}
}
}
@@ -3871,7 +3902,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
* we have to do this after the loop above to avoid changing the
* log tree while trying to change the log tree.
*/
- ret = 0;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
@@ -5952,6 +5982,7 @@ next:
error:
if (wc.trans)
btrfs_end_transaction(wc.trans);
+ clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6d34842912e8..3b3c65b7d0c1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -16,6 +16,7 @@
* Boston, MA 021110-1307, USA.
*/
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
@@ -2431,9 +2432,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
tmp = btrfs_super_num_devices(fs_info->super_copy);
btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
- /* add sysfs device entry */
- btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
-
/*
* we've got more storage, clear any full flags on the space
* infos
@@ -2441,6 +2439,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
btrfs_clear_space_info_full(fs_info);
mutex_unlock(&fs_info->chunk_mutex);
+
+ /* Add sysfs device entry */
+ btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
+
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (seeding_dev) {
@@ -4174,6 +4176,7 @@ static int btrfs_uuid_scan_kthread(void *data)
goto skip;
}
update_tree:
+ btrfs_release_path(path);
if (!btrfs_is_empty_uuid(root_item.uuid)) {
ret = btrfs_uuid_tree_add(trans, fs_info,
root_item.uuid,
@@ -4199,6 +4202,7 @@ update_tree:
}
skip:
+ btrfs_release_path(path);
if (trans) {
ret = btrfs_end_transaction(trans);
trans = NULL;
@@ -4206,7 +4210,6 @@ skip:
break;
}
- btrfs_release_path(path);
if (key.offset < (u64)-1) {
key.offset++;
} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
@@ -6277,8 +6280,17 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
u64 devid, u8 *dev_uuid)
{
struct btrfs_device *device;
+ unsigned int nofs_flag;
+ /*
+ * We call this under the chunk_mutex, so we want to use NOFS for this
+ * allocation, however we don't want to change btrfs_alloc_device() to
+ * always do NOFS because we use it in a lot of other GFP_KERNEL safe
+ * places.
+ */
+ nofs_flag = memalloc_nofs_save();
device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+ memalloc_nofs_restore(nofs_flag);
if (IS_ERR(device))
return NULL;
@@ -6394,6 +6406,13 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
return -EIO;
}
+ if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
+ btrfs_err(fs_info,
+ "invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EUCLEAN;
+ }
if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
btrfs_err(fs_info, "missing chunk type flag: 0x%llx", type);
return -EIO;
@@ -6902,6 +6921,14 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
mutex_lock(&fs_info->chunk_mutex);
/*
+ * It is possible for mount and umount to race in such a way that
+ * we execute this code path, but open_fs_devices failed to clear
+ * total_rw_bytes. We certainly want it cleared before reading the
+ * device items, so clear it here.
+ */
+ fs_info->fs_devices->total_rw_bytes = 0;
+
+ /*
* Read all device items, and then all the chunk items. All
* device items are found before any chunk item (their object id
* is smaller than the lowest possible object id for a chunk
diff --git a/fs/buffer.c b/fs/buffer.c
index cae7f24a0410..9ce03fc5eb32 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2799,16 +2799,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
-#if 0
- /* Not really sure about this - do we need this ? */
- if (page->mapping->a_ops->invalidatepage)
- page->mapping->a_ops->invalidatepage(page, offset);
-#endif
unlock_page(page);
return 0; /* don't care */
}
@@ -3003,12 +2993,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
- /*
- * The page may have dirty, unmapped buffers. For example,
- * they may have been added in ext3_writepage(). Make them
- * freeable here, so the page does not leak.
- */
- do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
@@ -3250,6 +3234,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
+ /*
+ * The bh should be mapped, but it might not be if the
+ * device was hot-removed. Not much we can do but fail the I/O.
+ */
+ if (!buffer_mapped(bh)) {
+ unlock_buffer(bh);
+ return -EIO;
+ }
+
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 5e9176ec0d3a..8300e4755882 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -64,9 +64,9 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
object = container_of(op->op.object, struct cachefiles_object, fscache);
spin_lock(&object->work_lock);
list_add_tail(&monitor->op_link, &op->to_do);
+ fscache_enqueue_retrieval(op);
spin_unlock(&object->work_lock);
- fscache_enqueue_retrieval(op);
fscache_put_retrieval(op);
return 0;
}
@@ -125,7 +125,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
_debug("reissue read");
ret = bmapping->a_ops->readpage(NULL, backpage);
if (ret < 0)
- goto unlock_discard;
+ goto discard;
}
/* but the page may have been read before the monitor was installed, so
@@ -142,6 +142,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
unlock_discard:
unlock_page(backpage);
+discard:
spin_lock_irq(&object->work_lock);
list_del(&monitor->op_link);
spin_unlock_irq(&object->work_lock);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4d622654bfbc..1dba2b95fe8e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -74,10 +74,6 @@ static int ceph_set_page_dirty(struct page *page)
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc;
- int ret;
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
if (PageDirty(page)) {
dout("%p set_page_dirty %p idx %lu -- already dirty\n",
@@ -123,11 +119,7 @@ static int ceph_set_page_dirty(struct page *page)
page->private = (unsigned long)snapc;
SetPagePrivate(page);
- ret = __set_page_dirty_nobuffers(page);
- WARN_ON(!PageLocked(page));
- WARN_ON(!page->mapping);
-
- return ret;
+ return __set_page_dirty_nobuffers(page);
}
/*
@@ -1443,7 +1435,7 @@ static int ceph_filemap_fault(struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
- loff_t off = vmf->pgoff << PAGE_SHIFT;
+ loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
int want, got, ret;
sigset_t oldset;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1b5a50848b5b..a0168d9bb85d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -929,12 +929,19 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
- struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+ struct ceph_mds_client *mdsc;
int removed = 0;
+ /* 'ci' being NULL means the remove have already occurred */
+ if (!ci) {
+ dout("%s: cap inode is NULL\n", __func__);
+ return;
+ }
+
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap)
@@ -1650,11 +1657,14 @@ static int __mark_caps_flushing(struct inode *inode,
* try to invalidate mapping pages without blocking.
*/
static int try_nonblocking_invalidate(struct inode *inode)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
u32 invalidating_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
invalidate_mapping_pages(&inode->i_data, 0, -1);
spin_lock(&ci->i_ceph_lock);
@@ -1906,12 +1916,24 @@ ack:
if (mutex_trylock(&session->s_mutex) == 0) {
dout("inverting session/ino locks on %p\n",
session);
+ session = ceph_get_mds_session(session);
spin_unlock(&ci->i_ceph_lock);
if (took_snap_rwsem) {
up_read(&mdsc->snap_rwsem);
took_snap_rwsem = 0;
}
- mutex_lock(&session->s_mutex);
+ if (session) {
+ mutex_lock(&session->s_mutex);
+ ceph_put_mds_session(session);
+ } else {
+ /*
+ * Because we take the reference while
+ * holding the i_ceph_lock, it should
+ * never be NULL. Throw a warning if it
+ * ever is.
+ */
+ WARN_ON_ONCE(true);
+ }
goto retry;
}
}
@@ -3502,6 +3524,7 @@ retry:
WARN_ON(1);
tsession = NULL;
target = -1;
+ mutex_lock(&session->s_mutex);
}
goto retry;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6d653235e323..1f873034f469 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1728,6 +1728,7 @@ const struct file_operations ceph_file_fops = {
.mmap = ceph_mmap,
.fsync = ceph_fsync,
.lock = ceph_lock,
+ .setlease = simple_nosetlease,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5999d806de78..90db2cd07840 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1757,6 +1757,7 @@ static void ceph_invalidate_work(struct work_struct *work)
orig_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
if (invalidate_inode_pages2(inode->i_mapping) < 0) {
pr_err("invalidate_pages %p fails\n", inode);
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f36ddfea4997..06109314d93c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3518,6 +3518,9 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
ceph_check_delayed_caps(mdsc);
+ if (mdsc->stopping)
+ return;
+
mutex_lock(&mdsc->mutex);
renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
renew_caps = time_after_eq(jiffies, HZ*renew_interval +
@@ -3851,7 +3854,16 @@ void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
{
dout("stop\n");
- cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
+ /*
+ * Make sure the delayed work stopped before releasing
+ * the resources.
+ *
+ * Because the cancel_delayed_work_sync() will only
+ * guarantee that the work finishes executing. But the
+ * delayed work will re-arm itself again after that.
+ */
+ flush_delayed_work(&mdsc->delayed_work);
+
if (mdsc->mdsmap)
ceph_mdsmap_destroy(mdsc->mdsmap);
kfree(mdsc->sessions);
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index a3b56544c21b..ae1f2817bd6a 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -541,8 +541,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -552,8 +552,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 1\n",
+ cls, con, tag, end);
return 0;
}
@@ -563,8 +563,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_CTX) || (con != ASN1_CON)
|| (tag != ASN1_EOC)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 0\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p exit 0\n",
+ cls, con, tag, end);
return 0;
}
@@ -575,8 +575,8 @@ decode_negTokenInit(unsigned char *security_blob, int length,
return 0;
} else if ((cls != ASN1_UNI) || (con != ASN1_CON)
|| (tag != ASN1_SEQ)) {
- cifs_dbg(FYI, "cls = %d con = %d tag = %d end = %p (%d) exit 1\n",
- cls, con, tag, end, *end);
+ cifs_dbg(FYI, "cls = %d con = %d tag = %d sequence_end = %p exit 1\n",
+ cls, con, tag, sequence_end);
return 0;
}
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index a2b2355e7f01..7932e20555d2 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -371,14 +371,9 @@ cifs_strndup_from_utf16(const char *src, const int maxlen,
if (!dst)
return NULL;
cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage,
- NO_MAP_UNI_RSVD);
+ NO_MAP_UNI_RSVD);
} else {
- len = strnlen(src, maxlen);
- len++;
- dst = kmalloc(len, GFP_KERNEL);
- if (!dst)
- return NULL;
- strlcpy(dst, src, len);
+ dst = kstrndup(src, maxlen, GFP_KERNEL);
}
return dst;
@@ -501,7 +496,13 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
else if (map_chars == SFM_MAP_UNI_RSVD) {
bool end_of_string;
- if (i == srclen - 1)
+ /**
+ * Remap spaces and periods found at the end of every
+ * component of the path. The special cases of '.' and
+ * '..' do not need to be dealt with explicitly because
+ * they are addressed in namei.c:link_path_walk().
+ **/
+ if ((i == srclen - 1) || (source[i+1] == '\\'))
end_of_string = true;
else
end_of_string = false;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c5fd5abf7206..74f405a05efc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -214,7 +214,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
rc = server->ops->queryfs(xid, tcon, buf);
free_xid(xid);
- return 0;
+ return rc;
}
static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 600bb838c15b..f166fcb48ac0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -246,8 +246,9 @@ struct smb_version_operations {
int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
- void (*downgrade_oplock)(struct TCP_Server_Info *,
- struct cifsInodeInfo *, bool);
+ void (*downgrade_oplock)(struct TCP_Server_Info *server,
+ struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache);
/* process transaction2 response */
bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
char *, int);
@@ -1092,6 +1093,8 @@ struct cifsFileInfo {
unsigned int f_flags;
bool invalidHandle:1; /* file closed via session abend */
bool oplock_break_cancelled:1;
+ unsigned int oplock_epoch; /* epoch from the lease break */
+ __u32 oplock_level; /* oplock/lease level from the lease break */
int count;
spinlock_t file_info_lock; /* protects four flag/count fields above */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
@@ -1223,7 +1226,7 @@ struct cifsInodeInfo {
unsigned int epoch; /* used to track lease state changes */
#define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */
#define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */
-#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
+#define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */
#define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */
#define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */
#define CIFS_INO_LOCK (5) /* lock bit for synchronization */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 58e7288e5151..bc49cd04b725 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -756,6 +756,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
+ cancel_delayed_work_sync(&server->echo);
+
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
@@ -3077,9 +3079,10 @@ cifs_match_super(struct super_block *sb, void *data)
spin_lock(&cifs_tcp_ses_lock);
cifs_sb = CIFS_SB(sb);
tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
- if (IS_ERR(tlink)) {
+ if (tlink == NULL) {
+ /* can not match superblock if tlink were ever null */
spin_unlock(&cifs_tcp_ses_lock);
- return rc;
+ return 0;
}
tcon = tlink_tcon(tlink);
ses = tcon->ses;
@@ -4324,9 +4327,12 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
vol_info->retry = master_tcon->retry;
vol_info->nocase = master_tcon->nocase;
vol_info->local_lease = master_tcon->local_lease;
+ vol_info->resilient = master_tcon->use_resilient;
+ vol_info->persistent = master_tcon->use_persistent;
vol_info->no_linux_ext = !master_tcon->unix_ext;
vol_info->sectype = master_tcon->ses->sectype;
vol_info->sign = master_tcon->ses->sign;
+ vol_info->seal = master_tcon->seal;
rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
if (rc) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 03293e543c07..b92804ff3b33 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -841,6 +841,7 @@ static int
cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
{
struct inode *inode;
+ int rc;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -850,8 +851,25 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
CIFS_I(inode)->time = 0; /* force reval */
- if (cifs_revalidate_dentry(direntry))
- return 0;
+ rc = cifs_revalidate_dentry(direntry);
+ if (rc) {
+ cifs_dbg(FYI, "cifs_revalidate_dentry failed with rc=%d", rc);
+ switch (rc) {
+ case -ENOENT:
+ case -ESTALE:
+ /*
+ * Those errors mean the dentry is invalid
+ * (file was deleted or recreated)
+ */
+ return 0;
+ default:
+ /*
+ * Otherwise some unexpected error happened
+ * report it as-is to VFS layer
+ */
+ return rc;
+ }
+ }
else {
/*
* If the inode wasn't known to be a dfs entry when
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 662977b8d6ae..46e8e9324b58 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -163,6 +163,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
goto posix_open_ret;
}
} else {
+ cifs_revalidate_mapping(*pinode);
cifs_fattr_to_inode(*pinode, &fattr);
}
@@ -3496,7 +3497,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
* than it negotiated since it will refuse the read
* then.
*/
- if ((tcon->ses) && !(tcon->ses->capabilities &
+ if (!(tcon->ses->capabilities &
tcon->ses->server->vals->cap_large_files)) {
current_read_size = min_t(uint,
current_read_size, CIFSMaxBufSize);
@@ -3753,7 +3754,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
break;
__SetPageLocked(page);
- if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
+ rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
+ if (rc) {
__ClearPageLocked(page);
break;
}
@@ -3769,6 +3771,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
struct list_head *page_list, unsigned num_pages)
{
int rc;
+ int err = 0;
struct list_head tmplist;
struct cifsFileInfo *open_file = file->private_data;
struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
@@ -3809,7 +3812,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
* the order of declining indexes. When we put the pages in
* the rdata->pages, then we want them in increasing order.
*/
- while (!list_empty(page_list)) {
+ while (!list_empty(page_list) && !err) {
unsigned int i, nr_pages, bytes, rsize;
loff_t offset;
struct page *page, *tpage;
@@ -3832,9 +3835,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
return 0;
}
- rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
+ nr_pages = 0;
+ err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
&nr_pages, &offset, &bytes);
- if (rc) {
+ if (!nr_pages) {
add_credits_and_wake_if(server, credits, 0);
break;
}
@@ -4135,12 +4139,13 @@ void cifs_oplock_break(struct work_struct *work)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
+ bool purge_cache = false;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
- server->ops->downgrade_oplock(server, cinode,
- test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
+ server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
+ cfile->oplock_epoch, &purge_cache);
if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
cifs_has_mand_locks(cinode)) {
@@ -4155,18 +4160,21 @@ void cifs_oplock_break(struct work_struct *work)
else
break_lease(inode, O_WRONLY);
rc = filemap_fdatawrite(inode->i_mapping);
- if (!CIFS_CACHE_READ(cinode)) {
+ if (!CIFS_CACHE_READ(cinode) || purge_cache) {
rc = filemap_fdatawait(inode->i_mapping);
mapping_set_error(inode->i_mapping, rc);
cifs_zap_mapping(inode);
}
cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
+ if (CIFS_CACHE_WRITE(cinode))
+ goto oplock_break_ack;
}
rc = cifs_push_locks(cfile);
if (rc)
cifs_dbg(VFS, "Push locks rc = %d\n", rc);
+oplock_break_ack:
/*
* releasing stale oplock after recent reconnect of smb session using
* a now incorrect file handle is not a data integrity issue but do
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index bdce714e9448..b76e73395299 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2210,6 +2210,15 @@ set_size_out:
if (rc == 0) {
cifsInode->server_eof = attrs->ia_size;
cifs_setsize(inode, attrs->ia_size);
+
+ /*
+ * The man page of truncate says if the size changed,
+ * then the st_ctime and st_mtime fields for the file
+ * are updated.
+ */
+ attrs->ia_ctime = attrs->ia_mtime = current_time(inode);
+ attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+
cifs_truncate_page(inode->i_mapping, inode->i_size);
}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 76f1649ab444..d0e024856c0d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -473,21 +473,10 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
&pCifsInode->flags);
- /*
- * Set flag if the server downgrades the oplock
- * to L2 else clear.
- */
- if (pSMB->OplockLevel)
- set_bit(
- CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &pCifsInode->flags);
- else
- clear_bit(
- CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &pCifsInode->flags);
-
- cifs_queue_oplock_break(netfile);
+ netfile->oplock_epoch = 0;
+ netfile->oplock_level = pSMB->OplockLevel;
netfile->oplock_break_cancelled = false;
+ cifs_queue_oplock_break(netfile);
spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index aa23c00367ec..0113dba28eb0 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -602,7 +602,7 @@ sess_alloc_buffer(struct sess_data *sess_data, int wct)
return 0;
out_free_smb_buf:
- kfree(smb_buf);
+ cifs_small_buf_release(smb_buf);
sess_data->iov[0].iov_base = NULL;
sess_data->iov[0].iov_len = 0;
sess_data->buf0_type = CIFS_NO_BUFFER;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 483458340b10..9b271ae641c1 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -379,12 +379,10 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
static void
cifs_downgrade_oplock(struct TCP_Server_Info *server,
- struct cifsInodeInfo *cinode, bool set_level2)
+ struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache)
{
- if (set_level2)
- cifs_set_oplock_level(cinode, OPLOCK_READ);
- else
- cifs_set_oplock_level(cinode, 0);
+ cifs_set_oplock_level(cinode, oplock);
}
static bool
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index ff2ad15f67d6..80339e318294 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -496,7 +496,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
cifs_dbg(FYI, "found in the open list\n");
cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
- le32_to_cpu(rsp->NewLeaseState));
+ lease_state);
if (ack_req)
cfile->oplock_break_cancelled = false;
@@ -505,17 +505,8 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
- /*
- * Set or clear flags depending on the lease state being READ.
- * HANDLE caching flag should be added when the client starts
- * to defer closing remote file handles with HANDLE leases.
- */
- if (lease_state & SMB2_LEASE_READ_CACHING_HE)
- set_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &cinode->flags);
- else
- clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &cinode->flags);
+ cfile->oplock_epoch = le16_to_cpu(rsp->Epoch);
+ cfile->oplock_level = lease_state;
cifs_queue_oplock_break(cfile);
kfree(lw);
@@ -538,7 +529,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
cifs_dbg(FYI, "found in the pending open list\n");
cifs_dbg(FYI, "lease key match, lease break 0x%x\n",
- le32_to_cpu(rsp->NewLeaseState));
+ lease_state);
open->oplock = lease_state;
}
@@ -650,18 +641,9 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
&cinode->flags);
- /*
- * Set flag if the server downgrades the oplock
- * to L2 else clear.
- */
- if (rsp->OplockLevel)
- set_bit(
- CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &cinode->flags);
- else
- clear_bit(
- CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
- &cinode->flags);
+ cfile->oplock_epoch = 0;
+ cfile->oplock_level = rsp->OplockLevel;
+
spin_unlock(&cfile->file_info_lock);
cifs_queue_oplock_break(cfile);
@@ -674,8 +656,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
}
}
spin_unlock(&cifs_tcp_ses_lock);
- cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
- return false;
+ cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+ return true;
}
void
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 951c444d83e7..ba56c00f2650 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -652,7 +652,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
return rc;
}
- len = sizeof(ea) + ea_name_len + ea_value_len + 1;
+ len = sizeof(*ea) + ea_name_len + ea_value_len + 1;
ea = kzalloc(len, GFP_KERNEL);
if (ea == NULL) {
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
@@ -885,6 +885,8 @@ smb2_copychunk_range(const unsigned int xid,
cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
/* Request server copy to target from src identified by key */
+ kfree(retbuf);
+ retbuf = NULL;
rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
true /* is_fsctl */, false /* use_ipc */,
@@ -1755,6 +1757,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
inode = d_inode(cfile->dentry);
cifsi = CIFS_I(inode);
+ /*
+ * We zero the range through ioctl, so we need remove the page caches
+ * first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
/* if file not oplocked can't be sure whether asking to extend size */
if (!CIFS_CACHE_READ(cifsi))
if (keep_size == false) {
@@ -1824,6 +1832,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
return rc;
}
+ /*
+ * We implement the punch hole through ioctl, so we need remove the page
+ * caches first, otherwise the data may be inconsistent with the server.
+ */
+ truncate_pagecache_range(inode, offset, offset + len - 1);
+
cifs_dbg(FYI, "offset %lld len %lld", offset, len);
fsctl_buf.FileOffset = cpu_to_le64(offset);
@@ -1923,22 +1937,38 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
static void
smb2_downgrade_oplock(struct TCP_Server_Info *server,
- struct cifsInodeInfo *cinode, bool set_level2)
+ struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache)
{
- if (set_level2)
- server->ops->set_oplock_level(cinode, SMB2_OPLOCK_LEVEL_II,
- 0, NULL);
- else
- server->ops->set_oplock_level(cinode, 0, 0, NULL);
+ server->ops->set_oplock_level(cinode, oplock, 0, NULL);
}
static void
-smb21_downgrade_oplock(struct TCP_Server_Info *server,
- struct cifsInodeInfo *cinode, bool set_level2)
+smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache);
+
+static void
+smb3_downgrade_oplock(struct TCP_Server_Info *server,
+ struct cifsInodeInfo *cinode, __u32 oplock,
+ unsigned int epoch, bool *purge_cache)
{
- server->ops->set_oplock_level(cinode,
- set_level2 ? SMB2_LEASE_READ_CACHING_HE :
- 0, 0, NULL);
+ unsigned int old_state = cinode->oplock;
+ unsigned int old_epoch = cinode->epoch;
+ unsigned int new_state;
+
+ if (epoch > old_epoch) {
+ smb21_set_oplock_level(cinode, oplock, 0, NULL);
+ cinode->epoch = epoch;
+ }
+
+ new_state = cinode->oplock;
+ *purge_cache = false;
+
+ if ((old_state & CIFS_CACHE_READ_FLG) != 0 &&
+ (new_state & CIFS_CACHE_READ_FLG) == 0)
+ *purge_cache = true;
+ else if (old_state == new_state && (epoch - old_epoch > 1))
+ *purge_cache = true;
}
static void
@@ -2245,7 +2275,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
}
spin_unlock(&cifs_tcp_ses_lock);
- return 1;
+ return -EAGAIN;
}
/*
* Encrypt or decrypt @rqst message. @rqst has the following format:
@@ -2277,7 +2307,7 @@ crypt_message(struct TCP_Server_Info *server, struct smb_rqst *rqst, int enc)
if (rc) {
cifs_dbg(VFS, "%s: Could not get %scryption key\n", __func__,
enc ? "en" : "de");
- return 0;
+ return rc;
}
rc = smb3_crypto_aead_allocate(server);
@@ -2941,7 +2971,7 @@ struct smb_version_operations smb21_operations = {
.print_stats = smb2_print_stats,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb21_downgrade_oplock,
+ .downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
@@ -3036,7 +3066,7 @@ struct smb_version_operations smb30_operations = {
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb21_downgrade_oplock,
+ .downgrade_oplock = smb3_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
@@ -3141,7 +3171,7 @@ struct smb_version_operations smb311_operations = {
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
.handle_cancelled_mid = smb2_handle_cancelled_mid,
- .downgrade_oplock = smb21_downgrade_oplock,
+ .downgrade_oplock = smb3_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
.negotiate_wsize = smb2_negotiate_wsize,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1c87a429ce72..77a9aeaf2cb1 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -942,6 +942,8 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
spnego_key = cifs_get_spnego_key(ses);
if (IS_ERR(spnego_key)) {
rc = PTR_ERR(spnego_key);
+ if (rc == -ENOKEY)
+ cifs_dbg(VFS, "Verify user has a krb5 ticket and keyutils is installed\n");
spnego_key = NULL;
goto out;
}
@@ -2516,10 +2518,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
* Related requests use info from previous read request
* in chain.
*/
- shdr->SessionId = 0xFFFFFFFF;
+ shdr->SessionId = 0xFFFFFFFFFFFFFFFF;
shdr->TreeId = 0xFFFFFFFF;
- req->PersistentFileId = 0xFFFFFFFF;
- req->VolatileFileId = 0xFFFFFFFF;
+ req->PersistentFileId = 0xFFFFFFFFFFFFFFFF;
+ req->VolatileFileId = 0xFFFFFFFFFFFFFFFF;
}
}
if (remaining_bytes > io_parms->length)
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index bad458a2b579..407425d31b2e 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -206,7 +206,7 @@ struct smb2_negotiate_req {
__le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
__le16 NegotiateContextCount; /* SMB3.1.1 only. MBZ earlier */
__le16 Reserved2;
- __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
+ __le16 Dialects[4]; /* BB expand this if autonegotiate > 4 dialects */
} __packed;
/* Dialects */
@@ -1046,7 +1046,7 @@ struct smb2_oplock_break {
struct smb2_lease_break {
struct smb2_hdr hdr;
__le16 StructureSize; /* Must be 44 */
- __le16 Reserved;
+ __le16 Epoch;
__le32 Flags;
__u8 LeaseKey[16];
__le32 CurrentLeaseState;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index c2ef617d2f97..c875f246cb0e 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1537,6 +1537,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
spin_lock(&configfs_dirent_lock);
configfs_detach_rollback(dentry);
spin_unlock(&configfs_dirent_lock);
+ config_item_put(parent_item);
return -EINTR;
}
frag->frag_dead = true;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index bb0a427517e9..38eb80e29715 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -392,7 +392,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
attr = to_attr(dentry);
if (!attr)
- goto out_put_item;
+ goto out_free_buffer;
if (type & CONFIGFS_ITEM_BIN_ATTR) {
buffer->bin_attr = to_bin_attr(dentry);
@@ -405,7 +405,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
/* Grab the module reference for this attribute if we have one */
error = -ENODEV;
if (!try_module_get(buffer->owner))
- goto out_put_item;
+ goto out_free_buffer;
error = -EACCES;
if (!buffer->item->ci_type)
@@ -449,8 +449,6 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
out_put_module:
module_put(buffer->owner);
-out_put_item:
- config_item_put(buffer->item);
out_free_buffer:
up_read(&frag->frag_sem);
kfree(buffer);
@@ -498,13 +496,13 @@ static int configfs_release_bin_file(struct inode *inode, struct file *file)
buffer->bin_buffer_size);
}
up_read(&frag->frag_sem);
- /* vfree on NULL is safe */
- vfree(buffer->bin_buffer);
- buffer->bin_buffer = NULL;
- buffer->bin_buffer_size = 0;
- buffer->needs_read_fill = 1;
}
+ vfree(buffer->bin_buffer);
+ buffer->bin_buffer = NULL;
+ buffer->bin_buffer_size = 0;
+ buffer->needs_read_fill = 1;
+
configfs_release(inode, file);
return 0;
}
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 8606da1df0aa..08b13ddba93e 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -304,13 +304,8 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
oname->name);
return 0;
}
- if (hash) {
- digested_name.hash = hash;
- digested_name.minor_hash = minor_hash;
- } else {
- digested_name.hash = 0;
- digested_name.minor_hash = 0;
- }
+ digested_name.hash = hash;
+ digested_name.minor_hash = minor_hash;
memcpy(digested_name.digest,
FSCRYPT_FNAME_DIGEST(iname->name, iname->len),
FSCRYPT_FNAME_DIGEST_SIZE);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index d13a154c8424..4cda0e960bc2 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -153,8 +153,7 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
* malicious offline violations of this constraint, while the link and rename
* checks are needed to prevent online violations of this constraint.
*
- * Return: 1 if permitted, 0 if forbidden. If forbidden, the caller must fail
- * the filesystem operation with EPERM.
+ * Return: 1 if permitted, 0 if forbidden.
*/
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 30bf22c989de..8d9689aac5f4 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -857,6 +857,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
struct buffer_head *map_bh)
{
int ret = 0;
+ int boundary = sdio->boundary; /* dio_send_cur_page may clear it */
if (dio->op == REQ_OP_WRITE) {
/*
@@ -895,10 +896,10 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits;
out:
/*
- * If sdio->boundary then we want to schedule the IO now to
+ * If boundary then we want to schedule the IO now to
* avoid metadata seeks.
*/
- if (sdio->boundary) {
+ if (boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
if (sdio->bio)
dio_bio_submit(dio, sdio);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 7211e826d90d..4fb070b7f00f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -80,6 +80,9 @@ struct dlm_cluster {
unsigned int cl_new_rsb_count;
unsigned int cl_recover_callbacks;
char cl_cluster_name[DLM_LOCKSPACE_LEN];
+
+ struct dlm_spaces *sps;
+ struct dlm_comms *cms;
};
static struct dlm_cluster *config_item_to_cluster(struct config_item *i)
@@ -218,6 +221,7 @@ struct dlm_space {
struct list_head members;
struct mutex members_lock;
int members_count;
+ struct dlm_nodes *nds;
};
struct dlm_comms {
@@ -355,6 +359,9 @@ static struct config_group *make_cluster(struct config_group *g,
if (!cl || !sps || !cms)
goto fail;
+ cl->sps = sps;
+ cl->cms = cms;
+
config_group_init_type_name(&cl->group, name, &cluster_type);
config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type);
config_group_init_type_name(&cms->cs_group, "comms", &comms_type);
@@ -404,6 +411,9 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
struct dlm_cluster *cl = config_item_to_cluster(i);
+
+ kfree(cl->sps);
+ kfree(cl->cms);
kfree(cl);
}
@@ -426,6 +436,7 @@ static struct config_group *make_space(struct config_group *g, const char *name)
INIT_LIST_HEAD(&sp->members);
mutex_init(&sp->members_lock);
sp->members_count = 0;
+ sp->nds = nds;
return &sp->group;
fail:
@@ -447,6 +458,7 @@ static void drop_space(struct config_group *g, struct config_item *i)
static void release_space(struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
+ kfree(sp->nds);
kfree(sp);
}
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index fa08448e35dd..bb87dad03cd4 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -544,6 +544,7 @@ static void *table_seq_next(struct seq_file *seq, void *iter_ptr, loff_t *pos)
if (bucket >= ls->ls_rsbtbl_size) {
kfree(ri);
+ ++*pos;
return NULL;
}
tree = toss ? &ls->ls_rsbtbl[bucket].toss : &ls->ls_rsbtbl[bucket].keep;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 748e8d59e611..cb287df13a7a 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -99,7 +99,6 @@ do { \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
- BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 9c8c9a09b4a6..7a6b0327ceb0 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -633,6 +633,9 @@ static int new_lockspace(const char *name, const char *cluster,
wait_event(ls->ls_recover_lock_wait,
test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags));
+ /* let kobject handle freeing of ls if there's an error */
+ do_unreg = 1;
+
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
@@ -640,9 +643,6 @@ static int new_lockspace(const char *name, const char *cluster,
goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
- /* let kobject handle freeing of ls if there's an error */
- do_unreg = 1;
-
/* This uevent triggers dlm_controld in userspace to add us to the
group of nodes that are members of this lockspace (managed by the
cluster infrastructure.) Once it's done that, it tells us who the
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4813d0e0cd9b..af17fcd798c8 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -595,7 +595,7 @@ static void close_connection(struct connection *con, bool and_other,
}
if (con->othercon && and_other) {
/* Will only re-enter once. */
- close_connection(con->othercon, false, true, true);
+ close_connection(con->othercon, false, tx, rx);
}
if (con->rx_page) {
__free_page(con->rx_page);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index eed38ae86c6c..bd25ab837011 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -339,10 +339,8 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct extent_crypt_result ecr;
int rc = 0;
- if (!crypt_stat || !crypt_stat->tfm
- || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
- return -EINVAL;
-
+ BUG_ON(!crypt_stat || !crypt_stat->tfm
+ || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
crypt_stat->key_size);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 6b801186baa5..78d6451e88cd 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -506,6 +506,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}
+ if (!dev_name) {
+ rc = -EINVAL;
+ err = "Device name cannot be null";
+ goto out;
+ }
+
rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
if (rc) {
err = "Error parsing options";
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 71fccccf317e..5decb3e06563 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/kmemleak.h>
#include <linux/slab.h>
#include <linux/uuid.h>
@@ -104,6 +105,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
var->var.VariableName[i] = '\0';
inode->i_private = var;
+ kmemleak_ignore(var);
err = efivar_entry_add(var, &efivarfs_list);
if (err)
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 5b68e4294faa..834615f13f3e 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -145,6 +145,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
+ /* replace invalid slashes like kobject_set_name_vargs does for /sys/firmware/efi/vars. */
+ strreplace(name, '/', '!');
+
inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
is_removable);
if (!inode)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c291bf61afb9..c9feb119aa47 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -223,8 +223,7 @@ struct eventpoll {
struct file *file;
/* used to optimize loop detection check */
- int visited;
- struct list_head visited_list_link;
+ u64 gen;
#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
@@ -273,6 +272,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);
+static u64 loop_check_gen = 0;
+
/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;
@@ -288,9 +289,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
@@ -1391,7 +1389,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
- const char *name;
+ struct name_snapshot n;
struct wakeup_source *ws;
if (!epi->ep->ws) {
@@ -1400,8 +1398,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
return -ENOMEM;
}
- name = epi->ffd.file->f_path.dentry->d_name.name;
- ws = wakeup_source_register(name);
+ take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+ ws = wakeup_source_register(n.name);
+ release_dentry_name_snapshot(&n);
if (!ws)
return -ENOMEM;
@@ -1461,6 +1460,22 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}
+ /* Add the current item to the list of active epoll hook for this file */
+ spin_lock(&tfile->f_lock);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+ spin_unlock(&tfile->f_lock);
+
+ /*
+ * Add the current item to the RB tree. All RB tree operations are
+ * protected by "mtx", and ep_insert() is called with "mtx" held.
+ */
+ ep_rbtree_insert(ep, epi);
+
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (full_check && reverse_path_check())
+ goto error_remove_epi;
+
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1483,22 +1498,6 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;
- /* Add the current item to the list of active epoll hook for this file */
- spin_lock(&tfile->f_lock);
- list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
- spin_unlock(&tfile->f_lock);
-
- /*
- * Add the current item to the RB tree. All RB tree operations are
- * protected by "mtx", and ep_insert() is called with "mtx" held.
- */
- ep_rbtree_insert(ep, epi);
-
- /* now check if we've created too many backpaths */
- error = -EINVAL;
- if (full_check && reverse_path_check())
- goto error_remove_epi;
-
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
@@ -1527,6 +1526,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
return 0;
+error_unregister:
+ ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
@@ -1534,9 +1535,6 @@ error_remove_epi:
rb_erase_cached(&epi->rbn, &ep->rbr);
-error_unregister:
- ep_unregister_pollwait(ep, epi);
-
/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
@@ -1878,13 +1876,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
- ep->visited = 1;
- list_add(&ep->visited_list_link, &visited_list);
+ ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->visited)
+ if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, epi->ffd.file,
@@ -1900,9 +1897,11 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
- if (list_empty(&epi->ffd.file->f_tfile_llink))
- list_add(&epi->ffd.file->f_tfile_llink,
- &tfile_check_list);
+ if (list_empty(&epi->ffd.file->f_tfile_llink)) {
+ if (get_file_rcu(epi->ffd.file))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
+ }
}
}
mutex_unlock(&ep->mtx);
@@ -1923,18 +1922,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- int ret;
- struct eventpoll *ep_cur, *ep_next;
-
- ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
- /* clear visited list */
- list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
- visited_list_link) {
- ep_cur->visited = 0;
- list_del(&ep_cur->visited_list_link);
- }
- return ret;
}
static void clear_tfile_check_list(void)
@@ -1946,6 +1935,7 @@ static void clear_tfile_check_list(void)
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
+ fput(file);
}
INIT_LIST_HEAD(&tfile_check_list);
}
@@ -2090,19 +2080,20 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
+ ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
full_check = 1;
mutex_unlock(&ep->mtx);
mutex_lock(&epmutex);
if (is_file_epoll(tf.file)) {
error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
+ if (ep_loop_check(ep, tf.file) != 0)
goto error_tgt_fput;
- }
- } else
+ } else {
+ get_file(tf.file);
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
+ }
mutex_lock_nested(&ep->mtx, 0);
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
@@ -2126,8 +2117,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- if (full_check)
- clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -2150,8 +2139,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (full_check)
+ if (full_check) {
+ clear_tfile_check_list();
+ loop_check_gen++;
mutex_unlock(&epmutex);
+ }
fdput(tf);
error_fput:
diff --git a/fs/exec.c b/fs/exec.c
index b38d7277b7de..cd84bb439577 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -981,7 +981,7 @@ int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size,
struct fd f = fdget(fd);
int ret = -EBADF;
- if (!f.file)
+ if (!f.file || !(f.file->f_mode & FMODE_READ))
goto out;
ret = kernel_read_file(f.file, buf, size, max_size, id);
@@ -1025,10 +1025,23 @@ static int exec_mmap(struct mm_struct *mm)
}
}
task_lock(tsk);
+
+ local_irq_disable();
active_mm = tsk->active_mm;
- tsk->mm = mm;
tsk->active_mm = mm;
+ tsk->mm = mm;
+ /*
+ * This prevents preemption while active_mm is being loaded and
+ * it and mm are being updated, which could cause problems for
+ * lazy tlb mm refcounting when these are updated by context
+ * switches. Not all architectures can handle irqs off over
+ * activate_mm yet.
+ */
+ if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
activate_mm(active_mm, mm);
+ if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
+ local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
vmacache_flush(tsk);
task_unlock(tsk);
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e1b3724bebf2..ccd5a7016c19 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -48,10 +48,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
struct ext2_sb_info *sbi = EXT2_SB(sb);
if (block_group >= sbi->s_groups_count) {
- ext2_error (sb, "ext2_get_group_desc",
- "block_group >= groups_count - "
- "block_group = %d, groups_count = %lu",
- block_group, sbi->s_groups_count);
+ WARN(1, "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sbi->s_groups_count);
return NULL;
}
@@ -59,10 +58,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
if (!sbi->s_group_desc[group_desc]) {
- ext2_error (sb, "ext2_get_group_desc",
- "Group descriptor not loaded - "
- "block_group = %d, group_desc = %lu, desc = %lu",
- block_group, group_desc, offset);
+ WARN(1, "Group descriptor not loaded - "
+ "block_group = %d, group_desc = %lu, desc = %lu",
+ block_group, group_desc, offset);
return NULL;
}
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a1fc3dabca41..6d5fce776d8e 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -80,6 +80,7 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir)
if (dir)
le16_add_cpu(&desc->bg_used_dirs_count, -1);
spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));
+ percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
if (dir)
percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
mark_buffer_dirty(bh);
@@ -531,7 +532,7 @@ got:
goto fail;
}
- percpu_counter_add(&sbi->s_freeinodes_counter, -1);
+ percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 13eb028607ca..080410433110 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -24,6 +24,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
+ u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
@@ -44,7 +45,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
- if ((entry1->start_blk + entry1->count) == entry2->start_blk)
+ if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
+ entry1->ino == entry2->ino)
return 1;
return 0;
}
@@ -56,9 +58,9 @@ static inline int can_merge(struct ext4_system_zone *entry1,
*/
static int add_system_zone(struct ext4_sb_info *sbi,
ext4_fsblk_t start_blk,
- unsigned int count)
+ unsigned int count, u32 ino)
{
- struct ext4_system_zone *new_entry = NULL, *entry;
+ struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &sbi->system_blks.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
@@ -69,30 +71,21 @@ static int add_system_zone(struct ext4_sb_info *sbi,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
- else {
- if (start_blk + count > (entry->start_blk +
- entry->count))
- entry->count = (start_blk + count -
- entry->start_blk);
- new_node = *n;
- new_entry = rb_entry(new_node, struct ext4_system_zone,
- node);
- break;
- }
+ else /* Unexpected overlap of system zones. */
+ return -EFSCORRUPTED;
}
- if (!new_entry) {
- new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
- GFP_KERNEL);
- if (!new_entry)
- return -ENOMEM;
- new_entry->start_blk = start_blk;
- new_entry->count = count;
- new_node = &new_entry->node;
+ new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
+ GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+ new_entry->start_blk = start_blk;
+ new_entry->count = count;
+ new_entry->ino = ino;
+ new_node = &new_entry->node;
- rb_link_node(new_node, parent, n);
- rb_insert_color(new_node, &sbi->system_blks);
- }
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, &sbi->system_blks);
/* Can we merge to the left? */
node = rb_prev(new_node);
@@ -153,6 +146,7 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
return PTR_ERR(inode);
num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
while (i < num) {
+ cond_resched();
map.m_lblk = i;
map.m_len = num - i;
n = ext4_map_blocks(NULL, inode, &map, 0);
@@ -163,16 +157,16 @@ static int ext4_protect_reserved_inode(struct super_block *sb, u32 ino)
if (n == 0) {
i++;
} else {
- if (!ext4_data_block_valid(sbi, map.m_pblk, n)) {
- ext4_error(sb, "blocks %llu-%llu from inode %u "
+ err = add_system_zone(sbi, map.m_pblk, n, ino);
+ if (err < 0) {
+ if (err == -EFSCORRUPTED) {
+ ext4_error(sb,
+ "blocks %llu-%llu from inode %u "
"overlap system zone", map.m_pblk,
map.m_pblk + map.m_len - 1, ino);
- err = -EFSCORRUPTED;
+ }
break;
}
- err = add_system_zone(sbi, map.m_pblk, n);
- if (err < 0)
- break;
i += n;
}
}
@@ -201,16 +195,16 @@ int ext4_setup_system_zone(struct super_block *sb)
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
- ext4_bg_num_gdb(sb, i) + 1);
+ ext4_bg_num_gdb(sb, i) + 1, 0);
gdp = ext4_get_group_desc(sb, i, NULL);
- ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
- ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
+ ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
return ret;
ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
- sbi->s_itb_per_group);
+ sbi->s_itb_per_group, 0);
if (ret)
return ret;
}
@@ -243,10 +237,11 @@ void ext4_release_system_zone(struct super_block *sb)
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with filesystem metadata blocks.
*/
-int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
- unsigned int count)
+int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
+ unsigned int count)
{
struct ext4_system_zone *entry;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct rb_node *n = sbi->system_blks.rb_node;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
@@ -262,6 +257,8 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
+ if (entry->ino == inode->i_ino)
+ return 1;
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0;
}
@@ -284,8 +281,7 @@ int ext4_check_blockref(const char *function, unsigned int line,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
- unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- blk, 1))) {
+ unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
es->s_last_error_block = cpu_to_le64(blk);
ext4_error_inode(inode, function, line, blk,
"invalid block");
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 90beca85c416..103bb9f1dc86 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -532,7 +532,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct dir_private_info *info = file->private_data;
struct inode *inode = file_inode(file);
struct fname *fname;
- int ret;
+ int ret = 0;
if (!info) {
info = ext4_htree_create_dir_info(file, ctx->pos);
@@ -580,7 +580,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
info->curr_minor_hash,
&info->next_hash);
if (ret < 0)
- return ret;
+ goto finished;
if (ret == 0) {
ctx->pos = ext4_get_htree_eof(file);
break;
@@ -611,7 +611,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
}
finished:
info->last_pos = ctx->pos;
- return 0;
+ return ret < 0 ? ret : 0;
}
static int ext4_dir_open(struct inode * inode, struct file * filp)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6c5b4301ee37..5e9ff118977e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2406,7 +2406,8 @@ void ext4_insert_dentry(struct inode *inode,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
/* ext4_iget() should have caught this... */
WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
@@ -3150,9 +3151,9 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
-extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
- ext4_fsblk_t start_blk,
- unsigned int count);
+extern int ext4_inode_block_valid(struct inode *inode,
+ ext4_fsblk_t start_blk,
+ unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
struct inode *, __le32 *, unsigned int);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a284fb28944b..63291c265aa0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -169,10 +169,13 @@ struct ext4_ext_path {
(EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
#define EXT_LAST_INDEX(__hdr__) \
(EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
-#define EXT_MAX_EXTENT(__hdr__) \
- (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_EXTENT(__hdr__) \
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \
+ : 0)
#define EXT_MAX_INDEX(__hdr__) \
- (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+ ((le16_to_cpu((__hdr__)->eh_max)) ? \
+ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) : 0)
static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4f9eb4b61549..652d16f90beb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -389,7 +389,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
*/
if (lblock + len <= lblock)
return 0;
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
+ return ext4_inode_block_valid(inode, block, len);
}
static int ext4_valid_extent_idx(struct inode *inode,
@@ -397,7 +397,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
{
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
- return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
+ return ext4_inode_block_valid(inode, block, 1);
}
static int ext4_valid_extent_entries(struct inode *inode,
@@ -554,14 +554,10 @@ __read_extent_tree_block(const char *function, unsigned int line,
}
if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
return bh;
- if (!ext4_has_feature_journal(inode->i_sb) ||
- (inode->i_ino !=
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) {
- err = __ext4_ext_check(function, line, inode,
- ext_block_hdr(bh), depth, pblk);
- if (err)
- goto errout;
- }
+ err = __ext4_ext_check(function, line, inode,
+ ext_block_hdr(bh), depth, pblk);
+ if (err)
+ goto errout;
set_buffer_verified(bh);
/*
* If this is a leaf block, cache all of its entries
@@ -874,6 +870,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
eh->eh_entries = 0;
eh->eh_magic = EXT4_EXT_MAGIC;
eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
+ eh->eh_generation = 0;
ext4_mark_inode_dirty(handle, inode);
return 0;
}
@@ -1130,6 +1127,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_depth = 0;
+ neh->eh_generation = 0;
/* move remainder of path[depth] to the new leaf */
if (unlikely(path[depth].p_hdr->eh_entries !=
@@ -1207,6 +1205,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
neh->eh_magic = EXT4_EXT_MAGIC;
neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
neh->eh_depth = cpu_to_le16(depth - i);
+ neh->eh_generation = 0;
fidx = EXT_FIRST_INDEX(neh);
fidx->ei_block = border;
ext4_idx_store_pblock(fidx, oldblock);
@@ -2917,7 +2916,7 @@ again:
* in use to avoid freeing it when removing blocks.
*/
if (sbi->s_cluster_ratio > 1) {
- pblk = ext4_ext_pblock(ex) + end - ee_block + 2;
+ pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
partial_cluster =
-(long long) EXT4_B2C(sbi, pblk);
}
@@ -3279,7 +3278,10 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_unwritten(ex2);
err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
- if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ if (err != -ENOSPC && err != -EDQUOT)
+ goto out;
+
+ if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
if (split_flag & EXT4_EXT_DATA_VALID1) {
err = ext4_ext_zeroout(inode, ex2);
@@ -3305,30 +3307,30 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_pblock(&orig_ex));
}
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le16(ee_len);
- ext4_ext_try_to_merge(handle, inode, path, ex);
- err = ext4_ext_dirty(handle, inode, path + path->p_depth);
- if (err)
- goto fix_extent_len;
-
- /* update extent status tree */
- err = ext4_zeroout_es(inode, &zero_ex);
-
- goto out;
- } else if (err)
- goto fix_extent_len;
-
-out:
- ext4_ext_show_leaf(inode, path);
- return err;
+ if (!err) {
+ /* update the extent length and mark as initialized */
+ ex->ee_len = cpu_to_le16(ee_len);
+ ext4_ext_try_to_merge(handle, inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+ if (!err)
+ /* update extent status tree */
+ err = ext4_zeroout_es(inode, &zero_ex);
+ /* If we failed at this point, we don't know in which
+ * state the extent tree exactly is so don't try to fix
+ * length of the original extent as it may do even more
+ * damage.
+ */
+ goto out;
+ }
+ }
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
ext4_ext_dirty(handle, inode, path + path->p_depth);
return err;
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
}
/*
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 763ef185dd17..e89be95a4c8c 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1081,11 +1081,9 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
- if (!nr_to_scan)
- return ret;
-
nr_shrunk = __es_shrink(sbi, nr_to_scan, NULL);
+ ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_shk_cnt);
trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
return nr_shrunk;
}
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 7ec340898598..1a4d42a1b161 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -121,6 +121,9 @@ static int ext4_getfsmap_helper(struct super_block *sb,
/* Are we just counting mappings? */
if (info->gfi_head->fmh_count == 0) {
+ if (info->gfi_head->fmh_entries == UINT_MAX)
+ return EXT4_QUERY_RANGE_ABORT;
+
if (rec_fsblk > info->gfi_next_fsblk)
info->gfi_head->fmh_entries++;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 5508baa11bb6..8a28d47bd502 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -44,30 +44,28 @@
*/
static int ext4_sync_parent(struct inode *inode)
{
- struct dentry *dentry = NULL;
- struct inode *next;
+ struct dentry *dentry, *next;
int ret = 0;
if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
return 0;
- inode = igrab(inode);
+ dentry = d_find_any_alias(inode);
+ if (!dentry)
+ return 0;
while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
- dentry = d_find_any_alias(inode);
- if (!dentry)
- break;
- next = igrab(d_inode(dentry->d_parent));
+
+ next = dget_parent(dentry);
dput(dentry);
- if (!next)
- break;
- iput(inode);
- inode = next;
+ dentry = next;
+ inode = dentry->d_inode;
+
/*
* The directory inode may have gone through rmdir by now. But
* the inode itself and its blocks are still allocated (we hold
- * a reference to the inode so it didn't go through
- * ext4_evict_inode()) and so we are safe to flush metadata
- * blocks and the inode.
+ * a reference to the inode via its dentry), so it didn't go
+ * through ext4_evict_inode()) and so we are safe to flush
+ * metadata blocks and the inode.
*/
ret = sync_mapping_buffers(inode->i_mapping);
if (ret)
@@ -76,7 +74,7 @@ static int ext4_sync_parent(struct inode *inode)
if (ret)
break;
}
- iput(inode);
+ dput(dentry);
return ret;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 85c2a7ea5ea2..e6520627a84a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -407,7 +407,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
*
* We always try to spread first-level directories.
*
- * If there are blockgroups with both free inodes and free blocks counts
+ * If there are blockgroups with both free inodes and free clusters counts
* not worse than average we return one with smallest directory count.
* Otherwise we simply return a random group.
*
@@ -416,7 +416,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
* It's OK to put directory into a group unless
* it has too many directories already (max_dirs) or
* it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
+ * it has too few free clusters left (min_clusters) or
* Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest. If none
* of the groups look good we just look for a group with more
@@ -432,7 +432,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei, grp_free;
- ext4_fsblk_t freeb, avefreec;
+ ext4_fsblk_t freec, avefreec;
unsigned int ndirs;
int max_dirs, min_inodes;
ext4_grpblk_t min_clusters;
@@ -451,9 +451,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
- freeb = EXT4_C2B(sbi,
- percpu_counter_read_positive(&sbi->s_freeclusters_counter));
- avefreec = freeb;
+ freec = percpu_counter_read_positive(&sbi->s_freeclusters_counter);
+ avefreec = freec;
do_div(avefreec, ngroups);
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
@@ -1367,6 +1366,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
handle_t *handle;
ext4_fsblk_t blk;
int num, ret = 0, used_blks = 0;
+ unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */
if (sb_rdonly(sb)) {
@@ -1397,22 +1397,37 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
* used inodes so we need to skip blocks with used inodes in
* inode table.
*/
- if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
- used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)),
- sbi->s_inodes_per_block);
-
- if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group) ||
- ((group == 0) && ((EXT4_INODES_PER_GROUP(sb) -
- ext4_itable_unused_count(sb, gdp)) <
- EXT4_FIRST_INO(sb)))) {
- ext4_error(sb, "Something is wrong with group %u: "
- "used itable blocks: %d; "
- "itable unused count: %u",
- group, used_blks,
- ext4_itable_unused_count(sb, gdp));
- ret = 1;
- goto err_out;
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+ used_inos = EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp);
+ used_blks = DIV_ROUND_UP(used_inos, sbi->s_inodes_per_block);
+
+ /* Bogus inode unused count? */
+ if (used_blks < 0 || used_blks > sbi->s_itb_per_group) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "used itable blocks: %d; "
+ "itable unused count: %u",
+ group, used_blks,
+ ext4_itable_unused_count(sb, gdp));
+ ret = 1;
+ goto err_out;
+ }
+
+ used_inos += group * EXT4_INODES_PER_GROUP(sb);
+ /*
+ * Are there some uninitialized inodes in the inode table
+ * before the first normal inode?
+ */
+ if ((used_blks != sbi->s_itb_per_group) &&
+ (used_inos < EXT4_FIRST_INO(sb))) {
+ ext4_error(sb, "Something is wrong with group %u: "
+ "itable unused count: %u; "
+ "itables initialized count: %ld",
+ group, ext4_itable_unused_count(sb, gdp),
+ used_inos);
+ ret = 1;
+ goto err_out;
+ }
}
blk = ext4_inode_table(sb, gdp) + used_blks;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index e1801b288847..a5442528a60d 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -842,8 +842,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
- count)) {
+ if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
@@ -1005,8 +1004,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
- nr, 1)) {
+ if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 6064bcb8572b..034b5d0a0504 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -756,6 +756,12 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
ext4_write_lock_xattr(inode, &no_expand);
BUG_ON(!ext4_has_inline_data(inode));
+ /*
+ * ei->i_inline_off may have changed since ext4_write_begin()
+ * called ext4_try_to_write_inline_data()
+ */
+ (void) ext4_find_inline_data_nolock(inode);
+
kaddr = kmap_atomic(page);
ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
kunmap_atomic(kaddr);
@@ -1895,6 +1901,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
ext4_write_lock_xattr(inode, &no_expand);
if (!ext4_has_inline_data(inode)) {
+ ext4_write_unlock_xattr(inode, &no_expand);
*has_inline = 0;
ext4_journal_stop(handle);
return 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5f03a4fabeaa..9c07c8674b21 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -202,6 +202,7 @@ void ext4_evict_inode(struct inode *inode)
*/
int extra_credits = 6;
struct ext4_xattr_inode_array *ea_inode_array = NULL;
+ bool freeze_protected = false;
trace_ext4_evict_inode(inode);
@@ -249,9 +250,14 @@ void ext4_evict_inode(struct inode *inode)
/*
* Protect us against freezing - iput() caller didn't have to have any
- * protection against it
+ * protection against it. When we are in a running transaction though,
+ * we are already protected against freezing and we cannot grab further
+ * protection due to lock ordering constraints.
*/
- sb_start_intwrite(inode->i_sb);
+ if (!ext4_journal_current_handle()) {
+ sb_start_intwrite(inode->i_sb);
+ freeze_protected = true;
+ }
if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
@@ -270,7 +276,8 @@ void ext4_evict_inode(struct inode *inode)
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@@ -311,7 +318,8 @@ void ext4_evict_inode(struct inode *inode)
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
goto no_delete;
}
@@ -340,7 +348,8 @@ stop_handle:
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
- sb_end_intwrite(inode->i_sb);
+ if (freeze_protected)
+ sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array);
return;
no_delete:
@@ -411,8 +420,7 @@ static int __check_block_validity(struct inode *inode, const char *func,
(inode->i_ino ==
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
return 0;
- if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
- map->m_len)) {
+ if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
@@ -2063,13 +2071,13 @@ static int __ext4_journalled_writepage(struct page *page,
if (!ret)
ret = err;
- if (!ext4_has_inline_data(inode))
- ext4_walk_page_buffers(NULL, page_bufs, 0, len,
- NULL, bput_one);
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out:
unlock_page(page);
out_no_pagelock:
+ if (!inline_data && page_bufs)
+ ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+ NULL, bput_one);
brelse(inode_bh);
return ret;
}
@@ -3794,6 +3802,11 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
ssize_t ret;
+ loff_t offset = iocb->ki_pos;
+ loff_t size = i_size_read(inode);
+
+ if (offset >= size)
+ return 0;
/*
* Shared inode_lock is enough for us - it protects against concurrent
@@ -4926,7 +4939,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = 0;
if (ei->i_file_acl &&
- !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
+ !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
ext4_error_inode(inode, function, line, 0,
"iget: bad extended attribute block %llu",
ei->i_file_acl);
@@ -5050,16 +5063,16 @@ static int other_inode_match(struct inode * inode, unsigned long ino,
if ((inode->i_ino != ino) ||
(inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+ I_DIRTY_INODE)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
return 0;
spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
- I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
+ I_DIRTY_INODE)) == 0) &&
(inode->i_state & I_DIRTY_TIME)) {
struct ext4_inode_info *ei = EXT4_I(inode);
- inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
+ inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock);
@@ -5117,7 +5130,7 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
struct super_block *sb = inode->i_sb;
- int err = 0, rc, block;
+ int err = 0, block;
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
@@ -5130,6 +5143,12 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
+ err = ext4_inode_blocks_set(handle, raw_inode, ei);
+ if (err) {
+ spin_unlock(&ei->i_raw_lock);
+ goto out_brelse;
+ }
+
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
@@ -5163,18 +5182,13 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
- err = ext4_inode_blocks_set(handle, raw_inode, ei);
- if (err) {
- spin_unlock(&ei->i_raw_lock);
- goto out_brelse;
- }
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- if (ei->i_disksize != ext4_isize(inode->i_sb, raw_inode)) {
+ if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) {
ext4_isize_set(raw_inode, ei->i_disksize);
need_datasync = 1;
}
@@ -5226,9 +5240,9 @@ static int ext4_do_update_inode(handle_t *handle,
bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
- rc = ext4_handle_dirty_metadata(handle, NULL, bh);
- if (!err)
- err = rc;
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_brelse;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
if (set_large_file) {
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9dbb5542167a..6718c7ccd631 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1032,7 +1032,10 @@ resizefs_out:
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto pwsalt_err_journal;
+ lock_buffer(sbi->s_sbh);
generate_random_uuid(sbi->s_es->s_encrypt_pw_salt);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
err = ext4_handle_dirty_metadata(handle, NULL,
sbi->s_sbh);
pwsalt_err_journal:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d7cedfaa1cc0..c40d3c44a1d6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1558,10 +1558,11 @@ static int mb_find_extent(struct ext4_buddy *e4b, int block,
if (ex->fe_start + ex->fe_len > EXT4_CLUSTERS_PER_GROUP(e4b->bd_sb)) {
/* Should never happen! (but apparently sometimes does?!?) */
WARN_ON(1);
- ext4_error(e4b->bd_sb, "corruption or bug in mb_find_extent "
- "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
- block, order, needed, ex->fe_group, ex->fe_start,
- ex->fe_len, ex->fe_logical);
+ ext4_grp_locked_error(e4b->bd_sb, e4b->bd_group, 0, 0,
+ "corruption or bug in mb_find_extent "
+ "block=%d, order=%d needed=%d ex=%u/%d/%d@%u",
+ block, order, needed, ex->fe_group, ex->fe_start,
+ ex->fe_len, ex->fe_logical);
ex->fe_len = 0;
ex->fe_start = 0;
ex->fe_group = 0;
@@ -3017,7 +3018,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (!ext4_data_block_valid(sbi, block, len)) {
+ if (!ext4_inode_block_valid(ac->ac_inode, block, len)) {
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
@@ -4718,6 +4719,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_first_block_no(sb, group) +
EXT4_C2B(sbi, cluster),
"Block already on to-be-freed list");
+ kmem_cache_free(ext4_free_data_cachep, new_entry);
return 0;
}
}
@@ -4782,7 +4784,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb);
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
- !ext4_data_block_valid(sbi, block, count)) {
+ !ext4_inode_block_valid(inode, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
goto error_return;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 161099f39ab9..9a138a6dc17e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1308,8 +1308,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(fname, de)) {
/* found a match - just to be sure, do
* a full check */
- if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
- bh->b_size, offset))
+ if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
+ buf_size, offset))
return -1;
*res_dir = de;
return 1;
@@ -1741,7 +1741,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
- /* Split the existing block in the middle, size-wise */
+ /* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
@@ -1751,8 +1751,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
- /* map index at which we will split */
- split = count - move;
+ /*
+ * map index at which we will split
+ *
+ * If the sum of active entries didn't exceed half the block size, just
+ * split it in half by count; each resulting block will have at least
+ * half the space free.
+ */
+ if (i > 0)
+ split = count - move;
+ else
+ split = count/2;
+
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
@@ -2283,11 +2293,10 @@ again:
(frame - 1)->bh);
if (err)
goto journal_error;
- if (restart) {
- err = ext4_handle_dirty_dx_node(handle, dir,
- frame->bh);
+ err = ext4_handle_dirty_dx_node(handle, dir,
+ frame->bh);
+ if (restart || err)
goto journal_error;
- }
} else {
struct dx_root *dxroot;
memcpy((char *) entries2, (char *) entries,
@@ -2353,7 +2362,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
- bh->b_data, bh->b_size, i))
+ entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
@@ -3270,7 +3279,7 @@ static int ext4_link(struct dentry *old_dentry,
return -EMLINK;
if (ext4_encrypted_inode(dir) &&
!fscrypt_has_permitted_context(dir, inode))
- return -EPERM;
+ return -EXDEV;
if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
(!projid_eq(EXT4_I(dir)->i_projid,
@@ -3432,12 +3441,35 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
return retval;
}
}
- brelse(ent->bh);
- ent->bh = NULL;
return 0;
}
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+ unsigned ino, unsigned file_type)
+{
+ struct ext4_renament old = *ent;
+ int retval = 0;
+
+ /*
+ * old->de could have moved from under us during make indexed dir,
+ * so the old->de may no longer valid and need to find it again
+ * before reset old inode info.
+ */
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ if (IS_ERR(old.bh))
+ retval = PTR_ERR(old.bh);
+ if (!old.bh)
+ retval = -ENOENT;
+ if (retval) {
+ ext4_std_error(old.dir->i_sb, retval);
+ return;
+ }
+
+ ext4_setent(handle, &old, ino, file_type);
+ brelse(old.bh);
+}
+
static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
const struct qstr *d_name)
{
@@ -3603,13 +3635,13 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
retval = -ENOENT;
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
- goto end_rename;
+ goto release_bh;
if ((old.dir != new.dir) &&
ext4_encrypted_inode(new.dir) &&
!fscrypt_has_permitted_context(new.dir, old.inode)) {
- retval = -EPERM;
- goto end_rename;
+ retval = -EXDEV;
+ goto release_bh;
}
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
@@ -3617,7 +3649,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
new.bh = NULL;
- goto end_rename;
+ goto release_bh;
}
if (new.bh) {
if (!new.inode) {
@@ -3634,18 +3666,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
- handle = NULL;
- goto end_rename;
+ goto release_bh;
}
} else {
whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
if (IS_ERR(whiteout)) {
retval = PTR_ERR(whiteout);
- whiteout = NULL;
- goto end_rename;
+ goto release_bh;
}
}
+ old_file_type = old.de->file_type;
if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
ext4_handle_sync(handle);
@@ -3673,7 +3704,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
- old_file_type = old.de->file_type;
if (whiteout) {
/*
* Do this before adding a new entry, so the old entry is sure
@@ -3745,17 +3775,23 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = 0;
end_rename:
- brelse(old.dir_bh);
- brelse(old.bh);
- brelse(new.bh);
if (whiteout) {
- if (retval)
+ if (retval) {
+ ext4_resetent(handle, &old,
+ old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_orphan_add(handle, whiteout);
+ }
unlock_new_inode(whiteout);
+ ext4_journal_stop(handle);
iput(whiteout);
- }
- if (handle)
+ } else {
ext4_journal_stop(handle);
+ }
+release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+ brelse(new.bh);
return retval;
}
@@ -3788,7 +3824,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old.inode) ||
!fscrypt_has_permitted_context(old_dir, new.inode)))
- return -EPERM;
+ return -EXDEV;
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(new_dir)->i_projid,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 19af346a6651..454fd21a00b4 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -837,8 +837,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
BUFFER_TRACE(dind, "get_write_access");
err = ext4_journal_get_write_access(handle, dind);
- if (unlikely(err))
+ if (unlikely(err)) {
ext4_std_error(sb, err);
+ goto errout;
+ }
/* ext4_reserve_inode_write() gets a reference on the iloc */
err = ext4_reserve_inode_write(handle, inode, &iloc);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ffc985d78137..105334ebc102 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -63,10 +63,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -1679,8 +1679,8 @@ static const struct mount_opts {
{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
MOPT_CLEAR | MOPT_Q},
- {Opt_usrjquota, 0, MOPT_Q},
- {Opt_grpjquota, 0, MOPT_Q},
+ {Opt_usrjquota, 0, MOPT_Q | MOPT_STRING},
+ {Opt_grpjquota, 0, MOPT_Q | MOPT_STRING},
{Opt_offusrjquota, 0, MOPT_Q},
{Opt_offgrpjquota, 0, MOPT_Q},
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
@@ -2614,8 +2614,15 @@ static void ext4_orphan_cleanup(struct super_block *sb,
inode_lock(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ret = ext4_truncate(inode);
- if (ret)
+ if (ret) {
+ /*
+ * We need to clean up the in-core orphan list
+ * manually if ext4_truncate() failed to get a
+ * transaction handle.
+ */
+ ext4_orphan_del(NULL, inode);
ext4_std_error(inode->i_sb, ret);
+ }
inode_unlock(inode);
nr_truncates++;
} else {
@@ -2945,8 +2952,8 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
struct ext4_group_desc *gdp = NULL;
ext4_group_t group, ngroups;
struct super_block *sb;
- unsigned long timeout = 0;
int ret = 0;
+ u64 start_time;
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
@@ -2966,13 +2973,12 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
ret = 1;
if (!ret) {
- timeout = jiffies;
+ start_time = ktime_get_real_ns();
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
if (elr->lr_timeout == 0) {
- timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
- elr->lr_timeout = timeout;
+ elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
+ elr->lr_sbi->s_li_wait_mult);
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
@@ -4458,7 +4464,9 @@ no_journal:
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+ goto failed_mount8;
}
if (EXT4_SB(sb)->s_journal) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -4501,10 +4509,9 @@ cantfind_ext4:
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
-#ifdef CONFIG_QUOTA
failed_mount8:
ext4_unregister_sysfs(sb);
-#endif
+ kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@@ -4640,7 +4647,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
journal_inode = ext4_get_journal_inode(sb, journal_inum);
if (!journal_inode)
@@ -4670,7 +4678,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
bdev = ext4_blkdev_get(j_dev, sb);
if (bdev == NULL)
@@ -4762,7 +4771,8 @@ static int ext4_load_journal(struct super_block *sb,
int err = 0;
int really_read_only;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return -EFSCORRUPTED;
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -4831,7 +4841,12 @@ static int ext4_load_journal(struct super_block *sb,
}
EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
+ err = ext4_clear_journal_err(sb, es);
+ if (err) {
+ EXT4_SB(sb)->s_journal = NULL;
+ jbd2_journal_destroy(journal);
+ return err;
+ }
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -4850,15 +4865,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh || block_device_ejected(sb))
- return error;
-
- /*
- * The superblock bh should be mapped, but it might not be if the
- * device was hot-removed. Not much we can do but fail the I/O.
- */
- if (!buffer_mapped(sbh))
- return error;
+ if (!sbh)
+ return -EINVAL;
+ if (block_device_ejected(sb))
+ return -ENODEV;
/*
* If the file system is mounted read-only, don't update the
@@ -4930,26 +4940,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
+ int err;
journal_t *journal = EXT4_SB(sb)->s_journal;
if (!ext4_has_feature_journal(sb)) {
- BUG_ON(journal != NULL);
- return;
+ if (journal != NULL) {
+ ext4_error(sb, "Journal got removed while the fs was "
+ "mounted!");
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
+ err = jbd2_journal_flush(journal);
+ if (err < 0)
goto out;
if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
ext4_clear_feature_journal_needs_recovery(sb);
ext4_commit_super(sb, 1);
}
-
out:
jbd2_journal_unlock_updates(journal);
+ return err;
}
/*
@@ -4957,14 +4973,17 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
const char *errstr;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (!ext4_has_feature_journal(sb)) {
+ ext4_error(sb, "Journal got removed while the fs was mounted!");
+ return -EFSCORRUPTED;
+ }
journal = EXT4_SB(sb)->s_journal;
@@ -4989,6 +5008,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
}
+ return 0;
}
/*
@@ -5268,8 +5288,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
+ /*
+ * We let remount-ro finish even if marking fs
+ * as clean failed...
+ */
ext4_mark_recovery_complete(sb, es);
+ }
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
} else {
@@ -5317,8 +5342,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal) {
+ err = ext4_clear_journal_err(sb, es);
+ if (err)
+ goto restore_opts;
+ }
sbi->s_mount_state = le16_to_cpu(es->s_state);
if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
@@ -5344,7 +5372,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_register_li_request(sb, first_not_zeroed);
}
- ext4_setup_system_zone(sb);
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
+
if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
ext4_commit_super(sb, 1);
@@ -5607,6 +5638,11 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb)
return -EXDEV;
+
+ /* Quota already enabled for this file? */
+ if (IS_NOQUOTA(d_inode(path->dentry)))
+ return -EBUSY;
+
/* Journaling quota? */
if (EXT4_SB(sb)->s_qf_names[type]) {
/* Quotafile not in fs root? */
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b0873b89dc87..a97dcfd5a566 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1479,6 +1479,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
if (!ce)
return NULL;
+ WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
+ !(current->flags & PF_MEMALLOC_NOFS));
+
ea_data = ext4_kvmalloc(value_len, GFP_NOFS);
if (!ea_data) {
mb_cache_entry_put(ea_inode_cache, ce);
@@ -1823,8 +1826,11 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
if (EXT4_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
bs->bh = ext4_sb_bread(sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
- if (IS_ERR(bs->bh))
- return PTR_ERR(bs->bh);
+ if (IS_ERR(bs->bh)) {
+ error = PTR_ERR(bs->bh);
+ bs->bh = NULL;
+ return error;
+ }
ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount));
@@ -2342,6 +2348,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
error = -ENOSPC;
goto cleanup;
}
+ WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
}
error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -2415,7 +2422,7 @@ retry_inode:
* external inode if possible.
*/
if (ext4_has_feature_ea_inode(inode->i_sb) &&
- !i.in_inode) {
+ i.value_len && !i.in_inode) {
i.in_inode = 1;
goto retry_inode;
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 170423ff2721..0f2286e57907 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -202,6 +202,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
@@ -942,8 +944,12 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return -EIO;
+ }
spin_lock(&sbi->inode_lock[type]);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4abefd841b6c..ff519f7a8784 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -817,6 +817,17 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
+ /* check memory boundary before moving forward */
+ bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+ if (unlikely(bit_pos > d->max ||
+ le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
+ f2fs_msg(F2FS_I_SB(d->inode)->sb, KERN_WARNING,
+ "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
+ set_sbi_flag(F2FS_I_SB(d->inode)->sb->s_fs_info, SBI_NEED_FSCK);
+ return -EINVAL;
+ }
+
if (f2fs_encrypted_inode(d->inode)) {
int save_len = fstr->len;
int err;
@@ -835,7 +846,6 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
le32_to_cpu(de->ino), d_type))
return 1;
- bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
return 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index d98acc20a38a..90cc46e6421a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -729,7 +729,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
+ if (!in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 8906f6381b1a..74bc861bab39 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -203,7 +203,8 @@ out:
f2fs_put_page(page, 1);
- f2fs_balance_fs(sbi, dn.node_changed);
+ if (!err)
+ f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index b13383948fca..9fb98fce7096 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -222,7 +222,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (f2fs_encrypted_inode(dir) &&
!fscrypt_has_permitted_context(dir, inode))
- return -EPERM;
+ return -EXDEV;
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
@@ -746,7 +746,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode)) {
- err = -EPERM;
+ err = -EXDEV;
goto out;
}
@@ -942,7 +942,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
!fscrypt_has_permitted_context(old_dir, new_inode)))
- return -EPERM;
+ return -EXDEV;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index e7b8e2b35e22..f8006f62c546 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2011,6 +2011,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
+ if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
+ nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;
+
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
return;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 2cd0d126ef8f..4307f8db6c8f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3337,14 +3337,17 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
} else {
memcpy(se->discard_map, se->cur_valid_map,
SIT_VBLOCK_MAP_SIZE);
- sbi->discard_blks += old_valid_blocks -
- se->valid_blocks;
+ sbi->discard_blks += old_valid_blocks;
+ sbi->discard_blks -= se->valid_blocks;
}
}
- if (sbi->segs_per_sec > 1)
+ if (sbi->segs_per_sec > 1) {
get_sec_entry(sbi, start)->valid_blocks +=
- se->valid_blocks - old_valid_blocks;
+ se->valid_blocks;
+ get_sec_entry(sbi, start)->valid_blocks -=
+ old_valid_blocks;
+ }
}
up_read(&curseg->journal_rwsem);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0d46e936d54e..00c415131b06 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -91,11 +91,11 @@
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define GET_SEC_FROM_SEG(sbi, segno) \
- ((segno) / (sbi)->segs_per_sec)
+ (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
((secno) * (sbi)->segs_per_sec)
#define GET_ZONE_FROM_SEC(sbi, secno) \
- ((secno) / (sbi)->secs_per_zone)
+ (((secno) == -1) ? -1: (secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2d021a33914a..b2c747f53c0c 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -782,6 +782,9 @@ static void f2fs_put_super(struct super_block *sb)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int i;
+ /* unregister procfs/sysfs entries in advance to avoid race case */
+ f2fs_unregister_sysfs(sbi);
+
f2fs_quota_off_umount(sb);
/* prevent remaining shrinker jobs */
@@ -834,8 +837,6 @@ static void f2fs_put_super(struct super_block *sb)
kfree(sbi->ckpt);
- f2fs_unregister_sysfs(sbi);
-
sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
@@ -918,7 +919,8 @@ static int f2fs_statfs_project(struct super_block *sb,
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) {
- curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
+ curblock = (dquot->dq_dqb.dqb_curspace +
+ dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
buf->f_blocks = limit;
buf->f_bfree = buf->f_bavail =
(buf->f_blocks > curblock) ?
@@ -2894,4 +2896,5 @@ module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32");
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index a55919eec035..6a13099b3c82 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -563,4 +563,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
}
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 89c975126d4e..b3c64ab0d5a5 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -636,8 +636,15 @@ static int __f2fs_setxattr(struct inode *inode, int index,
}
last = here;
- while (!IS_XATTR_LAST_ENTRY(last))
+ while (!IS_XATTR_LAST_ENTRY(last)) {
+ if ((void *)(last) + sizeof(__u32) > last_base_addr ||
+ (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
+ goto exit;
+ }
last = XATTR_NEXT_ENTRY(last);
+ }
newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1df023c4c2cc..c41393e30a04 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1512,6 +1512,12 @@ static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b,
goto out;
}
+ if (bpb->fat_fat_length == 0 && bpb->fat32_length == 0) {
+ if (!silent)
+ fat_msg(sb, KERN_ERR, "bogus number of FAT sectors");
+ goto out;
+ }
+
error = 0;
out:
diff --git a/fs/file.c b/fs/file.c
index 0c25b980affe..f1943da6303b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -75,7 +75,7 @@ static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
*/
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
{
- unsigned int cpy, set;
+ size_t cpy, set;
BUG_ON(nfdt->max_fds < ofdt->max_fds);
@@ -679,7 +679,7 @@ void do_close_on_exec(struct files_struct *files)
spin_unlock(&files->file_lock);
}
-static struct file *__fget(unsigned int fd, fmode_t mask)
+static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs)
{
struct files_struct *files = current->files;
struct file *file;
@@ -694,23 +694,32 @@ loop:
*/
if (file->f_mode & mask)
file = NULL;
- else if (!get_file_rcu(file))
+ else if (!get_file_rcu_many(file, refs))
+ goto loop;
+ else if (__fcheck_files(files, fd) != file) {
+ fput_many(file, refs);
goto loop;
+ }
}
rcu_read_unlock();
return file;
}
+struct file *fget_many(unsigned int fd, unsigned int refs)
+{
+ return __fget(fd, FMODE_PATH, refs);
+}
+
struct file *fget(unsigned int fd)
{
- return __fget(fd, FMODE_PATH);
+ return __fget(fd, FMODE_PATH, 1);
}
EXPORT_SYMBOL(fget);
struct file *fget_raw(unsigned int fd)
{
- return __fget(fd, 0);
+ return __fget(fd, 0, 1);
}
EXPORT_SYMBOL(fget_raw);
@@ -741,7 +750,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
return 0;
return (unsigned long)file;
} else {
- file = __fget(fd, mask);
+ file = __fget(fd, mask, 1);
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;
diff --git a/fs/file_table.c b/fs/file_table.c
index c6bab39c1813..d52c5d449460 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -263,9 +263,9 @@ EXPORT_SYMBOL_GPL(flush_delayed_fput);
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
-void fput(struct file *file)
+void fput_many(struct file *file, unsigned int refs)
{
- if (atomic_long_dec_and_test(&file->f_count)) {
+ if (atomic_long_sub_and_test(refs, &file->f_count)) {
struct task_struct *task = current;
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
@@ -284,6 +284,11 @@ void fput(struct file *file)
}
}
+void fput(struct file *file)
+{
+ fput_many(file, 1);
+}
+
/*
* synchronous analog of fput(); for kernel threads that might be needed
* in some umount() (and thus can't use flush_delayed_fput() without
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6398bd8a066e..1e583e24dd5d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -45,7 +45,6 @@ struct wb_completion {
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- unsigned long *older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
@@ -160,7 +159,9 @@ static void inode_io_list_del_locked(struct inode *inode,
struct bdi_writeback *wb)
{
assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+ inode->i_state &= ~I_SYNC_QUEUED;
list_del_init(&inode->i_io_list);
wb_io_lists_depopulated(wb);
}
@@ -269,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
wb_put(wb);
}
+EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
@@ -510,9 +512,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* find and pin the new wb */
rcu_read_lock();
memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
- if (memcg_css)
- isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ if (memcg_css && !css_tryget(memcg_css))
+ memcg_css = NULL;
rcu_read_unlock();
+ if (!memcg_css)
+ goto out_free;
+
+ isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+ css_put(memcg_css);
if (!isw->new_wb)
goto out_free;
@@ -1038,7 +1045,9 @@ void inode_io_list_del(struct inode *inode)
struct bdi_writeback *wb;
wb = inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
inode_io_list_del_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
@@ -1087,8 +1096,10 @@ void sb_clear_inode_writeback(struct inode *inode)
* the case then the inode must have been redirtied while it was being written
* out and we don't reset its dirtied_when.
*/
-static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
{
+ assert_spin_locked(&inode->i_lock);
+
if (!list_empty(&wb->b_dirty)) {
struct inode *tail;
@@ -1097,6 +1108,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
inode->dirtied_when = jiffies;
}
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
+ inode->i_state &= ~I_SYNC_QUEUED;
+}
+
+static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
+{
+ spin_lock(&inode->i_lock);
+ redirty_tail_locked(inode, wb);
+ spin_unlock(&inode->i_lock);
}
/*
@@ -1135,16 +1154,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
#define EXPIRE_DIRTY_ATIME 0x0001
/*
- * Move expired (dirtied before work->older_than_this) dirty inodes from
+ * Move expired (dirtied before dirtied_before) dirty inodes from
* @delaying_queue to @dispatch_queue.
*/
static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue,
- int flags,
- struct wb_writeback_work *work)
+ unsigned long dirtied_before)
{
- unsigned long *older_than_this = NULL;
- unsigned long expire_time;
LIST_HEAD(tmp);
struct list_head *pos, *node;
struct super_block *sb = NULL;
@@ -1152,21 +1168,15 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
- if ((flags & EXPIRE_DIRTY_ATIME) == 0)
- older_than_this = work->older_than_this;
- else if (!work->for_sync) {
- expire_time = jiffies - (dirtytime_expire_interval * HZ);
- older_than_this = &expire_time;
- }
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (older_than_this &&
- inode_dirtied_after(inode, *older_than_this))
+ if (inode_dirtied_after(inode, dirtied_before))
break;
list_move(&inode->i_io_list, &tmp);
moved++;
- if (flags & EXPIRE_DIRTY_ATIME)
- set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_SYNC_QUEUED;
+ spin_unlock(&inode->i_lock);
if (sb_is_blkdev_sb(inode->i_sb))
continue;
if (sb && sb != inode->i_sb)
@@ -1204,18 +1214,22 @@ out:
* |
* +--> dequeue for IO
*/
-static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
+static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
+ unsigned long dirtied_before)
{
int moved;
+ unsigned long time_expire_jif = dirtied_before;
assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io);
- moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
+ moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
+ if (!work->for_sync)
+ time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
- EXPIRE_DIRTY_ATIME, work);
+ time_expire_jif);
if (moved)
wb_io_lists_populated(wb);
- trace_writeback_queue_io(wb, work, moved);
+ trace_writeback_queue_io(wb, work, dirtied_before, moved);
}
static int write_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1309,7 +1323,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
return;
}
@@ -1329,7 +1343,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* retrying writeback of the dirty page/inode
* that cannot be performed immediately.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
}
} else if (inode->i_state & I_DIRTY) {
/*
@@ -1337,10 +1351,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* such as delayed allocation during submission or metadata
* updates after data IO completion.
*/
- redirty_tail(inode, wb);
+ redirty_tail_locked(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
inode->dirtied_when = jiffies;
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
+ inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
inode_io_list_del_locked(inode, wb);
@@ -1380,25 +1395,25 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
+ * If the inode has dirty timestamps and we need to write them, call
+ * mark_inode_dirty_sync() to notify the filesystem about it and to
+ * change I_DIRTY_TIME into I_DIRTY_SYNC.
+ */
+ if ((inode->i_state & I_DIRTY_TIME) &&
+ (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+ time_after(jiffies, inode->dirtied_time_when +
+ dirtytime_expire_interval * HZ))) {
+ trace_writeback_lazytime(inode);
+ mark_inode_dirty_sync(inode);
+ }
+
+ /*
* Some filesystems may redirty the inode during the writeback
* due to delalloc, clear dirty metadata flags right before
* write_inode()
*/
spin_lock(&inode->i_lock);
-
dirty = inode->i_state & I_DIRTY;
- if (inode->i_state & I_DIRTY_TIME) {
- if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
- wbc->sync_mode == WB_SYNC_ALL ||
- unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
- unlikely(time_after(jiffies,
- (inode->dirtied_time_when +
- dirtytime_expire_interval * HZ)))) {
- dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
- trace_writeback_lazytime(inode);
- }
- } else
- inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
inode->i_state &= ~dirty;
/*
@@ -1419,8 +1434,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock);
- if (dirty & I_DIRTY_TIME)
- mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc);
@@ -1584,8 +1597,8 @@ static long writeback_sb_inodes(struct super_block *sb,
*/
spin_lock(&inode->i_lock);
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+ redirty_tail_locked(inode, wb);
spin_unlock(&inode->i_lock);
- redirty_tail(inode, wb);
continue;
}
if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
@@ -1726,7 +1739,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
if (list_empty(&wb->b_io))
- queue_io(wb, &work);
+ queue_io(wb, &work, jiffies);
__writeback_inodes_wb(wb, &work);
spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
@@ -1746,7 +1759,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
* takes longer than a dirty_writeback_interval interval, then leave a
* one-second gap.
*
- * older_than_this takes precedence over nr_to_write. So we'll only write back
+ * dirtied_before takes precedence over nr_to_write. So we'll only write back
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
@@ -1754,14 +1767,11 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
- unsigned long oldest_jif;
+ unsigned long dirtied_before = jiffies;
struct inode *inode;
long progress;
struct blk_plug plug;
- oldest_jif = jiffies;
- work->older_than_this = &oldest_jif;
-
blk_start_plug(&plug);
spin_lock(&wb->list_lock);
for (;;) {
@@ -1795,14 +1805,14 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- oldest_jif = jiffies -
+ dirtied_before = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- oldest_jif = jiffies;
+ dirtied_before = jiffies;
trace_writeback_start(wb, work);
if (list_empty(&wb->b_io))
- queue_io(wb, work);
+ queue_io(wb, work, dirtied_before);
if (work->sb)
progress = writeback_sb_inodes(work->sb, wb, work);
else
@@ -1960,7 +1970,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;
- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
@@ -2077,28 +2087,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
return ret;
}
-static noinline void block_dump___mark_inode_dirty(struct inode *inode)
-{
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
- struct dentry *dentry;
- const char *name = "?";
-
- dentry = d_find_alias(inode);
- if (dentry) {
- spin_lock(&dentry->d_lock);
- name = (const char *) dentry->d_name.name;
- }
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- if (dentry) {
- spin_unlock(&dentry->d_lock);
- dput(dentry);
- }
- }
-}
-
/**
* __mark_inode_dirty - internal function
*
@@ -2127,7 +2115,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
*/
void __mark_inode_dirty(struct inode *inode, int flags)
{
-#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
struct super_block *sb = inode->i_sb;
int dirtytime;
@@ -2137,7 +2124,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
+ if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
@@ -2159,9 +2146,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return;
- if (unlikely(block_dump))
- block_dump___mark_inode_dirty(inode);
-
spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
@@ -2175,11 +2159,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode->i_state |= flags;
/*
- * If the inode is being synced, just update its dirty state.
- * The unlocker will place the inode on the appropriate
- * superblock list, based upon its state.
+ * If the inode is queued for writeback by flush worker, just
+ * update its dirty state. Once the flush worker is done with
+ * the inode it will place it on the appropriate superblock
+ * list, based upon its state.
*/
- if (inode->i_state & I_SYNC)
+ if (inode->i_state & I_SYNC_QUEUED)
goto out_unlock_inode;
/*
@@ -2212,7 +2197,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if (dirtytime)
inode->dirtied_time_when = jiffies;
- if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+ if (inode->i_state & I_DIRTY)
dirty_list = &wb->b_dirty;
else
dirty_list = &wb->b_dirty_time;
@@ -2236,8 +2221,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
out_unlock_inode:
spin_unlock(&inode->i_lock);
-
-#undef I_DIRTY_INODE
}
EXPORT_SYMBOL(__mark_inode_dirty);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 55db06c7c587..b15eaa9e6cd7 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -616,6 +616,8 @@ static int __init cuse_init(void)
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
+ /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
+ cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f580695b7bb9..db7d746633cf 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -824,7 +824,6 @@ static int fuse_check_page(struct page *page)
{
if (page_mapcount(page) ||
page->mapping != NULL ||
- page_count(page) != 1 ||
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
~(1 << PG_locked |
1 << PG_referenced |
@@ -846,15 +845,16 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
struct page *newpage;
struct pipe_buffer *buf = cs->pipebufs;
+ get_page(oldpage);
err = unlock_request(cs->req);
if (err)
- return err;
+ goto out_put_old;
fuse_copy_finish(cs);
err = pipe_buf_confirm(cs->pipe, buf);
if (err)
- return err;
+ goto out_put_old;
BUG_ON(!cs->nr_segs);
cs->currbuf = buf;
@@ -894,7 +894,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
if (err) {
unlock_page(newpage);
- return err;
+ goto out_put_old;
}
get_page(newpage);
@@ -902,6 +902,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
lru_cache_add_file(newpage);
+ /*
+ * Release while we have extra ref on stolen page. Otherwise
+ * anon_pipe_buf_release() might think the page can be reused.
+ */
+ pipe_buf_release(cs->pipe, buf);
+
err = 0;
spin_lock(&cs->req->waitq.lock);
if (test_bit(FR_ABORTED, &cs->req->flags))
@@ -913,14 +919,19 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) {
unlock_page(newpage);
put_page(newpage);
- return err;
+ goto out_put_old;
}
unlock_page(oldpage);
+ /* Drop ref for ap->pages[] array */
put_page(oldpage);
cs->len = 0;
- return 0;
+ err = 0;
+out_put_old:
+ /* Drop ref obtained in this function */
+ put_page(oldpage);
+ return err;
out_fallback_unlock:
unlock_page(newpage);
@@ -929,10 +940,10 @@ out_fallback:
cs->offset = buf->offset;
err = lock_request(cs->req);
- if (err)
- return err;
+ if (!err)
+ err = 1;
- return 1;
+ goto out_put_old;
}
static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
@@ -944,14 +955,16 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
if (cs->nr_segs == cs->pipe->buffers)
return -EIO;
+ get_page(page);
err = unlock_request(cs->req);
- if (err)
+ if (err) {
+ put_page(page);
return err;
+ }
fuse_copy_finish(cs);
buf = cs->pipebufs;
- get_page(page);
buf->page = page;
buf->offset = offset;
buf->len = count;
@@ -1297,6 +1310,15 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
goto restart;
}
spin_lock(&fpq->lock);
+ /*
+ * Must not put request on fpq->io queue after having been shut down by
+ * fuse_abort_conn()
+ */
+ if (!fpq->connected) {
+ req->out.h.error = err = -ECONNABORTED;
+ goto out_end;
+
+ }
list_add(&req->list, &fpq->io);
spin_unlock(&fpq->lock);
cs->req = req;
@@ -1873,7 +1895,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
}
err = -EINVAL;
- if (oh.error <= -1000 || oh.error > 0)
+ if (oh.error <= -512 || oh.error > 0)
goto err_finish;
spin_lock(&fpq->lock);
@@ -2030,8 +2052,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
pipe_lock(pipe);
out_free:
- for (idx = 0; idx < nbuf; idx++)
- pipe_buf_release(pipe, &bufs[idx]);
+ for (idx = 0; idx < nbuf; idx++) {
+ struct pipe_buffer *buf = &bufs[idx];
+
+ if (buf->ops)
+ pipe_buf_release(pipe, buf);
+ }
pipe_unlock(pipe);
kfree(bufs);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4d95a416fc36..b8d13b69583c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -969,7 +969,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!parent)
return -ENOENT;
- inode_lock(parent);
+ inode_lock_nested(parent, I_MUTEX_PARENT);
if (!S_ISDIR(parent->i_mode))
goto unlock;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 969584c99c54..4238939af2fe 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -17,6 +17,7 @@
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
+#include <linux/fs.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -2534,7 +2535,16 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
struct iovec *iov = iov_page;
iov->iov_base = (void __user *)arg;
- iov->iov_len = _IOC_SIZE(cmd);
+
+ switch (cmd) {
+ case FS_IOC_GETFLAGS:
+ case FS_IOC_SETFLAGS:
+ iov->iov_len = sizeof(int);
+ break;
+ default:
+ iov->iov_len = _IOC_SIZE(cmd);
+ break;
+ }
if (_IOC_DIR(cmd) & _IOC_WRITE) {
in_iov = iov;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1e2ff4b32c79..be969f24ccf0 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -636,9 +636,6 @@ __acquires(&gl->gl_lockref.lock)
goto out_unlock;
if (nonblock)
goto out_sched;
- smp_mb();
- if (atomic_read(&gl->gl_revokes) != 0)
- goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
@@ -873,7 +870,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
out_free:
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
- atomic_dec(&sdp->sd_glock_disposal);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
out:
return ret;
@@ -1458,6 +1456,7 @@ __acquires(&lru_lock)
while(!list_empty(list)) {
gl = list_entry(list->next, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
+ clear_bit(GLF_LRU, &gl->gl_flags);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
list_add(&gl->gl_lru, &lru_list);
@@ -1503,7 +1502,6 @@ static long gfs2_scan_glock_lru(int nr)
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
list_move(&gl->gl_lru, &dispose);
atomic_dec(&lru_count);
- clear_bit(GLF_LRU, &gl->gl_flags);
freed++;
continue;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index cdd1c5f06f45..f1844af4005b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -89,6 +89,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
memset(&tr, 0, sizeof(tr));
INIT_LIST_HEAD(&tr.tr_buf);
INIT_LIST_HEAD(&tr.tr_databuf);
+ INIT_LIST_HEAD(&tr.tr_ail1_list);
+ INIT_LIST_HEAD(&tr.tr_ail2_list);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
if (!tr.tr_revokes)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6c6401084d3d..e893b1fbde98 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -714,7 +714,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
- goto fail_gunlock2;
+ goto fail_free_inode;
if (blocks > 1) {
ip->i_eattr = ip->i_no_addr + 1;
@@ -725,7 +725,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
- goto fail_gunlock2;
+ goto fail_free_inode;
BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
@@ -734,7 +734,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
goto fail_gunlock2;
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
- gfs2_glock_put(io_gl);
gfs2_set_iop(inode);
insert_inode_hash(inode);
@@ -767,6 +766,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
mark_inode_dirty(inode);
d_instantiate(dentry, inode);
+ /* After instantiate, errors should result in evict which will destroy
+ * both inode and iopen glocks properly. */
if (file) {
*opened |= FILE_CREATED;
error = finish_open(file, dentry, gfs2_open_common, opened);
@@ -774,15 +775,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
+ gfs2_glock_put(io_gl);
return error;
fail_gunlock3:
glock_clear_object(io_gl, ip);
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
- gfs2_glock_put(io_gl);
fail_gunlock2:
- if (io_gl)
- clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
+ clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
+ gfs2_glock_put(io_gl);
fail_free_inode:
if (ip->i_gl) {
glock_clear_object(ip->i_gl, ip);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6f5c033fe4b5..f3c16a504c8d 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -283,7 +283,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- int lvb_needs_unlock = 0;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
@@ -296,13 +295,15 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
- lvb_needs_unlock = 1;
+ /* don't want to call dlm if we've unmounted the lock protocol */
+ if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+ gfs2_glock_free(gl);
+ return;
+ }
+ /* don't want to skip dlm_unlock writing the lvb when lock has one */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !lvb_needs_unlock) {
+ !gl->gl_lksb.sb_lvbptr) {
gfs2_glock_free(gl);
return;
}
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index a3208511f35a..b05c0ed36b6f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -734,8 +734,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
tr = sdp->sd_log_tr;
if (tr) {
sdp->sd_log_tr = NULL;
- INIT_LIST_HEAD(&tr->tr_ail1_list);
- INIT_LIST_HEAD(&tr->tr_ail2_list);
tr->tr_first = sdp->sd_log_flush_head;
if (unlikely (state == SFS_FROZEN))
gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
@@ -804,8 +802,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
* @new: New transaction to be merged
*/
-static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
+static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
{
+ struct gfs2_trans *old = sdp->sd_log_tr;
+
WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));
old->tr_num_buf_new += new->tr_num_buf_new;
@@ -817,6 +817,11 @@ static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
list_splice_tail_init(&new->tr_buf, &old->tr_buf);
+
+ spin_lock(&sdp->sd_ail_lock);
+ list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
+ list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
+ spin_unlock(&sdp->sd_ail_lock);
}
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
@@ -828,7 +833,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_log_lock(sdp);
if (sdp->sd_log_tr) {
- gfs2_merge_trans(sdp->sd_log_tr, tr);
+ gfs2_merge_trans(sdp, tr);
} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
sdp->sd_log_tr = tr;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 057be88eb1b4..0b5c37ceb3ed 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -161,15 +161,19 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
return -EINVAL;
}
- /* If format numbers match exactly, we're done. */
-
- if (sb->sb_fs_format == GFS2_FORMAT_FS &&
- sb->sb_multihost_format == GFS2_FORMAT_MULTI)
- return 0;
+ if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+ sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
+ fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ return -EINVAL;
+ }
- fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
+ if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
+ (sb->sb_bsize & (sb->sb_bsize - 1))) {
+ pr_warn("Invalid superblock size\n");
+ return -EINVAL;
+ }
- return -EINVAL;
+ return 0;
}
static void end_bio_io_page(struct bio *bio)
@@ -922,7 +926,7 @@ fail:
}
static const match_table_t nolock_tokens = {
- { Opt_jid, "jid=%d\n", },
+ { Opt_jid, "jid=%d", },
{ Opt_err, NULL },
};
@@ -1179,7 +1183,17 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
goto fail_per_node;
}
- if (!sb_rdonly(sb)) {
+ if (sb_rdonly(sb)) {
+ struct gfs2_holder freeze_gh;
+
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
+ GL_EXACT, &freeze_gh);
+ if (error) {
+ fs_err(sdp, "can't make FS RO: %d\n", error);
+ goto fail_per_node;
+ }
+ gfs2_glock_dq_uninit(&freeze_gh);
+ } else {
error = gfs2_make_fs_rw(sdp);
if (error) {
fs_err(sdp, "can't make FS RW: %d\n", error);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e700fb162664..a833e2e07167 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1039,8 +1039,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
u32 x;
int error = 0;
- if (capable(CAP_SYS_RESOURCE) ||
- sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+ if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
return 0;
error = gfs2_quota_hold(ip, uid, gid);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 836f29480be6..e3a6e2404d11 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -47,7 +47,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip,
int ret;
ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */
- if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
+ if (capable(CAP_SYS_RESOURCE) ||
+ sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
return 0;
ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
if (ret)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7cb0672294df..87656030ec7d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -720,9 +720,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
gfs2_free_clones(rgd);
+ return_all_reservations(rgd);
kfree(rgd->rd_bits);
rgd->rd_bits = NULL;
- return_all_reservations(rgd);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
@@ -990,6 +990,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+ fs_err(sdp, "no resource groups found in the file system.\n");
+ return -ENOENT;
+ }
set_rgrp_preferences(sdp);
sdp->sd_rindex_uptodate = 1;
@@ -1361,6 +1365,9 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+ return -EROFS;
+
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c3f3f1ae4e1b..56bfed0a5873 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -791,7 +791,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
- if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
+ if (!(flags & I_DIRTY_INODE))
return;
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return;
@@ -924,6 +924,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
+ truncate_inode_pages_final(&sdp->sd_aspace);
gfs2_delete_debugfs_file(sdp);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
@@ -988,11 +989,13 @@ void gfs2_freeze_func(struct work_struct *work)
static int gfs2_freeze(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
- int error = 0;
+ int error;
mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
+ error = -EBUSY;
goto out;
+ }
if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
error = -EINVAL;
@@ -1034,10 +1037,10 @@ static int gfs2_unfreeze(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
mutex_lock(&sdp->sd_freeze_mutex);
- if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+ if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
!gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
mutex_unlock(&sdp->sd_freeze_mutex);
- return 0;
+ return -EINVAL;
}
gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 69e3402a3cc5..3a9abc3aac82 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -56,6 +56,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
sizeof(u64));
INIT_LIST_HEAD(&tr->tr_databuf);
INIT_LIST_HEAD(&tr->tr_buf);
+ INIT_LIST_HEAD(&tr->tr_ail1_list);
+ INIT_LIST_HEAD(&tr->tr_ail2_list);
sb_start_intwrite(sdp->sd_vfs);
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index 4af318fbda77..ef9498a6e88a 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -25,7 +25,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- mutex_lock(&tree->tree_lock);
+ switch (tree->cnid) {
+ case HFS_CAT_CNID:
+ mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
+ break;
+ case HFS_EXT_CNID:
+ mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
+ break;
+ case HFS_ATTR_CNID:
+ mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 8aec5e732abf..bca3ea4137ee 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -15,16 +15,31 @@
#include "btree.h"
-void hfs_bnode_read(struct hfs_bnode *node, void *buf,
- int off, int len)
+void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
+ int pagenum;
+ int bytes_read;
+ int bytes_to_read;
+ void *vaddr;
off += node->page_offset;
- page = node->page[0];
+ pagenum = off >> PAGE_SHIFT;
+ off &= ~PAGE_MASK; /* compute page offset for the first page */
- memcpy(buf, kmap(page) + off, len);
- kunmap(page);
+ for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) {
+ if (pagenum >= node->tree->pages_per_bnode)
+ break;
+ page = node->page[pagenum];
+ bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off);
+
+ vaddr = kmap_atomic(page);
+ memcpy(buf + bytes_read, vaddr + off, bytes_to_read);
+ kunmap_atomic(vaddr);
+
+ pagenum++;
+ off = 0; /* page offset only applies to the first page */
+ }
}
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index dcc2aab1b2c4..25ac9a8bb57a 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -13,6 +13,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *);
#define NODE_HASH_SIZE 256
+/* B-tree mutex nested subclasses */
+enum hfs_btree_mutex_classes {
+ CATALOG_BTREE_MUTEX,
+ EXTENTS_BTREE_MUTEX,
+ ATTR_BTREE_MUTEX,
+};
+
/* A HFS BTree held in memory */
struct hfs_btree {
struct super_block *sb;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 7e0d65e9586c..691810b0e6bc 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -427,14 +427,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!res) {
if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
res = -EIO;
- goto bail;
+ goto bail_hfs_find;
}
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
}
- if (res) {
- hfs_find_exit(&fd);
- goto bail_no_root;
- }
+ if (res)
+ goto bail_hfs_find;
res = -EINVAL;
root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
@@ -450,6 +448,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
/* everything's okay */
return 0;
+bail_hfs_find:
+ hfs_find_exit(&fd);
bail_no_root:
pr_err("get root inode failed\n");
bail:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ba54a0e12bbd..142742d7cb69 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -436,7 +436,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
u32 hash;
index = page->index;
- hash = hugetlb_fault_mutex_hash(h, mapping, index, 0);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/*
@@ -618,7 +618,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
addr = index * hpage_size;
/* mutex taken here, fault path and hole punch */
- hash = hugetlb_fault_mutex_hash(h, mapping, index, addr);
+ hash = hugetlb_fault_mutex_hash(h, mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
@@ -649,8 +649,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ set_page_huge_active(page);
/*
- * page_put due to reference from alloc_huge_page()
+ * put_page() due to reference from alloc_huge_page()
* unlock_page because locked by add_to_page_cache()
*/
put_page(page);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 947ce22f5b3c..55df4d80793b 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -152,6 +152,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
inode->i_ino);
+ brelse(bh);
return -EIO;
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index a4994e25e19e..ee3c92390413 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -157,7 +157,6 @@ struct iso9660_options{
unsigned int overriderockperm:1;
unsigned int uid_set:1;
unsigned int gid_set:1;
- unsigned int utf8:1;
unsigned char map;
unsigned char check;
unsigned int blocksize;
@@ -357,7 +356,6 @@ static int parse_options(char *options, struct iso9660_options *popt)
popt->gid = GLOBAL_ROOT_GID;
popt->uid = GLOBAL_ROOT_UID;
popt->iocharset = NULL;
- popt->utf8 = 0;
popt->overriderockperm = 0;
popt->session=-1;
popt->sbsector=-1;
@@ -390,10 +388,13 @@ static int parse_options(char *options, struct iso9660_options *popt)
case Opt_cruft:
popt->cruft = 1;
break;
+#ifdef CONFIG_JOLIET
case Opt_utf8:
- popt->utf8 = 1;
+ kfree(popt->iocharset);
+ popt->iocharset = kstrdup("utf8", GFP_KERNEL);
+ if (!popt->iocharset)
+ return 0;
break;
-#ifdef CONFIG_JOLIET
case Opt_iocharset:
kfree(popt->iocharset);
popt->iocharset = match_strdup(&args[0]);
@@ -496,7 +497,6 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
if (sbi->s_nocompress) seq_puts(m, ",nocompress");
if (sbi->s_overriderockperm) seq_puts(m, ",overriderockperm");
if (sbi->s_showassoc) seq_puts(m, ",showassoc");
- if (sbi->s_utf8) seq_puts(m, ",utf8");
if (sbi->s_check) seq_printf(m, ",check=%c", sbi->s_check);
if (sbi->s_mapping) seq_printf(m, ",map=%c", sbi->s_mapping);
@@ -519,9 +519,10 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",fmode=%o", sbi->s_fmode);
#ifdef CONFIG_JOLIET
- if (sbi->s_nls_iocharset &&
- strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
+ if (sbi->s_nls_iocharset)
seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+ else
+ seq_puts(m, ",iocharset=utf8");
#endif
return 0;
}
@@ -865,14 +866,13 @@ root_found:
sbi->s_nls_iocharset = NULL;
#ifdef CONFIG_JOLIET
- if (joliet_level && opt.utf8 == 0) {
+ if (joliet_level) {
char *p = opt.iocharset ? opt.iocharset : CONFIG_NLS_DEFAULT;
- sbi->s_nls_iocharset = load_nls(p);
- if (! sbi->s_nls_iocharset) {
- /* Fail only if explicit charset specified */
- if (opt.iocharset)
+ if (strcmp(p, "utf8") != 0) {
+ sbi->s_nls_iocharset = opt.iocharset ?
+ load_nls(opt.iocharset) : load_nls_default();
+ if (!sbi->s_nls_iocharset)
goto out_freesbi;
- sbi->s_nls_iocharset = load_nls_default();
}
}
#endif
@@ -888,7 +888,6 @@ root_found:
sbi->s_gid = opt.gid;
sbi->s_uid_set = opt.uid_set;
sbi->s_gid_set = opt.gid_set;
- sbi->s_utf8 = opt.utf8;
sbi->s_nocompress = opt.nocompress;
sbi->s_overriderockperm = opt.overriderockperm;
/*
@@ -1327,6 +1326,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
de = (struct iso_directory_record *) (bh->b_data + offset);
de_len = *(unsigned char *) de;
+ if (de_len < sizeof(struct iso_directory_record))
+ goto fail;
if (offset + de_len > bufsize) {
int frag1 = bufsize - offset;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 8e42b4fbefdc..b4b9d4861c9d 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -44,7 +44,6 @@ struct isofs_sb_info {
unsigned char s_session;
unsigned int s_high_sierra:1;
unsigned int s_rock:2;
- unsigned int s_utf8:1;
unsigned int s_cruft:1; /* Broken disks with high byte of length
* containing junk */
unsigned int s_nocompress:1;
diff --git a/fs/isofs/joliet.c b/fs/isofs/joliet.c
index be8b6a9d0b92..c0f04a1e7f69 100644
--- a/fs/isofs/joliet.c
+++ b/fs/isofs/joliet.c
@@ -41,14 +41,12 @@ uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
- unsigned char utf8;
struct nls_table *nls;
unsigned char len = 0;
- utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
- if (utf8) {
+ if (!nls) {
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index cac468f04820..558e7c51ce0d 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -102,6 +102,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
printk(KERN_NOTICE "iso9660: Corrupted directory entry"
" in block %lu of inode %lu\n", block,
dir->i_ino);
+ brelse(bh);
return 0;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 6e054b368b5f..93a466cf58ba 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1356,8 +1356,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
int ret;
/* Buffer got discarded which means block device got invalidated */
- if (!buffer_mapped(bh))
+ if (!buffer_mapped(bh)) {
+ unlock_buffer(bh);
return -EIO;
+ }
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a355ca418e78..3311b1e684de 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1914,6 +1914,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
jbd2_journal_put_journal_head(jh);
@@ -2005,6 +2008,7 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
{
struct buffer_head *head;
struct buffer_head *bh;
+ bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
@@ -2029,11 +2033,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd_unlock_bh_state(bh);
if (buffer_jbd(bh))
goto busy;
+
+ /*
+ * If we free a metadata buffer which has been failed to
+ * write out, the jbd2 checkpoint procedure will not detect
+ * this failure and may lead to filesystem inconsistency
+ * after cleanup journal tail.
+ */
+ if (buffer_write_io_error(bh)) {
+ pr_err("JBD2: Error while async write back metadata bh %llu.",
+ (unsigned long long)bh->b_blocknr);
+ has_write_io_error = true;
+ }
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
+ if (has_write_io_error)
+ jbd2_journal_abort(journal, -EIO);
+
return ret;
}
@@ -2461,6 +2480,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+
+ /*
+ * b_transaction must be set, otherwise the new b_transaction won't
+ * be holding jh reference
+ */
+ J_ASSERT_JH(jh, jh->b_transaction != NULL);
+
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 406d9cc84ba8..79e771ab624f 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -37,6 +37,9 @@ static int jffs2_rtime_compress(unsigned char *data_in,
int outpos = 0;
int pos=0;
+ if (*dstlen <= 3)
+ return -1;
+
memset(positions,0,sizeof(positions));
while (pos < (*sourcelen) && outpos <= (*dstlen)-2) {
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index e5a6deb38e1e..f4a5ec92f5dc 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -590,10 +590,14 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
int ret;
uint32_t now = get_seconds();
+ mutex_lock(&f->sem);
for (fd = f->dents ; fd; fd = fd->next) {
- if (fd->ino)
+ if (fd->ino) {
+ mutex_unlock(&f->sem);
return -ENOTEMPTY;
+ }
}
+ mutex_unlock(&f->sem);
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, f, now);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index bccfc40b3a74..d19483fa1fe8 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
jffs2_free_full_dirent(fd);
return -EIO;
}
+
+#ifdef CONFIG_JFFS2_SUMMARY
+ /*
+ * we use CONFIG_JFFS2_SUMMARY because without it, we
+ * have checked it while mounting
+ */
+ crc = crc32(0, fd->name, rd->nsize);
+ if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
+ JFFS2_NOTICE("name CRC failed on dirent node at"
+ "%#08x: read %#08x,calculated %#08x\n",
+ ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
+ jffs2_mark_node_obsolete(c, ref);
+ jffs2_free_full_dirent(fd);
+ return 0;
+ }
+#endif
}
fd->nhash = full_name_hash(NULL, fd->name, rd->nsize);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 90431dd613b8..08813789fcf0 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -1075,7 +1075,7 @@ static int jffs2_scan_dirent_node(struct jffs2_sb_info *c, struct jffs2_eraseblo
memcpy(&fd->name, rd->name, checkedlen);
fd->name[checkedlen] = 0;
- crc = crc32(0, fd->name, rd->nsize);
+ crc = crc32(0, fd->name, checkedlen);
if (crc != je32_to_cpu(rd->name_crc)) {
pr_notice("%s(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
__func__, ofs, je32_to_cpu(rd->name_crc), crc);
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index be7c8a6a5748..4fe64519870f 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -783,6 +783,8 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n",
je16_to_cpu(temp->u.nodetype));
jffs2_sum_disable_collecting(c->summary);
+ /* The above call removes the list, nothing more to do */
+ goto bail_rwcompat;
} else {
BUG(); /* unknown node in summary information */
}
@@ -794,6 +796,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
c->summary->sum_num--;
}
+ bail_rwcompat:
jffs2_sum_reset_collected(c->summary);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 054cc761b426..87b41edc800d 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -161,7 +161,8 @@ void jfs_evict_inode(struct inode *inode)
if (test_cflag(COMMIT_Freewmap, inode))
jfs_free_zero_link(inode);
- diFree(inode);
+ if (JFS_SBI(inode->i_sb)->ipimap)
+ diFree(inode);
/*
* Free the inode from the quota allocation.
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 2d514c7affc2..9ff510a489cb 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -1669,7 +1669,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
} else if (rc == -ENOSPC) {
/* search for next smaller log2 block */
l2nb = BLKSTOL2(nblocks) - 1;
- nblocks = 1 << l2nb;
+ nblocks = 1LL << l2nb;
} else {
/* Trim any already allocated blocks */
jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 562b9a7e4311..f502a15c6c98 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -196,7 +196,7 @@ typedef union dmtree {
#define dmt_leafidx t1.leafidx
#define dmt_height t1.height
#define dmt_budmin t1.budmin
-#define dmt_stree t1.stree
+#define dmt_stree t2.stree
/*
* on-disk aggregate disk allocation map descriptor.
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index b67d64671bb4..415bfa90607a 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -281,5 +281,6 @@
* fsck() must be run to repair
*/
#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */
+#define FM_STATE_MAX 0x0000000f /* max value of s_state */
#endif /* _H_JFS_FILSYS */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 0e5d412c0b01..794c2acb6822 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1338,6 +1338,7 @@ int lmLogInit(struct jfs_log * log)
} else {
if (memcmp(logsuper->uuid, log->uuid, 16)) {
jfs_warn("wrong uuid on JFS log device");
+ rc = -EINVAL;
goto errout20;
}
log->size = le32_to_cpu(logsuper->size);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index d8658607bf46..f1a705d15904 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -49,6 +49,7 @@
#include <linux/fs.h>
#include <linux/buffer_head.h>
+#include <linux/log2.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
@@ -92,14 +93,14 @@ int jfs_mount(struct super_block *sb)
* (initialize mount inode from the superblock)
*/
if ((rc = chkSuper(sb))) {
- goto errout20;
+ goto out;
}
ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
if (ipaimap == NULL) {
jfs_err("jfs_mount: Failed to read AGGREGATE_I");
rc = -EIO;
- goto errout20;
+ goto out;
}
sbi->ipaimap = ipaimap;
@@ -110,7 +111,7 @@ int jfs_mount(struct super_block *sb)
*/
if ((rc = diMount(ipaimap))) {
jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc);
- goto errout21;
+ goto err_ipaimap;
}
/*
@@ -119,7 +120,7 @@ int jfs_mount(struct super_block *sb)
ipbmap = diReadSpecial(sb, BMAP_I, 0);
if (ipbmap == NULL) {
rc = -EIO;
- goto errout22;
+ goto err_umount_ipaimap;
}
jfs_info("jfs_mount: ipbmap:0x%p", ipbmap);
@@ -131,7 +132,7 @@ int jfs_mount(struct super_block *sb)
*/
if ((rc = dbMount(ipbmap))) {
jfs_err("jfs_mount: dbMount failed w/rc = %d", rc);
- goto errout22;
+ goto err_ipbmap;
}
/*
@@ -150,7 +151,7 @@ int jfs_mount(struct super_block *sb)
if (!ipaimap2) {
jfs_err("jfs_mount: Failed to read AGGREGATE_I");
rc = -EIO;
- goto errout35;
+ goto err_umount_ipbmap;
}
sbi->ipaimap2 = ipaimap2;
@@ -162,7 +163,7 @@ int jfs_mount(struct super_block *sb)
if ((rc = diMount(ipaimap2))) {
jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d",
rc);
- goto errout35;
+ goto err_ipaimap2;
}
} else
/* Secondary aggregate inode table is not valid */
@@ -179,7 +180,7 @@ int jfs_mount(struct super_block *sb)
jfs_err("jfs_mount: Failed to read FILESYSTEM_I");
/* open fileset secondary inode allocation map */
rc = -EIO;
- goto errout40;
+ goto err_umount_ipaimap2;
}
jfs_info("jfs_mount: ipimap:0x%p", ipimap);
@@ -189,41 +190,34 @@ int jfs_mount(struct super_block *sb)
/* initialize fileset inode allocation map */
if ((rc = diMount(ipimap))) {
jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
- goto errout41;
+ goto err_ipimap;
}
- goto out;
+ return rc;
/*
* unwind on error
*/
- errout41: /* close fileset inode allocation map inode */
+err_ipimap:
+ /* close fileset inode allocation map inode */
diFreeSpecial(ipimap);
-
- errout40: /* fileset closed */
-
+err_umount_ipaimap2:
/* close secondary aggregate inode allocation map */
- if (ipaimap2) {
+ if (ipaimap2)
diUnmount(ipaimap2, 1);
+err_ipaimap2:
+ /* close aggregate inodes */
+ if (ipaimap2)
diFreeSpecial(ipaimap2);
- }
-
- errout35:
-
- /* close aggregate block allocation map */
+err_umount_ipbmap: /* close aggregate block allocation map */
dbUnmount(ipbmap, 1);
+err_ipbmap: /* close aggregate inodes */
diFreeSpecial(ipbmap);
-
- errout22: /* close aggregate inode allocation map */
-
+err_umount_ipaimap: /* close aggregate inode allocation map */
diUnmount(ipaimap, 1);
-
- errout21: /* close aggregate inodes */
+err_ipaimap: /* close aggregate inodes */
diFreeSpecial(ipaimap);
- errout20: /* aggregate closed */
-
- out:
-
+out:
if (rc)
jfs_err("Mount JFS Failure: %d", rc);
@@ -378,6 +372,15 @@ static int chkSuper(struct super_block *sb)
sbi->bsize = bsize;
sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize);
+ /* check some fields for possible corruption */
+ if (sbi->l2bsize != ilog2((u32)bsize) ||
+ j_sb->pad != 0 ||
+ le32_to_cpu(j_sb->s_state) > FM_STATE_MAX) {
+ rc = -EINVAL;
+ jfs_err("jfs_mount: Mount Failure: superblock is corrupt!");
+ goto out;
+ }
+
/*
* For now, ignore s_pbsize, l2bfactor. All I/O going through buffer
* cache.
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 5019058e0f6a..610267585f8f 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -320,6 +320,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
info->root = root;
info->ns = ns;
+ INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
&init_user_ns, info);
diff --git a/fs/libfs.c b/fs/libfs.c
index cb9310b091f5..83618c21c216 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -868,7 +868,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
struct simple_attr *attr;
- u64 val;
+ unsigned long long val;
size_t size;
ssize_t ret;
@@ -886,7 +886,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtoll(attr->set_buf, NULL, 0);
+ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c4504ed9f680..9c39e13a28df 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -431,12 +431,7 @@ nlm_bind_host(struct nlm_host *host)
* RPC rebind is required
*/
if ((clnt = host->h_rpcclnt) != NULL) {
- if (time_after_eq(jiffies, host->h_nextrebind)) {
- rpc_force_rebind(clnt);
- host->h_nextrebind = jiffies + NLM_HOST_REBIND;
- dprintk("lockd: next rebind in %lu jiffies\n",
- host->h_nextrebind - jiffies);
- }
+ nlm_rebind_host(host);
} else {
unsigned long increment = nlmsvc_timeout;
struct rpc_timeout timeparms = {
@@ -484,13 +479,20 @@ nlm_bind_host(struct nlm_host *host)
return clnt;
}
-/*
- * Force a portmap lookup of the remote lockd port
+/**
+ * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
+ * @host: NLM host handle for peer
+ *
+ * This is not needed when using a connection-oriented protocol, such as TCP.
+ * The existing autobind mechanism is sufficient to force a rebind when
+ * required, e.g. on connection state transitions.
*/
void
nlm_rebind_host(struct nlm_host *host)
{
- dprintk("lockd: rebind host %s\n", host->h_name);
+ if (host->h_proto != IPPROTO_UDP)
+ return;
+
if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
rpc_force_rebind(host->h_rpcclnt);
host->h_nextrebind = jiffies + NLM_HOST_REBIND;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index b6829d679643..d49337b5abfa 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -155,6 +155,23 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
return 0;
}
+static bool minix_check_superblock(struct minix_sb_info *sbi)
+{
+ if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+ return false;
+
+ /*
+ * s_max_size must not exceed the block mapping limitation. This check
+ * is only needed for V1 filesystems, since V2/V3 support an extra level
+ * of indirect blocks which places the limit well above U32_MAX.
+ */
+ if (sbi->s_version == MINIX_V1 &&
+ sbi->s_max_size > (7 + 512 + 512*512) * BLOCK_SIZE)
+ return false;
+
+ return true;
+}
+
static int minix_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
@@ -233,11 +250,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
} else
goto out_no_fs;
+ if (!minix_check_superblock(sbi))
+ goto out_illegal_sb;
+
/*
* Allocate the buffer map to keep the superblock small.
*/
- if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
- goto out_illegal_sb;
i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
map = kzalloc(i, GFP_KERNEL);
if (!map)
@@ -471,6 +489,13 @@ static struct inode *V1_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
@@ -504,6 +529,13 @@ static struct inode *V2_minix_iget(struct inode *inode)
iget_failed(inode);
return ERR_PTR(-EIO);
}
+ if (raw_inode->i_nlinks == 0) {
+ printk("MINIX-fs: deleted inode referenced: %lu\n",
+ inode->i_ino);
+ brelse(bh);
+ iget_failed(inode);
+ return ERR_PTR(-ESTALE);
+ }
inode->i_mode = raw_inode->i_mode;
i_uid_write(inode, raw_inode->i_uid);
i_gid_write(inode, raw_inode->i_gid);
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 043c3fdbc8e7..446148792f41 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -75,6 +75,7 @@ static int alloc_branch(struct inode *inode,
int n = 0;
int i;
int parent = minix_new_block(inode);
+ int err = -ENOSPC;
branch[0].key = cpu_to_block(parent);
if (parent) for (n = 1; n < num; n++) {
@@ -85,6 +86,11 @@ static int alloc_branch(struct inode *inode,
break;
branch[n].key = cpu_to_block(nr);
bh = sb_getblk(inode->i_sb, parent);
+ if (!bh) {
+ minix_free_block(inode, nr);
+ err = -ENOMEM;
+ break;
+ }
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
branch[n].bh = bh;
@@ -103,7 +109,7 @@ static int alloc_branch(struct inode *inode,
bforget(branch[i].bh);
for (i = 0; i < n; i++)
minix_free_block(inode, block_to_cpu(branch[i].key));
- return -ENOSPC;
+ return err;
}
static inline int splice_branch(struct inode *inode,
diff --git a/fs/namespace.c b/fs/namespace.c
index f264ff8491c8..a25b1cdf7206 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1696,13 +1696,22 @@ static inline bool may_mount(void)
return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
}
+#ifdef CONFIG_MANDATORY_FILE_LOCKING
+static bool may_mandlock(void)
+{
+ pr_warn_once("======================================================\n"
+ "WARNING: the mand mount option is being deprecated and\n"
+ " will be removed in v5.15!\n"
+ "======================================================\n");
+ return capable(CAP_SYS_ADMIN);
+}
+#else
static inline bool may_mandlock(void)
{
-#ifndef CONFIG_MANDATORY_FILE_LOCKING
+ pr_warn("VFS: \"mand\" mount option not supported");
return false;
-#endif
- return capable(CAP_SYS_ADMIN);
}
+#endif
/*
* Now umount can handle mount points as well as block devices.
@@ -1880,6 +1889,20 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
+static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ struct mount *child;
+
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (!is_subdir(child->mnt_mountpoint, dentry))
+ continue;
+
+ if (child->mnt.mnt_flags & MNT_LOCKED)
+ return true;
+ }
+ return false;
+}
+
/**
* clone_private_mount - create a private clone of a path
*
@@ -1894,14 +1917,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;
+ down_read(&namespace_sem);
if (IS_MNT_UNBINDABLE(old_mnt))
- return ERR_PTR(-EINVAL);
+ goto invalid;
+
+ if (!check_mnt(old_mnt))
+ goto invalid;
+
+ if (has_locked_children(old_mnt, path->dentry))
+ goto invalid;
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+ up_read(&namespace_sem);
+
if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
return &new_mnt->mnt;
+
+invalid:
+ up_read(&namespace_sem);
+ return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -2218,19 +2254,6 @@ static int do_change_type(struct path *path, int ms_flags)
return err;
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
-{
- struct mount *child;
- list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
- if (!is_subdir(child->mnt_mountpoint, dentry))
- continue;
-
- if (child->mnt.mnt_flags & MNT_LOCKED)
- return true;
- }
- return false;
-}
-
/*
* do loopback mount.
*/
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 7ccc30a757ec..c66916d77b4a 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -140,7 +140,7 @@ config PNFS_BLOCK
config PNFS_FLEXFILE_LAYOUT
tristate
depends on NFS_V4_1 && NFS_V3
- default m
+ default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
string "NFSv4.1 Implementation ID Domain"
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee72691ce94f..7c3d035713f8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -409,7 +409,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
if (cl_init->hostname == NULL) {
WARN_ON(1);
- return NULL;
+ return ERR_PTR(-EINVAL);
}
/* see if the client already exists */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 673d89bb817e..5c26e90db588 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -560,6 +560,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
do {
+ if (entry->label)
+ entry->label->len = NFS4_MAXLABELLEN;
+
status = xdr_decode(desc, entry, &stream);
if (status != 0) {
if (status == -EAGAIN)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 03da4e0b0098..38179e5a6a00 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -717,7 +717,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
if (unlikely(!p))
goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++);
- if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
+ if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
goto out_err;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 8dbde5ded042..9d99e19d98bd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -101,7 +101,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
@@ -921,9 +921,8 @@ retry:
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
@@ -942,11 +941,15 @@ retry:
}
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 2464b9b80698..17dee8fd9834 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -428,10 +428,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
goto out_fail;
ds = mirror->mirror_ds->ds;
+ if (READ_ONCE(ds->ds_clp))
+ goto out;
/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
smp_rmb();
- if (ds->ds_clp)
- goto out;
/* FIXME: For now we assume the server sent only one version of NFS
* to use for the DS.
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 71a399f6805a..ad01d4fb795e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -972,6 +972,7 @@ EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
{
filp->private_data = get_nfs_open_context(ctx);
+ set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags);
if (list_empty(&ctx->list))
nfs_inode_attach_open_context(ctx);
}
@@ -991,6 +992,8 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
continue;
if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
continue;
+ if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags))
+ continue;
ctx = get_nfs_open_context(pos);
break;
}
@@ -1005,6 +1008,7 @@ void nfs_file_clear_open_context(struct file *filp)
if (ctx) {
struct inode *inode = d_inode(ctx->dentry);
+ clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags);
/*
* We fatal error on write before. Try to writeback
* every page again.
@@ -1533,10 +1537,10 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
*/
static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
}
static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
@@ -1939,7 +1943,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
@@ -2053,7 +2057,7 @@ static int nfsiod_start(void)
{
struct workqueue_struct *wq;
dprintk("RPC: creating workqueue nfsiod\n");
- wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
if (wq == NULL)
return -ENOMEM;
nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e8ccab17a962..69f6c4bef797 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -578,12 +578,14 @@ extern int nfs4_test_session_trunk(struct rpc_clnt *,
static inline struct inode *nfs_igrab_and_active(struct inode *inode)
{
- inode = igrab(inode);
- if (inode != NULL && !nfs_sb_active(inode->i_sb)) {
- iput(inode);
- inode = NULL;
+ struct super_block *sb = inode->i_sb;
+
+ if (sb && nfs_sb_active(sb)) {
+ if (igrab(inode))
+ return inode;
+ nfs_sb_deactive(sb);
}
- return inode;
+ return NULL;
}
static inline void nfs_iput_and_deactive(struct inode *inode)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e5686be67be8..d57d453aecc2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
/*
* nfs_path - reconstruct the path given an arbitrary dentry
* @base - used to return pointer to the end of devname part of path
- * @dentry - pointer to dentry
+ * @dentry_in - pointer to dentry
* @buffer - result buffer
- * @buflen - length of buffer
+ * @buflen_in - length of buffer
* @flags - options (see below)
*
* Helper function for constructing the server pathname
@@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
* the original device (export) name
* (if unset, the original name is returned verbatim)
*/
-char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen,
- unsigned flags)
+char *nfs_path(char **p, struct dentry *dentry_in, char *buffer,
+ ssize_t buflen_in, unsigned flags)
{
char *end;
int namelen;
unsigned seq;
const char *base;
+ struct dentry *dentry;
+ ssize_t buflen;
rename_retry:
+ buflen = buflen_in;
+ dentry = dentry_in;
end = buffer+buflen;
*--end = '\0';
buflen--;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index bc673fb47fb3..65f9a8ae2845 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -357,7 +357,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
case NFS3_CREATE_UNCHECKED:
- goto out;
+ goto out_release_acls;
}
nfs_fattr_init(data->res.dir_attr);
nfs_fattr_init(data->res.fattr);
@@ -702,7 +702,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
break;
default:
status = -EINVAL;
- goto out;
+ goto out_release_acls;
}
status = nfs3_do_create(dir, dentry, data);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index f1cb0b7eb05f..be666aee28cc 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -34,6 +34,7 @@
*/
#define NFS3_fhandle_sz (1+16)
#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */
+#define NFS3_post_op_fh_sz (1+NFS3_fh_sz)
#define NFS3_sattr_sz (15)
#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2))
#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2))
@@ -71,7 +72,7 @@
#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
-#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 9c374441f660..394503c79faf 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -58,7 +58,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
loff_t offset, loff_t len)
{
- struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct inode *inode = file_inode(filep);
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
struct nfs_lock_context *lock;
int err;
@@ -67,9 +68,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
if (IS_ERR(lock))
return PTR_ERR(lock);
- exception.inode = file_inode(filep);
+ exception.inode = inode;
exception.state = lock->open_context->state;
+ err = nfs_sync_inode(inode);
+ if (err)
+ goto out;
+
do {
err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
if (err == -ENOTSUPP) {
@@ -78,7 +83,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
-
+out:
nfs_put_lock_context(lock);
return err;
}
@@ -116,16 +121,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return -EOPNOTSUPP;
inode_lock(inode);
- err = nfs_sync_inode(inode);
- if (err)
- goto out_unlock;
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
inode_unlock(inode);
return err;
}
@@ -184,8 +186,9 @@ static ssize_t _nfs42_proc_copy(struct file *src,
goto out;
}
- truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res->write_res.count);
+ WARN_ON_ONCE(invalidate_inode_pages2_range(dst_inode->i_mapping,
+ pos_dst >> PAGE_SHIFT,
+ (pos_dst + res->write_res.count - 1) >> PAGE_SHIFT));
status = res->write_res.count;
out:
@@ -303,7 +306,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 5966e1e7b1f5..09c683402f95 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -625,8 +625,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
status = decode_clone(xdr);
if (status)
goto out;
- status = decode_getfattr(xdr, res->dst_fattr, res->server);
-
+ decode_getfattr(xdr, res->dst_fattr, res->server);
out:
res->rpc_status = status;
return status;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 0924b68b5657..3c8dfab8e958 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -417,8 +417,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
*/
nfs_mark_client_ready(clp, -EPERM);
}
- nfs_put_client(clp);
clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
+ nfs_put_client(clp);
return old;
error:
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index b8d316a338bc..7020f36af993 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -148,7 +148,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
case SEEK_HOLE:
case SEEK_DATA:
ret = nfs42_proc_llseek(filep, offset, whence);
- if (ret != -ENOTSUPP)
+ if (ret != -EOPNOTSUPP)
return ret;
default:
return nfs_file_llseek(filep, offset, whence);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4d45786738ab..ae19ead908d5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4445,12 +4445,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
u64 cookie, struct page **pages, unsigned int count, bool plus)
{
struct inode *dir = d_inode(dentry);
+ struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_readdir_arg args = {
.fh = NFS_FH(dir),
.pages = pages,
.pgbase = 0,
.count = count,
- .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
.plus = plus,
};
struct nfs4_readdir_res res;
@@ -4465,9 +4465,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
dentry,
(unsigned long long)cookie);
+ if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+ args.bitmask = server->attr_bitmask_nl;
+ else
+ args.bitmask = server->attr_bitmask;
+
nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
res.pgbase = args.pgbase;
- status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+ status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+ &res.seq_res, 0);
if (status >= 0) {
memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
status += args.pgbase;
@@ -5249,6 +5255,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
int ret, i;
+ /* You can't remove system.nfs4_acl: */
+ if (buflen == 0)
+ return -EINVAL;
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
@@ -5285,6 +5294,14 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
do {
err = __nfs4_proc_set_acl(inode, buf, buflen);
trace_nfs4_set_acl(inode, err);
+ if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) {
+ /*
+ * no need to retry since the kernel
+ * isn't involved in encoding the ACEs.
+ */
+ err = -EINVAL;
+ break;
+ }
err = nfs4_handle_exception(NFS_SERVER(inode), err,
&exception);
} while (exception.retry);
@@ -5323,9 +5340,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
return ret;
if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
return -ENOENT;
- if (buflen < label.len)
- return -ERANGE;
- return 0;
+ return label.len;
}
static int nfs4_get_security_label(struct inode *inode, void *buf,
@@ -6391,9 +6406,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
data->arg.new_lock_owner, ret);
} else
data->cancelled = true;
+ trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
rpc_put_task(task);
dprintk("%s: done, ret = %d!\n", __func__, ret);
- trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret);
return ret;
}
@@ -6689,7 +6704,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
err = nfs4_set_lock_state(state, fl);
if (err != 0)
return err;
- err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ do {
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ ssleep(1);
+ } while (err == -NFS4ERR_DELAY);
return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err);
}
@@ -7280,9 +7300,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name,
* both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or
* DS flags set.
*/
-static int nfs4_check_cl_exchange_flags(u32 flags)
+static int nfs4_check_cl_exchange_flags(u32 flags, u32 version)
{
- if (flags & ~EXCHGID4_FLAG_MASK_R)
+ if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R))
+ goto out_inval;
+ else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R))
goto out_inval;
if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) &&
(flags & EXCHGID4_FLAG_USE_NON_PNFS))
@@ -7309,7 +7331,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
}
static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
- .rpc_call_done = &nfs4_bind_one_conn_to_session_done,
+ .rpc_call_done = nfs4_bind_one_conn_to_session_done,
};
/*
@@ -7677,7 +7699,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
if (status != 0)
goto out;
- status = nfs4_check_cl_exchange_flags(resp->flags);
+ status = nfs4_check_cl_exchange_flags(resp->flags,
+ clp->cl_mvops->minor_version);
if (status != 0)
goto out;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0b2d051990e9..3cd04c98da6b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4258,7 +4258,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
goto out_overflow;
if (len < NFS4_MAXLABELLEN) {
if (label) {
- memcpy(label->label, p, len);
+ if (label->len) {
+ if (label->len < len)
+ return -ERANGE;
+ memcpy(label->label, p, len);
+ }
label->len = len;
label->pi = pi;
label->lfs = lfs;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7c01936be7c7..469064045995 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -132,47 +132,70 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
/*
- * nfs_page_group_lock - lock the head of the page group
- * @req - request in group that is to be locked
+ * nfs_page_set_headlock - set the request PG_HEADLOCK
+ * @req: request that is to be locked
*
- * this lock must be held when traversing or modifying the page
- * group list
+ * this lock must be held when modifying req->wb_head
*
* return 0 on success, < 0 on error
*/
int
-nfs_page_group_lock(struct nfs_page *req)
+nfs_page_set_headlock(struct nfs_page *req)
{
- struct nfs_page *head = req->wb_head;
-
- WARN_ON_ONCE(head != head->wb_head);
-
- if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags))
+ if (!test_and_set_bit(PG_HEADLOCK, &req->wb_flags))
return 0;
- set_bit(PG_CONTENDED1, &head->wb_flags);
+ set_bit(PG_CONTENDED1, &req->wb_flags);
smp_mb__after_atomic();
- return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ return wait_on_bit_lock(&req->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE);
}
/*
- * nfs_page_group_unlock - unlock the head of the page group
- * @req - request in group that is to be unlocked
+ * nfs_page_clear_headlock - clear the request PG_HEADLOCK
+ * @req: request that is to be locked
*/
void
-nfs_page_group_unlock(struct nfs_page *req)
+nfs_page_clear_headlock(struct nfs_page *req)
{
- struct nfs_page *head = req->wb_head;
-
- WARN_ON_ONCE(head != head->wb_head);
-
smp_mb__before_atomic();
- clear_bit(PG_HEADLOCK, &head->wb_flags);
+ clear_bit(PG_HEADLOCK, &req->wb_flags);
smp_mb__after_atomic();
- if (!test_bit(PG_CONTENDED1, &head->wb_flags))
+ if (!test_bit(PG_CONTENDED1, &req->wb_flags))
return;
- wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+ wake_up_bit(&req->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req: request in group that is to be locked
+ *
+ * this lock must be held when traversing or modifying the page
+ * group list
+ *
+ * return 0 on success, < 0 on error
+ */
+int
+nfs_page_group_lock(struct nfs_page *req)
+{
+ int ret;
+
+ ret = nfs_page_set_headlock(req);
+ if (ret || req->wb_head == req)
+ return ret;
+ return nfs_page_set_headlock(req->wb_head);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req: request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+ if (req != req->wb_head)
+ nfs_page_clear_headlock(req->wb_head);
+ nfs_page_clear_headlock(req);
}
/*
@@ -963,17 +986,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
if (!list_empty(&mirror->pg_list)) {
int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
- else
+ if (list_empty(&mirror->pg_list)) {
mirror->pg_bytes_written += mirror->pg_count;
- }
- if (list_empty(&mirror->pg_list)) {
- mirror->pg_count = 0;
- mirror->pg_base = 0;
+ mirror->pg_count = 0;
+ mirror->pg_base = 0;
+ mirror->pg_recoalesce = 0;
+ }
}
}
@@ -1071,7 +1093,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
do {
list_splice_init(&mirror->pg_list, &head);
- mirror->pg_bytes_written -= mirror->pg_count;
mirror->pg_count = 0;
mirror->pg_base = 0;
mirror->pg_recoalesce = 0;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 83abf3dd7351..619fc5c4c82c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1136,6 +1136,11 @@ _pnfs_return_layout(struct inode *ino)
{
struct pnfs_layout_hdr *lo = NULL;
struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
LIST_HEAD(tmp_list);
nfs4_stateid stateid;
int status = 0;
@@ -1162,16 +1167,10 @@ _pnfs_return_layout(struct inode *ino)
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
- if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
- struct pnfs_layout_range range = {
- .iomode = IOMODE_ANY,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
+ if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
- }
/* Don't send a LAYOUTRETURN if list was initially empty */
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
@@ -1328,12 +1327,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
+ struct inode *inode = args->inode;
const nfs4_stateid *arg_stateid = NULL;
const nfs4_stateid *res_stateid = NULL;
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;
switch (ret) {
case -NFS4ERR_NOMATCHING_LAYOUT:
+ spin_lock(&inode->i_lock);
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
+ pnfs_set_plh_return_info(lo, args->range.iomode, 0);
+ spin_unlock(&inode->i_lock);
break;
case 0:
if (res->lrs_present)
@@ -1967,7 +1972,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
* We got an entirely new state ID. Mark all segments for the
* inode invalid, and retry the layoutget
*/
- pnfs_mark_layout_stateid_invalid(lo, &free_me);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .length = NFS4_MAX_UINT64,
+ };
+ pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
+ pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
+ &range, 0);
goto out_forget;
}
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index b0ef37f3e2dd..5d7a69ffaaa2 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -555,19 +555,16 @@ out:
}
EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
-static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
+static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
might_sleep();
- wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,
- TASK_KILLABLE);
+ return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
}
static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
smp_mb__before_atomic();
- clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
- smp_mb__after_atomic();
- wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
+ clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
}
static struct nfs_client *(*get_v3_ds_connect)(
@@ -638,7 +635,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
}
smp_wmb();
- ds->ds_clp = clp;
+ WRITE_ONCE(ds->ds_clp, clp);
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
@@ -711,7 +708,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
}
smp_wmb();
- ds->ds_clp = clp;
+ WRITE_ONCE(ds->ds_clp, clp);
dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
return status;
@@ -728,30 +725,33 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
{
int err;
-again:
- err = 0;
- if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
- if (version == 3) {
- err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo,
- retrans);
- } else if (version == 4) {
- err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo,
- retrans, minor_version);
- } else {
- dprintk("%s: unsupported DS version %d\n", __func__,
- version);
- err = -EPROTONOSUPPORT;
- }
+ do {
+ err = nfs4_wait_ds_connect(ds);
+ if (err || ds->ds_clp)
+ goto out;
+ if (nfs4_test_deviceid_unavailable(devid))
+ return -ENODEV;
+ } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
- nfs4_clear_ds_conn_bit(ds);
- } else {
- nfs4_wait_ds_connect(ds);
+ if (ds->ds_clp)
+ goto connect_done;
- /* what was waited on didn't connect AND didn't mark unavail */
- if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid))
- goto again;
+ switch (version) {
+ case 3:
+ err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
+ break;
+ case 4:
+ err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
+ minor_version);
+ break;
+ default:
+ dprintk("%s: unsupported DS version %d\n", __func__, version);
+ err = -EPROTONOSUPPORT;
}
+connect_done:
+ nfs4_clear_ds_conn_bit(ds);
+out:
/*
* At this point the ds->ds_clp should be ready, but it might have
* hit an error.
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b6bda68aa86..010733c8bdcd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -406,22 +406,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
destroy_list = (subreq->wb_this_page == old_head) ?
NULL : subreq->wb_this_page;
+ /* Note: lock subreq in order to change subreq->wb_head */
+ nfs_page_set_headlock(subreq);
WARN_ON_ONCE(old_head != subreq->wb_head);
/* make sure old group is not used */
subreq->wb_this_page = subreq;
+ subreq->wb_head = subreq;
clear_bit(PG_REMOVE, &subreq->wb_flags);
/* Note: races with nfs_page_group_destroy() */
if (!kref_read(&subreq->wb_kref)) {
/* Check if we raced with nfs_page_group_destroy() */
- if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
+ if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) {
+ nfs_page_clear_headlock(subreq);
nfs_free_request(subreq);
+ } else
+ nfs_page_clear_headlock(subreq);
continue;
}
+ nfs_page_clear_headlock(subreq);
- subreq->wb_head = subreq;
nfs_release_request(old_head);
if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
@@ -1031,25 +1037,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_page *req, *tmp;
int ret = 0;
-restart:
list_for_each_entry_safe(req, tmp, src, wb_list) {
kref_get(&req->wb_kref);
if (!nfs_lock_request(req)) {
- int status;
-
- /* Prevent deadlock with nfs_lock_and_join_requests */
- if (!list_empty(dst)) {
- nfs_release_request(req);
- continue;
- }
- /* Ensure we make progress to prevent livelock */
- mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
- status = nfs_wait_on_request(req);
nfs_release_request(req);
- mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
- if (status < 0)
- break;
- goto restart;
+ continue;
}
nfs_request_remove_commit_list(req, cinfo);
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
@@ -1898,6 +1890,7 @@ static int __nfs_commit_inode(struct inode *inode, int how,
int may_wait = how & FLUSH_SYNC;
int ret, nscan;
+ how &= ~FLUSH_SYNC;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
for (;;) {
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index 3b13fb3b0553..63c9c2a70937 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -75,10 +75,14 @@ __state_in_grace(struct net *net, bool open)
if (!open)
return !list_empty(grace_list);
+ spin_lock(&grace_lock);
list_for_each_entry(lm, grace_list, list) {
- if (lm->block_opens)
+ if (lm->block_opens) {
+ spin_unlock(&grace_lock);
return true;
+ }
}
+ spin_unlock(&grace_lock);
return false;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index ef3e7878456c..6fbc48e074be 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -845,9 +845,14 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
- /* filesystem root - cannot return filehandle for ".." */
+ /*
+ * Don't return filehandle for ".." if we're at
+ * the filesystem or export root:
+ */
if (dchild == dparent)
goto out;
+ if (dparent == exp->ex_path.dentry)
+ goto out;
} else
dchild = dget(dparent);
} else
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 80aeb19b176b..22b784e7ef50 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1161,6 +1161,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
nfsd4_mark_cb_down(clp, err);
+ if (c)
+ svc_xprt_put(c->cn_xprt);
return;
}
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d5d1c70bb927..1c7a695ac265 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -955,6 +955,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
return 0;
}
+static bool delegation_hashed(struct nfs4_delegation *dp)
+{
+ return !(list_empty(&dp->dl_perfile));
+}
+
static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
@@ -962,7 +967,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
lockdep_assert_held(&state_lock);
- if (list_empty(&dp->dl_perfile))
+ if (!delegation_hashed(dp))
return false;
dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
@@ -3881,7 +3886,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* queued for a lease break. Don't queue it again.
*/
spin_lock(&state_lock);
- if (dp->dl_time == 0) {
+ if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = get_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c1e923334012..2e7349b2dd4d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3082,15 +3082,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
goto fail;
cd->rd_maxcount -= entry_bytes;
/*
- * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so
- * let's always let through the first entry, at least:
+ * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and
+ * notes that it could be zero. If it is zero, then the server
+ * should enforce only the rd_maxcount value.
*/
- if (!cd->rd_dircount)
- goto fail;
- name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
- if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
- goto fail;
- cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (cd->rd_dircount) {
+ name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8;
+ if (name_and_cookie > cd->rd_dircount && cd->cookie_offset)
+ goto fail;
+ cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie);
+ if (!cd->rd_dircount)
+ cd->rd_maxcount = 0;
+ }
cd->cookie_offset = cookie_offset;
skip_entry:
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d44402241d9e..50465ee502c7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -788,7 +788,10 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- nfsd_destroy(net);
+ if (!list_empty(&nn->nfsd_serv->sv_permsocks))
+ nn->nfsd_serv->sv_nrthreads--;
+ else
+ nfsd_destroy(net);
return err;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 43c0419b8ddb..fc6282181a1f 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -118,6 +118,13 @@ done:
return nfsd_return_attrs(nfserr, resp);
}
+/* Obsolete, replaced by MNTPROC_MNT. */
+static __be32
+nfsd_proc_root(struct svc_rqst *rqstp)
+{
+ return nfs_ok;
+}
+
/*
* Look up a path name component
* Note: the dentry in the resp->fh may be negative if the file
@@ -201,6 +208,13 @@ nfsd_proc_read(struct svc_rqst *rqstp)
return fh_getattr(&resp->fh, &resp->stat);
}
+/* Reserved */
+static __be32
+nfsd_proc_writecache(struct svc_rqst *rqstp)
+{
+ return nfs_ok;
+}
+
/*
* Write data to a file
* N.B. After this call resp->fh needs an fh_put
@@ -605,6 +619,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_xdrressize = ST+AT,
},
[NFSPROC_ROOT] = {
+ .pc_func = nfsd_proc_root,
.pc_decode = nfssvc_decode_void,
.pc_encode = nfssvc_encode_void,
.pc_argsize = sizeof(struct nfsd_void),
@@ -642,6 +657,7 @@ static const struct svc_procedure nfsd_procedures2[18] = {
.pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4,
},
[NFSPROC_WRITECACHE] = {
+ .pc_func = nfsd_proc_writecache,
.pc_decode = nfssvc_decode_void,
.pc_encode = nfssvc_encode_void,
.pc_argsize = sizeof(struct nfsd_void),
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4a9e0fb634b6..67e85a752d3a 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -410,8 +410,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
return;
nfsd_shutdown_net(net);
- printk(KERN_WARNING "nfsd: last server has exited, flushing export "
- "cache\n");
+ pr_info("nfsd: last server has exited, flushing export cache\n");
nfsd_export_flush(net);
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 06d1f2edf2ec..a64065ad8851 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1202,6 +1202,9 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
iap->ia_mode = 0;
iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+ if (!IS_POSIXACL(dirp))
+ iap->ia_mode &= ~current_umask();
+
err = 0;
host_err = 0;
switch (type) {
@@ -1413,6 +1416,9 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
+ if (!IS_POSIXACL(dirp))
+ iap->ia_mode &= ~current_umask();
+
host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
if (host_err < 0) {
fh_drop_write(fhp);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 50e12956c737..5d1e5832690e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2794,6 +2794,8 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
if (!nilfs->ns_writer)
return -ENOMEM;
+ inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
+
err = nilfs_segctor_start_thread(nilfs->ns_writer);
if (err) {
kfree(nilfs->ns_writer);
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 490303e3d517..33fba75aa9f3 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -73,11 +73,9 @@ static const struct sysfs_ops nilfs_##name##_attr_ops = { \
#define NILFS_DEV_INT_GROUP_TYPE(name, parent_name) \
static void nilfs_##name##_attr_release(struct kobject *kobj) \
{ \
- struct nilfs_sysfs_##parent_name##_subgroups *subgroups; \
- struct the_nilfs *nilfs = container_of(kobj->parent, \
- struct the_nilfs, \
- ns_##parent_name##_kobj); \
- subgroups = nilfs->ns_##parent_name##_subgroups; \
+ struct nilfs_sysfs_##parent_name##_subgroups *subgroups = container_of(kobj, \
+ struct nilfs_sysfs_##parent_name##_subgroups, \
+ sg_##name##_kobj); \
complete(&subgroups->sg_##name##_kobj_unregister); \
} \
static struct kobj_type nilfs_##name##_ktype = { \
@@ -103,12 +101,12 @@ static int nilfs_sysfs_create_##name##_group(struct the_nilfs *nilfs) \
err = kobject_init_and_add(kobj, &nilfs_##name##_ktype, parent, \
#name); \
if (err) \
- return err; \
- return 0; \
+ kobject_put(kobj); \
+ return err; \
} \
static void nilfs_sysfs_delete_##name##_group(struct the_nilfs *nilfs) \
{ \
- kobject_del(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
+ kobject_put(&nilfs->ns_##parent_name##_subgroups->sg_##name##_kobj); \
}
/************************************************************************
@@ -219,14 +217,14 @@ int nilfs_sysfs_create_snapshot_group(struct nilfs_root *root)
}
if (err)
- return err;
+ kobject_put(&root->snapshot_kobj);
- return 0;
+ return err;
}
void nilfs_sysfs_delete_snapshot_group(struct nilfs_root *root)
{
- kobject_del(&root->snapshot_kobj);
+ kobject_put(&root->snapshot_kobj);
}
/************************************************************************
@@ -1010,7 +1008,7 @@ int nilfs_sysfs_create_device_group(struct super_block *sb)
err = kobject_init_and_add(&nilfs->ns_dev_kobj, &nilfs_dev_ktype, NULL,
"%s", sb->s_id);
if (err)
- goto free_dev_subgroups;
+ goto cleanup_dev_kobject;
err = nilfs_sysfs_create_mounted_snapshots_group(nilfs);
if (err)
@@ -1047,9 +1045,7 @@ delete_mounted_snapshots_group:
nilfs_sysfs_delete_mounted_snapshots_group(nilfs);
cleanup_dev_kobject:
- kobject_del(&nilfs->ns_dev_kobj);
-
-free_dev_subgroups:
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
failed_create_device_group:
@@ -1064,6 +1060,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
nilfs_sysfs_delete_superblock_group(nilfs);
nilfs_sysfs_delete_segctor_group(nilfs);
kobject_del(&nilfs->ns_dev_kobj);
+ kobject_put(&nilfs->ns_dev_kobj);
kfree(nilfs->ns_dev_subgroups);
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 7c410f879412..8cd134750ebb 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -502,7 +502,7 @@ err_corrupt_attr:
}
file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
le16_to_cpu(attr->data.resident.value_offset));
- p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
+ p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
if (p2 < (u8*)attr || p2 > p)
goto err_corrupt_attr;
/* This attribute is ok, but is it in the $Extend directory? */
@@ -661,6 +661,12 @@ static int ntfs_read_locked_inode(struct inode *vi)
}
a = ctx->attr;
/* Get the standard information attribute value. */
+ if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+ + le32_to_cpu(a->data.resident.value_length) >
+ (u8 *)ctx->mrec + vol->mft_record_size) {
+ ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+ goto unm_err_out;
+ }
si = (STANDARD_INFORMATION*)((u8*)a +
le16_to_cpu(a->data.resident.value_offset));
@@ -1844,6 +1850,12 @@ int ntfs_read_inode_mount(struct inode *vi)
brelse(bh);
}
+ if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
+ ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
+ le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
+ goto err_out;
+ }
+
/* Apply the mst fixups. */
if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
/* FIXME: Try to use the $MFTMirr now. */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index bed54e8adcf9..8512f2119241 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6885,7 +6885,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
- int ret, i, has_data, num_pages = 0;
+ int ret, has_data, num_pages = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
@@ -6894,26 +6894,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
- struct page **pages = NULL;
- loff_t end = osb->s_clustersize;
+ struct page *page = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
if (has_data) {
- pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
- sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
- ret = -ENOMEM;
- mlog_errno(ret);
- return ret;
- }
-
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
- goto free_pages;
+ goto out;
}
}
@@ -6933,7 +6924,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
if (has_data) {
- unsigned int page_end;
+ unsigned int page_end = min_t(unsigned, PAGE_SIZE,
+ osb->s_clustersize);
u64 phys;
ret = dquot_alloc_space_nodirty(inode,
@@ -6957,15 +6949,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
- /*
- * Non sparse file systems zero on extend, so no need
- * to do that now.
- */
- if (!ocfs2_sparse_alloc(osb) &&
- PAGE_SIZE < osb->s_clustersize)
- end = PAGE_SIZE;
-
- ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
+ ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
+ &num_pages);
if (ret) {
mlog_errno(ret);
need_free = 1;
@@ -6976,20 +6961,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* This should populate the 1st page for us and mark
* it up to date.
*/
- ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
+ ret = ocfs2_read_inline_data(inode, page, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
- page_end = PAGE_SIZE;
- if (PAGE_SIZE > osb->s_clustersize)
- page_end = osb->s_clustersize;
-
- for (i = 0; i < num_pages; i++)
- ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
- pages[i], i > 0, &phys);
+ ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
+ &phys);
}
spin_lock(&oi->ip_lock);
@@ -7020,8 +7000,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_unlock:
- if (pages)
- ocfs2_unlock_and_free_pages(pages, num_pages);
+ if (page)
+ ocfs2_unlock_and_free_pages(&page, num_pages);
out_commit:
if (ret < 0 && did_quota)
@@ -7045,8 +7025,6 @@ out_commit:
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
-free_pages:
- kfree(pages);
return ret;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 7de0c9562b70..ca2b575a1e46 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2309,7 +2309,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
struct ocfs2_alloc_context *meta_ac = NULL;
handle_t *handle = NULL;
loff_t end = offset + bytes;
- int ret = 0, credits = 0, locked = 0;
+ int ret = 0, credits = 0;
ocfs2_init_dealloc_ctxt(&dealloc);
@@ -2320,13 +2320,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
!dwc->dw_orphaned)
goto out;
- /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we
- * are in that context. */
- if (dwc->dw_writer_pid != task_pid_nr(current)) {
- inode_lock(inode);
- locked = 1;
- }
-
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
@@ -2401,8 +2394,6 @@ out:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
ocfs2_run_deallocs(osb, &dealloc);
- if (locked)
- inode_unlock(inode);
ocfs2_dio_free_write_ctx(inode, dwc);
return ret;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index d0206042d068..241dd3bb30e4 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2154,7 +2154,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
o2hb_nego_timeout_handler,
reg, NULL, &reg->hr_handler_list);
if (ret)
- goto free;
+ goto remove_item;
ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
sizeof(struct o2hb_nego_msg),
@@ -2173,6 +2173,12 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
unregister_handler:
o2net_unregister_handler_list(&reg->hr_handler_list);
+remove_item:
+ spin_lock(&o2hb_live_lock);
+ list_del(&reg->hr_all_item);
+ if (o2hb_global_heartbeat_active())
+ clear_bit(reg->hr_region_num, o2hb_region_bitmap);
+ spin_unlock(&o2hb_live_lock);
free:
kfree(reg);
return ERR_PTR(ret);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e961015fb484..3aff7dd3fd1a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3705,7 +3705,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
oi = OCFS2_I(inode);
oi->ip_dir_lock_gen++;
mlog(0, "generation: %u\n", oi->ip_dir_lock_gen);
- goto out;
+ goto out_forget;
}
if (!S_ISREG(inode->i_mode))
@@ -3736,6 +3736,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
filemap_fdatawait(mapping);
}
+out_forget:
forget_all_cached_acls(inode);
out:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index dc455d45a66a..e671fbdd4b86 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -490,10 +490,11 @@ int ocfs2_truncate_file(struct inode *inode,
* greater than page size, so we have to truncate them
* anyway.
*/
- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
- truncate_inode_pages(inode->i_mapping, new_i_size);
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ unmap_mapping_range(inode->i_mapping,
+ new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
i_size_read(inode), 1);
if (status)
@@ -512,6 +513,9 @@ int ocfs2_truncate_file(struct inode *inode,
goto bail_unlock_sem;
}
+ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(inode->i_mapping, new_i_size);
+
status = ocfs2_commit_truncate(osb, inode, di_bh);
if (status < 0) {
mlog_errno(status);
@@ -1250,22 +1254,24 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
goto bail_unlock;
}
}
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
2 * ocfs2_quota_trans_credits(sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
- goto bail_unlock;
+ goto bail_unlock_alloc;
}
status = __dquot_transfer(inode, transfer_to);
if (status < 0)
goto bail_commit;
} else {
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
- goto bail_unlock;
+ goto bail_unlock_alloc;
}
}
@@ -1278,6 +1284,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
+bail_unlock_alloc:
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
bail_unlock:
if (status && inode_locked) {
ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
@@ -1531,6 +1539,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
}
}
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ * is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+ u64 start, u64 len)
+{
+ int ret;
+ u64 start_block, end_block, nr_blocks;
+ u64 p_block, offset;
+ u32 cluster, p_cluster, nr_clusters;
+ struct super_block *sb = inode->i_sb;
+ u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+ if (start + len < end)
+ end = start + len;
+
+ start_block = ocfs2_blocks_for_bytes(sb, start);
+ end_block = ocfs2_blocks_for_bytes(sb, end);
+ nr_blocks = end_block - start_block;
+ if (!nr_blocks)
+ return 0;
+
+ cluster = ocfs2_bytes_to_clusters(sb, start);
+ ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+ &nr_clusters, NULL);
+ if (ret)
+ return ret;
+ if (!p_cluster)
+ return 0;
+
+ offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+ p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+ return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
@@ -1540,6 +1587,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
+ loff_t isize = i_size_read(inode);
/*
* The "start" and "end" values are NOT necessarily part of
@@ -1560,6 +1608,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
goto out;
+ /* No page cache for EOF blocks, issue zero out to disk. */
+ if (end > isize) {
+ /*
+ * zeroout eof blocks in last cluster starting from
+ * "isize" even "start" > "isize" because it is
+ * complicated to zeroout just at "start" as "start"
+ * may be not aligned with block size, buffer write
+ * would be required to do that, but out of eof buffer
+ * write is not supported.
+ */
+ ret = ocfs2_zeroout_partial_cluster(inode, isize,
+ end - isize);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ if (start >= isize)
+ goto out;
+ end = isize;
+ }
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1867,7 +1935,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
{
int ret;
s64 llen;
- loff_t size;
+ loff_t size, orig_isize;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
@@ -1959,6 +2027,15 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
default:
ret = -EINVAL;
}
+
+ orig_isize = i_size_read(inode);
+ /* zeroout eof blocks in the cluster. */
+ if (!ret && change_size && orig_isize < size) {
+ ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
+ size - orig_isize);
+ if (!ret)
+ i_size_write(inode, size);
+ }
up_write(&OCFS2_I(inode)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
@@ -1975,9 +2052,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}
- if (change_size && i_size_read(inode) < size)
- i_size_write(inode, size);
-
inode->i_ctime = inode->i_mtime = current_time(inode);
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 2cabbcf2f28e..5571268b681c 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -431,11 +431,7 @@ static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
p->fe_ino, p->fe_done,
ocfs2_filecheck_error(p->fe_status));
- if (ret < 0) {
- total = ret;
- break;
- }
- if (ret == remain) {
+ if (ret >= remain) {
/* snprintf() didn't fit */
total = -E2BIG;
break;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 9a50f222ac97..b15537a76516 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -336,8 +336,8 @@ struct ocfs2_super
spinlock_t osb_lock;
u32 s_next_generation;
unsigned long osb_flags;
- s16 s_inode_steal_slot;
- s16 s_meta_steal_slot;
+ u16 s_inode_steal_slot;
+ u16 s_meta_steal_slot;
atomic_t s_num_inodes_stolen;
atomic_t s_num_meta_stolen;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5bb4a89f9045..0773c774e2bf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -303,7 +303,7 @@
#define OCFS2_MAX_SLOTS 255
/* Slot map indicator for an empty slot */
-#define OCFS2_INVALID_SLOT -1
+#define OCFS2_INVALID_SLOT ((u16)-1)
#define OCFS2_VOL_UUID_LEN 16
#define OCFS2_MAX_VOL_LABEL_LEN 64
@@ -339,8 +339,8 @@ struct ocfs2_system_inode_info {
enum {
BAD_BLOCK_SYSTEM_INODE = 0,
GLOBAL_INODE_ALLOC_SYSTEM_INODE,
+#define OCFS2_FIRST_ONLINE_SYSTEM_INODE GLOBAL_INODE_ALLOC_SYSTEM_INODE
SLOT_MAP_SYSTEM_INODE,
-#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
USER_QUOTA_SYSTEM_INODE,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index c4b029c43464..e7eb08ac4215 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -510,11 +510,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
ret = snprintf(buf, remain, "%s\n",
p->sp_name);
- if (ret < 0) {
- total = ret;
- break;
- }
- if (ret == remain) {
+ if (ret >= remain) {
/* snprintf() didn't fit */
total = -E2BIG;
break;
@@ -541,7 +537,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
if (active_stack) {
ret = snprintf(buf, PAGE_SIZE, "%s\n",
active_stack->sp_name);
- if (ret == PAGE_SIZE)
+ if (ret >= PAGE_SIZE)
ret = -E2BIG;
}
spin_unlock(&ocfs2_stack_lock);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 71f22c8fbffd..8d03630dfd59 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -895,9 +895,9 @@ static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
{
spin_lock(&osb->osb_lock);
if (type == INODE_ALLOC_SYSTEM_INODE)
- osb->s_inode_steal_slot = slot;
+ osb->s_inode_steal_slot = (u16)slot;
else if (type == EXTENT_ALLOC_SYSTEM_INODE)
- osb->s_meta_steal_slot = slot;
+ osb->s_meta_steal_slot = (u16)slot;
spin_unlock(&osb->osb_lock);
}
@@ -2891,9 +2891,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- inode_alloc_inode =
- ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
- suballoc_slot);
+ if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
+ else
+ inode_alloc_inode = ocfs2_get_system_file_inode(osb,
+ INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
if (!inode_alloc_inode) {
/* the error code could be inaccurate, but we are not able to
* get the correct one. */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 24ab735d91dd..4d76321cf722 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -92,7 +92,7 @@ struct mount_options
unsigned long commit_interval;
unsigned long mount_opt;
unsigned int atime_quantum;
- signed short slot;
+ unsigned short slot;
int localalloc_opt;
unsigned int resv_level;
int dir_resv_level;
@@ -1369,7 +1369,7 @@ static int ocfs2_parse_options(struct super_block *sb,
goto bail;
}
if (option)
- mopt->slot = (s16)option;
+ mopt->slot = (u16)option;
break;
case Opt_commit:
if (match_int(&args[0], &option)) {
@@ -1733,6 +1733,7 @@ static void ocfs2_inode_init_once(void *data)
oi->ip_blkno = 0ULL;
oi->ip_clusters = 0;
+ oi->ip_next_orphan = NULL;
ocfs2_resv_init_once(&oi->ip_la_data_resv);
@@ -2187,11 +2188,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
if (ocfs2_clusterinfo_valid(osb)) {
+ /*
+ * ci_stack and ci_cluster in ocfs2_cluster_info may not be null
+ * terminated, so make sure no overflow happens here by using
+ * memcpy. Destination strings will always be null terminated
+ * because osb is allocated using kzalloc.
+ */
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- strlcpy(osb->osb_cluster_stack,
+ memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN + 1);
+ OCFS2_STACK_LABEL_LEN);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2200,9 +2207,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
- strlcpy(osb->osb_cluster_name,
+ memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
- OCFS2_CLUSTER_NAME_LEN + 1);
+ OCFS2_CLUSTER_NAME_LEN);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
index ae782df5c063..f764f4ba2411 100644
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
- if (!new_op)
+ if (!new_op) {
+ ret = -ENOMEM;
goto out_put_parent;
+ }
new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
new_op->upcall.req.lookup.parent_refn = parent->refn;
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 1997ce49ab46..e5f7df28793d 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -197,7 +197,7 @@ static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
- buf->f_frsize = sb->s_blocksize;
+ buf->f_frsize = 0;
out_op_release:
op_release(new_op);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 321eae740148..f3ed80e2966c 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -59,7 +59,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
- int uninitialized_var(error);
+ int error = 0;
size_t slen;
if (!(old->d_inode->i_opflags & IOP_XATTR) ||
@@ -95,6 +95,14 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
if (ovl_is_private_xattr(name))
continue;
+
+ error = security_inode_copy_up_xattr(name);
+ if (error < 0 && error != -EOPNOTSUPP)
+ break;
+ if (error == 1) {
+ error = 0;
+ continue; /* Discard */
+ }
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
@@ -118,13 +126,6 @@ retry:
goto retry;
}
- error = security_inode_copy_up_xattr(name);
- if (error < 0 && error != -EOPNOTSUPP)
- break;
- if (error == 1) {
- error = 0;
- continue; /* Discard */
- }
error = vfs_setxattr(new, name, value, size, 0);
if (error)
break;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 8c561703275a..4869fa508616 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -850,8 +850,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
buflen -= thislen;
memcpy(&buf[buflen], name, thislen);
- tmp = dget_dlock(d->d_parent);
spin_unlock(&d->d_lock);
+ tmp = dget_parent(d);
dput(d);
d = tmp;
@@ -1032,9 +1032,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
goto out_dput;
}
} else {
- if (!d_is_negative(newdentry) &&
- (!new_opaque || !ovl_is_whiteout(newdentry)))
- goto out_dput;
+ if (!d_is_negative(newdentry)) {
+ if (!new_opaque || !ovl_is_whiteout(newdentry))
+ goto out_dput;
+ } else {
+ if (flags & RENAME_EXCHANGE)
+ goto out_dput;
+ }
}
if (olddentry == trap)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 30a1c7fc8c75..ac6efac119fb 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -216,7 +216,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
goto out;
if (!value && !upperdentry) {
+ old_cred = ovl_override_creds(dentry->d_sb);
err = vfs_getxattr(realdentry, name, NULL, 0);
+ revert_creds(old_cred);
if (err < 0)
goto out_drop_write;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index fa3c2c25cec5..0523dd148ed3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -30,6 +30,21 @@
#include "internal.h"
/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
+/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
*/
@@ -654,8 +669,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) {
- user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
- pipe_bufs = 1;
+ user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
}
if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user())
diff --git a/fs/proc/base.c b/fs/proc/base.c
index eeaad0f45b6e..041678cd118d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -841,7 +841,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
while (count > 0) {
- int this_len = min_t(int, count, PAGE_SIZE);
+ size_t this_len = min_t(size_t, count, PAGE_SIZE);
if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
@@ -1041,7 +1041,6 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
- static DEFINE_MUTEX(oom_adj_mutex);
struct mm_struct *mm = NULL;
struct task_struct *task;
int err = 0;
@@ -1081,7 +1080,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
struct task_struct *p = find_lock_task_mm(task);
if (p) {
- if (atomic_read(&p->mm->mm_users) > 1) {
+ if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
mm = p->mm;
mmgrab(mm);
}
@@ -2529,6 +2528,13 @@ out:
}
#ifdef CONFIG_SECURITY
+static int proc_pid_attr_open(struct inode *inode, struct file *file)
+{
+ file->private_data = NULL;
+ __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+ return 0;
+}
+
static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -2558,6 +2564,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
ssize_t length;
struct task_struct *task = get_proc_task(inode);
+ /* A task may only write when it was the opener. */
+ if (file->private_data != current->mm)
+ return -EPERM;
+
length = -ESRCH;
if (!task)
goto out_no_task;
@@ -2598,9 +2608,11 @@ out_no_task:
}
static const struct file_operations proc_pid_attr_operations = {
+ .open = proc_pid_attr_open,
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
.llseek = generic_file_llseek,
+ .release = mem_release,
};
static const struct pid_entry attr_dir_stuff[] = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 225f541f7078..d8e1249adb18 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -432,7 +432,7 @@ const struct inode_operations proc_link_inode_operations = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
- struct inode *inode = new_inode_pseudo(sb);
+ struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = de->low_ino;
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 31326bb23b8b..eba167e1700e 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -15,6 +15,13 @@ static const char *proc_self_get_link(struct dentry *dentry,
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
+ /*
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+ if (current->flags & PF_KTHREAD)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!tgid)
return ERR_PTR(-ENOENT);
/* 11 for max length of signed int in decimal + NULL term */
@@ -41,7 +48,7 @@ int proc_setup_self(struct super_block *s)
inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index b813e3b529f2..c6cd35e5ef5d 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -42,7 +42,7 @@ int proc_setup_thread_self(struct super_block *s)
inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
- struct inode *inode = new_inode_pseudo(s);
+ struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = thread_self_inum;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index aaa7486b6f0d..0bfd71bec7f2 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -105,14 +105,19 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
nr_bytes = count;
/* If pfn is not ram, return zeros for sparse dump files */
- if (pfn_is_ram(pfn) == 0)
- memset(buf, 0, nr_bytes);
- else {
+ if (pfn_is_ram(pfn) == 0) {
+ tmp = 0;
+ if (!userbuf)
+ memset(buf, 0, nr_bytes);
+ else if (clear_user(buf, nr_bytes))
+ tmp = -EFAULT;
+ } else {
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
offset, userbuf);
- if (tmp < 0)
- return tmp;
}
+ if (tmp < 0)
+ return tmp;
+
*ppos += nr_bytes;
count -= nr_bytes;
buf += nr_bytes;
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index a6ee23aadd28..66645a5a35f3 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -15,13 +15,48 @@
#include <linux/buffer_head.h>
#include "qnx4.h"
+/*
+ * A qnx4 directory entry is an inode entry or link info
+ * depending on the status field in the last byte. The
+ * first byte is where the name start either way, and a
+ * zero means it's empty.
+ *
+ * Also, due to a bug in gcc, we don't want to use the
+ * real (differently sized) name arrays in the inode and
+ * link entries, but always the 'de_name[]' one in the
+ * fake struct entry.
+ *
+ * See
+ *
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99578#c6
+ *
+ * for details, but basically gcc will take the size of the
+ * 'name' array from one of the used union entries randomly.
+ *
+ * This use of 'de_name[]' (48 bytes) avoids the false positive
+ * warnings that would happen if gcc decides to use 'inode.di_name'
+ * (16 bytes) even when the pointer and size were to come from
+ * 'link.dl_name' (48 bytes).
+ *
+ * In all cases the actual name pointer itself is the same, it's
+ * only the gcc internal 'what is the size of this field' logic
+ * that can get confused.
+ */
+union qnx4_directory_entry {
+ struct {
+ const char de_name[48];
+ u8 de_pad[15];
+ u8 de_status;
+ };
+ struct qnx4_inode_entry inode;
+ struct qnx4_link_info link;
+};
+
static int qnx4_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
unsigned int offset;
struct buffer_head *bh;
- struct qnx4_inode_entry *de;
- struct qnx4_link_info *le;
unsigned long blknum;
int ix, ino;
int size;
@@ -38,27 +73,27 @@ static int qnx4_readdir(struct file *file, struct dir_context *ctx)
}
ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK;
for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) {
+ union qnx4_directory_entry *de;
+
offset = ix * QNX4_DIR_ENTRY_SIZE;
- de = (struct qnx4_inode_entry *) (bh->b_data + offset);
- if (!de->di_fname[0])
+ de = (union qnx4_directory_entry *) (bh->b_data + offset);
+
+ if (!de->de_name[0])
continue;
- if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
+ if (!(de->de_status & (QNX4_FILE_USED|QNX4_FILE_LINK)))
continue;
- if (!(de->di_status & QNX4_FILE_LINK))
- size = QNX4_SHORT_NAME_MAX;
- else
- size = QNX4_NAME_MAX;
- size = strnlen(de->di_fname, size);
- QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname));
- if (!(de->di_status & QNX4_FILE_LINK))
+ if (!(de->de_status & QNX4_FILE_LINK)) {
+ size = sizeof(de->inode.di_fname);
ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
- else {
- le = (struct qnx4_link_info*)de;
- ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
+ } else {
+ size = sizeof(de->link.dl_fname);
+ ino = ( le32_to_cpu(de->link.dl_inode_blk) - 1 ) *
QNX4_INODES_PER_BLOCK +
- le->dl_inode_ndx;
+ de->link.dl_inode_ndx;
}
- if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) {
+ size = strnlen(de->de_name, size);
+ QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, name));
+ if (!dir_emit(ctx, de->de_name, size, ino, DT_UNKNOWN)) {
brelse(bh);
return 0;
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index bb3f59bcfcf5..833cd3e3758b 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -61,7 +61,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
memset(buf, 0, info->dqi_usable_bs);
return sb->s_op->quota_read(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
}
static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
@@ -70,7 +70,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
ssize_t ret;
ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
- info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+ info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
if (ret != info->dqi_usable_bs) {
quota_error(sb, "dquota write failed");
if (ret >= 0)
@@ -283,7 +283,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
blk);
goto out_buf;
}
- dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+ dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) +
sizeof(struct qt_disk_dqdbheader) +
i * info->dqi_entry_size;
kfree(buf);
@@ -422,6 +422,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
quota_error(dquot->dq_sb, "Quota structure has offset to "
"other block (%u) than it should (%u)", blk,
(uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+ ret = -EIO;
goto out_buf;
}
ret = read_blk(info, blk, buf);
@@ -487,6 +488,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
goto out_buf;
}
newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) {
+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
+ newblk, info->dqi_blocks);
+ ret = -EUCLEAN;
+ goto out_buf;
+ }
+
if (depth == info->dqi_qtree_depth - 1) {
ret = free_dqentry(info, dquot, newblk);
newblk = 0;
@@ -558,7 +566,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
ret = -EIO;
goto out_buf;
} else {
- ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+ ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct
qt_disk_dqdbheader) + i * info->dqi_entry_size;
}
out_buf:
@@ -586,6 +594,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
if (!blk) /* No reference? */
goto out_buf;
+ if (blk < QT_TREEOFF || blk >= info->dqi_blocks) {
+ quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)",
+ blk, info->dqi_blocks);
+ ret = -EUCLEAN;
+ goto out_buf;
+ }
+
if (depth < info->dqi_qtree_depth - 1)
ret = find_tree_dqentry(info, dquot, blk, depth+1);
else
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index a73e5b34db41..addfaae8decf 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -158,7 +158,31 @@ static int v2_read_file_info(struct super_block *sb, int type)
qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
qinfo->dqi_ops = &v2r1_qtree_ops;
}
+ ret = -EUCLEAN;
+ /* Some sanity checks of the read headers... */
+ if ((loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits >
+ i_size_read(sb_dqopt(sb)->files[type])) {
+ quota_error(sb, "Number of blocks too big for quota file size (%llu > %llu).",
+ (loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits,
+ i_size_read(sb_dqopt(sb)->files[type]));
+ goto out_free;
+ }
+ if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) {
+ quota_error(sb, "Free block number too big (%u >= %u).",
+ qinfo->dqi_free_blk, qinfo->dqi_blocks);
+ goto out_free;
+ }
+ if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) {
+ quota_error(sb, "Block with free entry too big (%u >= %u).",
+ qinfo->dqi_free_entry, qinfo->dqi_blocks);
+ goto out_free;
+ }
ret = 0;
+out_free:
+ if (ret) {
+ kfree(info->dqi_priv);
+ info->dqi_priv = NULL;
+ }
out:
up_read(&dqopt->dqio_sem);
return ret;
@@ -283,6 +307,7 @@ static void v2r1_mem2diskdqb(void *dp, struct dquot *dquot)
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id));
+ d->dqb_pad = 0;
if (qtree_entry_unused(info, dp))
d->dqb_itime = cpu_to_le64(1);
}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 3ac1f2387083..5e1ebbe639eb 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -228,7 +228,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
if (!pages)
goto out_free;
- nr = find_get_pages(inode->i_mapping, &pgoff, lpages, pages);
+ nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */
diff --git a/fs/readdir.c b/fs/readdir.c
index 0c357663e33a..e6f4c7b8884b 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -133,6 +133,9 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
if (buf->result)
return -EINVAL;
+ buf->result = verify_dirent_name(name, namlen);
+ if (buf->result < 0)
+ return buf->result;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
@@ -392,6 +395,9 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
if (buf->result)
return -EINVAL;
+ buf->result = verify_dirent_name(name, namlen);
+ if (buf->result < 0)
+ return buf->result;
d_ino = ino;
if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
buf->result = -EOVERFLOW;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 683496322aa8..06c4d376b0e3 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1552,11 +1552,7 @@ void reiserfs_read_locked_inode(struct inode *inode,
* set version 1, version 2 could be used too, because stat data
* key is the same in both versions
*/
- key.version = KEY_FORMAT_3_5;
- key.on_disk_key.k_dir_id = dirino;
- key.on_disk_key.k_objectid = inode->i_ino;
- key.on_disk_key.k_offset = 0;
- key.on_disk_key.k_type = 0;
+ _make_cpu_key(&key, KEY_FORMAT_3_5, dirino, inode->i_ino, 0, 0, 3);
/* look for the object's stat data */
retval = search_item(inode->i_sb, &key, &path_to_sd);
@@ -2164,7 +2160,8 @@ out_end_trans:
out_inserted_sd:
clear_nlink(inode);
th->t_trans_id = 0; /* so the caller can't use this handle later */
- unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
+ if (inode->i_state & I_NEW)
+ unlock_new_inode(inode);
iput(inode);
return err;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2be907231375..1a6e6343fed3 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2769,6 +2769,20 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
goto free_and_return;
}
+ /*
+ * Sanity check to see if journal first block is correct.
+ * If journal first block is invalid it can cause
+ * zeroing important superblock members.
+ */
+ if (!SB_ONDISK_JOURNAL_DEVICE(sb) &&
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb) < SB_JOURNAL_1st_RESERVED_BLOCK(sb)) {
+ reiserfs_warning(sb, "journal-1393",
+ "journal 1st super block is invalid: 1st reserved block %d, but actual 1st block is %d",
+ SB_JOURNAL_1st_RESERVED_BLOCK(sb),
+ SB_ONDISK_JOURNAL_1st_BLOCK(sb));
+ goto free_and_return;
+ }
+
if (journal_init_dev(sb, journal, j_dev_name) != 0) {
reiserfs_warning(sb, "sh-462",
"unable to initialize journal device");
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 2946713cb00d..4ebad6781b0e 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path)
search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
}
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+ struct reiserfs_de_head *deh;
+ int i;
+
+ deh = B_I_DEH(bh, ih);
+ for (i = 0; i < ih_entry_count(ih); i++) {
+ if (deh_location(&deh[i]) > ih_item_len(ih)) {
+ reiserfs_warning(NULL, "reiserfs-5094",
+ "directory entry location seems wrong %h",
+ &deh[i]);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
{
struct block_head *blkh;
@@ -454,6 +472,15 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
"(second one): %h", ih);
return 0;
}
+ if (is_direntry_le_ih(ih)) {
+ if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+ reiserfs_warning(NULL, "reiserfs-5093",
+ "item entry count seems wrong %h",
+ ih);
+ return 0;
+ }
+ return has_valid_deh_location(bh, ih);
+ }
prev_location = ih_location(ih);
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9caf3948417c..7dfdc503e601 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1264,6 +1264,10 @@ static int reiserfs_parse_options(struct super_block *s,
"turned on.");
return 0;
}
+ if (qf_names[qtype] !=
+ REISERFS_SB(s)->s_qf_names[qtype])
+ kfree(qf_names[qtype]);
+ qf_names[qtype] = NULL;
if (*arg) { /* Some filename specified? */
if (REISERFS_SB(s)->s_qf_names[qtype]
&& strcmp(REISERFS_SB(s)->s_qf_names[qtype],
@@ -1293,10 +1297,6 @@ static int reiserfs_parse_options(struct super_block *s,
else
*mount_options |= 1 << REISERFS_GRPQUOTA;
} else {
- if (qf_names[qtype] !=
- REISERFS_SB(s)->s_qf_names[qtype])
- kfree(qf_names[qtype]);
- qf_names[qtype] = NULL;
if (qtype == USRQUOTA)
*mount_options &= ~(1 << REISERFS_USRQUOTA);
else
@@ -2085,6 +2085,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
unlock_new_inode(root_inode);
}
+ if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+ !root_inode->i_size) {
+ SWARN(silent, s, "", "corrupt root inode, run fsck");
+ iput(root_inode);
+ errval = -EUCLEAN;
+ goto error;
+ }
+
s->s_root = d_make_root(root_inode);
if (!s->s_root)
goto error;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 28f6daf371d3..57a7b9a164a2 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -665,6 +665,13 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
+ /*
+ * priv_root needn't be initialized during mount so allow initial
+ * lookups to succeed.
+ */
+ if (!REISERFS_SB(inode->i_sb)->priv_root)
+ return 0;
+
dentry = xattr_lookup(inode, name, XATTR_REPLACE);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index c764352447ba..81bec2c80b25 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -43,7 +43,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec);
static inline int reiserfs_xattrs_initialized(struct super_block *sb)
{
- return REISERFS_SB(sb)->priv_root != NULL;
+ return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root;
}
#define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index f86f51f99ace..1dcadd22b440 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -221,10 +221,8 @@ int romfs_dev_read(struct super_block *sb, unsigned long pos,
size_t limit;
limit = romfs_maxsize(sb);
- if (pos >= limit)
+ if (pos >= limit || buflen > limit - pos)
return -EIO;
- if (buflen > limit - pos)
- buflen = limit - pos;
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd)
diff --git a/fs/select.c b/fs/select.c
index 063067e606ca..b752dabb3131 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1006,10 +1006,9 @@ static long do_restart_poll(struct restart_block *restart_block)
ret = do_sys_poll(ufds, nfds, to);
- if (ret == -EINTR) {
- restart_block->fn = do_restart_poll;
- ret = -ERESTART_RESTARTBLOCK;
- }
+ if (ret == -EINTR)
+ ret = set_restart_fn(restart_block, do_restart_poll);
+
return ret;
}
@@ -1031,7 +1030,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
struct restart_block *restart_block;
restart_block = &current->restart_block;
- restart_block->fn = do_restart_poll;
restart_block->poll.ufds = ufds;
restart_block->poll.nfds = nfds;
@@ -1042,7 +1040,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
} else
restart_block->poll.has_timeout = 0;
- ret = -ERESTART_RESTARTBLOCK;
+ ret = set_restart_fn(restart_block, do_restart_poll);
}
return ret;
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eea09f6d8830..6cb1144421bb 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -26,6 +26,9 @@ static void seq_set_overflow(struct seq_file *m)
static void *seq_buf_alloc(unsigned long size)
{
+ if (unlikely(size > MAX_RW_COUNT))
+ return NULL;
+
return kvmalloc(size, GFP_KERNEL);
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 1c667af86da5..0b7c6c2c95b8 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -35,17 +35,7 @@
void signalfd_cleanup(struct sighand_struct *sighand)
{
- wait_queue_head_t *wqh = &sighand->signalfd_wqh;
- /*
- * The lockless check can race with remove_wait_queue() in progress,
- * but in this case its caller should run under rcu_read_lock() and
- * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return.
- */
- if (likely(!waitqueue_active(wqh)))
- return;
-
- /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */
- wake_up_poll(wqh, POLLHUP | POLLFREE);
+ wake_up_pollfree(&sighand->signalfd_wqh);
}
struct signalfd_ctx {
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 8073b6532cf0..1d406a2094a5 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num)
struct squashfs_sb_info *msblk = sb->s_fs_info;
int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1);
int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1);
- u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+ u64 start;
__le64 ino;
int err;
TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);
+ if (ino_num == 0 || (ino_num - 1) >= msblk->inodes)
+ return -EINVAL;
+
+ start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+
err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino));
if (err < 0)
return err;
@@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
u64 lookup_table_start, u64 next_table, unsigned int inodes)
{
unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
+ unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_inode_lookup_table, length %d\n", length);
@@ -134,20 +142,41 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
if (inodes == 0)
return ERR_PTR(-EINVAL);
- /* length bytes should not extend into the next table - this check
- * also traps instances where lookup_table_start is incorrectly larger
- * than the next table start
+ /*
+ * The computed size of the lookup table (length bytes) should exactly
+ * match the table start and end points
*/
- if (lookup_table_start + length > next_table)
+ if (length != (next_table - lookup_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, lookup_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first inode lookup table metadata block,
- * this should be less than lookup_table_start
+ * table0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed inode lookup blocks. Each entry should be
+ * less than the next (i.e. table[0] < table[1]), and the difference
+ * between them should be SQUASHFS_METADATA_SIZE or less.
+ * table[indexes - 1] should be less than lookup_table_start, and
+ * again the difference should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end
+ || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= lookup_table_start ||
+ (lookup_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index f1c1430ae721..0bcb83479fcb 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -224,11 +224,11 @@ failure:
* If the skip factor is limited in this way then the file will use multiple
* slots.
*/
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
{
- int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+ u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
* SQUASHFS_META_INDEXES);
- return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+ return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d38ea3dab951..d2e15baab537 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_ID_BLOCK(index);
int offset = SQUASHFS_ID_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->id_table[block]);
+ u64 start_block;
__le32 disk_id;
int err;
+ if (index >= msblk->ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->id_table[block]);
+
err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset,
sizeof(disk_id));
if (err < 0)
@@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
u64 id_table_start, u64 next_table, unsigned short no_ids)
{
unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
+ unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids);
+ int n;
__le64 *table;
+ u64 start, end;
TRACE("In read_id_index_table, length %d\n", length);
@@ -80,20 +88,38 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
return ERR_PTR(-EINVAL);
/*
- * length bytes should not extend into the next table - this check
- * also traps instances where id_table_start is incorrectly larger
- * than the next table start
+ * The computed size of the index table (length bytes) should exactly
+ * match the table start and end points
*/
- if (id_table_start + length > next_table)
+ if (length != (next_table - id_table_start))
return ERR_PTR(-EINVAL);
table = squashfs_read_table(sb, id_table_start, length);
+ if (IS_ERR(table))
+ return table;
/*
- * table[0] points to the first id lookup table metadata block, this
- * should be less than id_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than id_table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= id_table_start || (id_table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 4e6853f084d0..10e93345b615 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -30,6 +30,7 @@
/* size of metadata (inode and directory) blocks */
#define SQUASHFS_METADATA_SIZE 8192
+#define SQUASHFS_BLOCK_OFFSET 2
/* default size of block device I/O */
#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index ef69c31947bf..5234c19a0eab 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -77,5 +77,6 @@ struct squashfs_sb_info {
unsigned int inodes;
unsigned int fragments;
int xattr_ids;
+ unsigned int ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 1516bb779b8d..5abc9d03397c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
msblk->fragments = le32_to_cpu(sblk->fragments);
+ msblk->ids = le16_to_cpu(sblk->no_ids);
flags = le16_to_cpu(sblk->flags);
TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -187,7 +188,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
TRACE("Number of fragments %d\n", msblk->fragments);
- TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
+ TRACE("Number of ids %d\n", msblk->ids);
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
TRACE("sblk->fragment_table_start %llx\n",
@@ -244,8 +245,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
allocate_id_index_table:
/* Allocate and read id index table */
msblk->id_table = squashfs_read_id_index_table(sb,
- le64_to_cpu(sblk->id_table_start), next_table,
- le16_to_cpu(sblk->no_ids));
+ le64_to_cpu(sblk->id_table_start), next_table, msblk->ids);
if (IS_ERR(msblk->id_table)) {
ERROR("unable to read id index table\n");
err = PTR_ERR(msblk->id_table);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index afe70f815e3d..86b0a0073e51 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
{
+ struct squashfs_xattr_id_table *id_table;
+
+ id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ if (IS_ERR(id_table))
+ return (__le64 *) id_table;
+
+ *xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
+ kfree(id_table);
+
ERROR("Xattrs in filesystem, these will be ignored\n");
- *xattr_table_start = start;
return ERR_PTR(-ENOTSUPP);
}
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index c89607d690c4..7f718d2bf357 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_XATTR_BLOCK(index);
int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+ u64 start_block;
struct squashfs_xattr_id id;
int err;
+ if (index >= msblk->xattr_ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+
err = squashfs_read_metadata(sb, &id, &start_block, &offset,
sizeof(id));
if (err < 0)
@@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
/*
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
-__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
+__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
u64 *xattr_table_start, int *xattr_ids)
{
- unsigned int len;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ unsigned int len, indexes;
struct squashfs_xattr_id_table *id_table;
+ __le64 *table;
+ u64 start, end;
+ int n;
- id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ id_table = squashfs_read_table(sb, table_start, sizeof(*id_table));
if (IS_ERR(id_table))
return (__le64 *) id_table;
@@ -83,13 +92,54 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
if (*xattr_ids == 0)
return ERR_PTR(-EINVAL);
- /* xattr_table should be less than start */
- if (*xattr_table_start >= start)
+ len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids);
+
+ /*
+ * The computed size of the index table (len bytes) should exactly
+ * match the table start and end points
+ */
+ start = table_start + sizeof(*id_table);
+ end = msblk->bytes_used;
+
+ if (len != (end - start))
return ERR_PTR(-EINVAL);
- len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ table = squashfs_read_table(sb, start, len);
+ if (IS_ERR(table))
+ return table;
+
+ /* table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed xattr id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than table_start, and again the difference
+ * shouls be SQUASHFS_METADATA_SIZE or less.
+ *
+ * Finally xattr_table_start should be less than table[0].
+ */
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= table_start || (table_start - start) >
+ (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- TRACE("In read_xattr_index_table, length %d\n", len);
+ if (*xattr_table_start >= le64_to_cpu(table[0])) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
- return squashfs_read_table(sb, start + sizeof(*id_table), len);
+ return table;
}
diff --git a/fs/super.c b/fs/super.c
index 219f7ca7c5d2..1d7461bca160 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1336,36 +1336,11 @@ EXPORT_SYMBOL(__sb_end_write);
*/
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
- bool force_trylock = false;
- int ret = 1;
+ if (!wait)
+ return percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
-#ifdef CONFIG_LOCKDEP
- /*
- * We want lockdep to tell us about possible deadlocks with freezing
- * but it's it bit tricky to properly instrument it. Getting a freeze
- * protection works as getting a read lock but there are subtle
- * problems. XFS for example gets freeze protection on internal level
- * twice in some cases, which is OK only because we already hold a
- * freeze protection also on higher level. Due to these cases we have
- * to use wait == F (trylock mode) which must not fail.
- */
- if (wait) {
- int i;
-
- for (i = 0; i < level - 1; i++)
- if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
- force_trylock = true;
- break;
- }
- }
-#endif
- if (wait && !force_trylock)
- percpu_down_read(sb->s_writers.rw_sem + level-1);
- else
- ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
-
- WARN_ON(force_trylock && !ret);
- return ret;
+ percpu_down_read(sb->s_writers.rw_sem + level-1);
+ return 1;
}
EXPORT_SYMBOL(__sb_start_write);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 666986b95c5d..300cdbdc8494 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
#include "sysfs.h"
#include "../kernfs/kernfs-internal.h"
@@ -549,3 +550,57 @@ void sysfs_remove_bin_file(struct kobject *kobj,
kernfs_remove_by_name(kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
+
+/**
+ * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @fmt: format
+ * @...: optional arguments to @format
+ *
+ *
+ * Returns number of characters written to @buf.
+ */
+int sysfs_emit(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf),
+ "invalid sysfs_emit: buf:%p\n", buf))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf, PAGE_SIZE, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit);
+
+/**
+ * sysfs_emit_at - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @at: offset in @buf to start write in bytes
+ * @at must be >= 0 && < PAGE_SIZE
+ * @fmt: format
+ * @...: optional arguments to @fmt
+ *
+ *
+ * Returns number of characters written starting at &@buf[@at].
+ */
+int sysfs_emit_at(char *buf, int at, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE,
+ "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at))
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(buf + at, PAGE_SIZE - at, fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit_at);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 20b8f82e115b..2bbe84d9c0a8 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -28,7 +28,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
{
struct dentry *root;
void *ns;
- bool new_sb;
+ bool new_sb = false;
if (!(flags & MS_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
@@ -38,9 +38,9 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
root = kernfs_mount_ns(fs_type, flags, sysfs_root,
SYSFS_MAGIC, &new_sb, ns);
- if (IS_ERR(root) || !new_sb)
+ if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
- else if (new_sb)
+ else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root;
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index bea8ad876bf9..16dc063edc4c 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -162,6 +162,77 @@ struct tracefs_fs_info {
struct tracefs_mount_opts mount_opts;
};
+static void change_gid(struct dentry *dentry, kgid_t gid)
+{
+ if (!dentry->d_inode)
+ return;
+ dentry->d_inode->i_gid = gid;
+}
+
+/*
+ * Taken from d_walk, but without he need for handling renames.
+ * Nothing can be renamed while walking the list, as tracefs
+ * does not support renames. This is only called when mounting
+ * or remounting the file system, to set all the files to
+ * the given gid.
+ */
+static void set_gid(struct dentry *parent, kgid_t gid)
+{
+ struct dentry *this_parent;
+ struct list_head *next;
+
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
+
+ change_gid(this_parent, gid);
+repeat:
+ next = this_parent->d_subdirs.next;
+resume:
+ while (next != &this_parent->d_subdirs) {
+ struct list_head *tmp = next;
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+ next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
+ change_gid(dentry, gid);
+
+ if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+ this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+ goto repeat;
+ }
+ spin_unlock(&dentry->d_lock);
+ }
+ /*
+ * All done at this level ... ascend and resume the search.
+ */
+ rcu_read_lock();
+ascend:
+ if (this_parent != parent) {
+ struct dentry *child = this_parent;
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* go into the first sibling still alive */
+ do {
+ next = child->d_child.next;
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+ rcu_read_unlock();
+ goto resume;
+ }
+ rcu_read_unlock();
+ spin_unlock(&this_parent->d_lock);
+ return;
+}
+
static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
{
substring_t args[MAX_OPT_ARGS];
@@ -194,6 +265,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
if (!gid_valid(gid))
return -EINVAL;
opts->gid = gid;
+ set_gid(tracefs_mount->mnt_root, gid);
break;
case Opt_mode:
if (match_octal(&args[0], &option))
@@ -409,6 +481,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
inode->i_mode = mode;
inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
return end_creating(dentry);
@@ -427,9 +501,12 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent,
if (unlikely(!inode))
return failed_creating(dentry);
- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ /* Do not set bits for OTH */
+ inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP;
inode->i_op = ops;
inode->i_fop = &simple_dir_operations;
+ inode->i_uid = d_inode(dentry->d_parent)->i_uid;
+ inode->i_gid = d_inode(dentry->d_parent)->i_gid;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 7cd8a7b95299..80cdd64aa7c3 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -1129,6 +1129,7 @@ int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
err = PTR_ERR(dent);
if (err == -ENOENT)
break;
+ kfree(pdent);
return err;
}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 358abc26dbc0..de0d63a347ac 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -747,7 +747,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
if (ubifs_crypt_is_encrypted(dir) &&
!fscrypt_has_permitted_context(dir, inode))
- return -EPERM;
+ return -EXDEV;
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
if (err)
@@ -1357,7 +1357,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
if (old_dir != new_dir) {
if (ubifs_crypt_is_encrypted(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode))
- return -EPERM;
+ return -EXDEV;
}
if (unlink && is_dir) {
@@ -1408,7 +1408,10 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_release;
}
+ spin_lock(&whiteout->i_lock);
whiteout->i_state |= I_LINKABLE;
+ spin_unlock(&whiteout->i_lock);
+
whiteout_ui = ubifs_inode(whiteout);
whiteout_ui->data = dev;
whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
@@ -1501,7 +1504,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
inc_nlink(whiteout);
mark_inode_dirty(whiteout);
+
+ spin_lock(&whiteout->i_lock);
whiteout->i_state &= ~I_LINKABLE;
+ spin_unlock(&whiteout->i_lock);
+
iput(whiteout);
}
@@ -1579,7 +1586,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, fst_inode) ||
!fscrypt_has_permitted_context(old_dir, snd_inode)))
- return -EPERM;
+ return -EXDEV;
err = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &fst_nm);
if (err)
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3be28900bf37..5b4d4b1087c4 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -237,7 +237,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
int offs, int quiet, int must_chk_crc)
{
- int err = -EINVAL, type, node_len;
+ int err = -EINVAL, type, node_len, dump_node = 1;
uint32_t crc, node_crc, magic;
const struct ubifs_ch *ch = buf;
@@ -290,10 +290,22 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
out_len:
if (!quiet)
ubifs_err(c, "bad node length %d", node_len);
+ if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ)
+ dump_node = 0;
out:
if (!quiet) {
ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
- ubifs_dump_node(c, buf);
+ if (dump_node) {
+ ubifs_dump_node(c, buf);
+ } else {
+ int safe_len = min3(node_len, c->leb_size - offs,
+ (int)UBIFS_MAX_DATA_NODE_SZ);
+ pr_err("\tprevent out-of-bounds memory access\n");
+ pr_err("\ttruncated data node length %d\n", safe_len);
+ pr_err("\tcorrupted data node:\n");
+ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
+ buf, safe_len, 0);
+ }
dump_stack();
}
return err;
@@ -319,7 +331,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
uint32_t crc;
- ubifs_assert(pad >= 0 && !(pad & 7));
+ ubifs_assert(pad >= 0);
if (pad >= UBIFS_PAD_NODE_SZ) {
struct ubifs_ch *ch = buf;
@@ -715,6 +727,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* write-buffer.
*/
memcpy(wbuf->buf + wbuf->used, buf, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+ }
if (aligned_len == wbuf->avail) {
dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -807,13 +823,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
}
spin_lock(&wbuf->lock);
- if (aligned_len)
+ if (aligned_len) {
/*
* And now we have what's left and what does not take whole
* max. write unit, so write it to the write-buffer and we are
* done.
*/
memcpy(wbuf->buf, buf + written, len);
+ if (aligned_len > len) {
+ ubifs_assert(aligned_len - len < 8);
+ ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+ }
+ }
if (c->leb_size - wbuf->offs >= c->max_write_size)
wbuf->size = c->max_write_size;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 3c1b54091d6c..e0e2bc19c929 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -132,21 +132,24 @@ void udf_evict_inode(struct inode *inode)
struct udf_inode_info *iinfo = UDF_I(inode);
int want_delete = 0;
- if (!inode->i_nlink && !is_bad_inode(inode)) {
- want_delete = 1;
- udf_setsize(inode, 0);
- udf_update_inode(inode, IS_SYNC(inode));
+ if (!is_bad_inode(inode)) {
+ if (!inode->i_nlink) {
+ want_delete = 1;
+ udf_setsize(inode, 0);
+ udf_update_inode(inode, IS_SYNC(inode));
+ }
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
+ inode->i_size != iinfo->i_lenExtents) {
+ udf_warn(inode->i_sb,
+ "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
+ inode->i_ino, inode->i_mode,
+ (unsigned long long)inode->i_size,
+ (unsigned long long)iinfo->i_lenExtents);
+ }
}
truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode);
clear_inode(inode);
- if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
- inode->i_size != iinfo->i_lenExtents) {
- udf_warn(inode->i_sb, "Inode %lu (mode %o) has inode size %llu different from extent length %llu. Filesystem need not be standards compliant.\n",
- inode->i_ino, inode->i_mode,
- (unsigned long long)inode->i_size,
- (unsigned long long)iinfo->i_lenExtents);
- }
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
udf_clear_extent_cache(inode);
@@ -537,11 +540,14 @@ static int udf_do_extend_file(struct inode *inode,
udf_write_aext(inode, last_pos, &last_ext->extLocation,
last_ext->extLength, 1);
+
/*
- * We've rewritten the last extent but there may be empty
- * indirect extent after it - enter it.
+ * We've rewritten the last extent. If we are going to add
+ * more extents, we may need to enter possible following
+ * empty indirect extent.
*/
- udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
+ if (new_block_bytes || prealloc_len)
+ udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0);
}
/* Managed to do everything necessary? */
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index 3949c4bec3a3..e5f4dcde309f 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -173,13 +173,22 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type,
else
offset = le32_to_cpu(eahd->appAttrLocation);
- while (offset < iinfo->i_lenEAttr) {
+ while (offset + sizeof(*gaf) < iinfo->i_lenEAttr) {
+ uint32_t attrLength;
+
gaf = (struct genericFormat *)&ea[offset];
+ attrLength = le32_to_cpu(gaf->attrLength);
+
+ /* Detect undersized elements and buffer overflows */
+ if ((attrLength < sizeof(*gaf)) ||
+ (attrLength > (iinfo->i_lenEAttr - offset)))
+ break;
+
if (le32_to_cpu(gaf->attrType) == type &&
gaf->attrSubtype == subtype)
return gaf;
else
- offset += le32_to_cpu(gaf->attrLength);
+ offset += attrLength;
}
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 041bf34f781f..d5516f025bad 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -956,6 +956,10 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
iinfo->i_location.partitionReferenceNum,
0);
epos.bh = udf_tgetblk(sb, block);
+ if (unlikely(!epos.bh)) {
+ err = -ENOMEM;
+ goto out_no_entry;
+ }
lock_buffer(epos.bh);
memset(epos.bh->b_data, 0x00, bsize);
set_buffer_uptodate(epos.bh);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 51de27685e18..70b56011b823 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -113,16 +113,10 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb)
return NULL;
lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data;
partnum = le32_to_cpu(lvid->numOfPartitions);
- if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) -
- offsetof(struct logicalVolIntegrityDesc, impUse)) /
- (2 * sizeof(uint32_t)) < partnum) {
- udf_err(sb, "Logical volume integrity descriptor corrupted "
- "(numOfPartitions = %u)!\n", partnum);
- return NULL;
- }
/* The offset is to skip freeSpaceTable and sizeTable arrays */
offset = partnum * 2 * sizeof(uint32_t);
- return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]);
+ return (struct logicalVolIntegrityDescImpUse *)
+ (((uint8_t *)(lvid + 1)) + offset);
}
/* UDF filesystem type */
@@ -1385,6 +1379,12 @@ static int udf_load_sparable_map(struct super_block *sb,
(int)spm->numSparingTables);
return -EIO;
}
+ if (le32_to_cpu(spm->sizeSparingTable) > sb->s_blocksize) {
+ udf_err(sb, "error loading logical volume descriptor: "
+ "Too big sparing table size (%u)\n",
+ le32_to_cpu(spm->sizeSparingTable));
+ return -EIO;
+ }
for (i = 0; i < spm->numSparingTables; i++) {
loc = le32_to_cpu(spm->locSparingTable[i]);
@@ -1559,6 +1559,7 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
struct udf_sb_info *sbi = UDF_SB(sb);
struct logicalVolIntegrityDesc *lvid;
int indirections = 0;
+ u32 parts, impuselen;
while (++indirections <= UDF_MAX_LVID_NESTING) {
final_bh = NULL;
@@ -1585,15 +1586,27 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
lvid = (struct logicalVolIntegrityDesc *)final_bh->b_data;
if (lvid->nextIntegrityExt.extLength == 0)
- return;
+ goto check;
loc = leea_to_cpu(lvid->nextIntegrityExt);
}
udf_warn(sb, "Too many LVID indirections (max %u), ignoring.\n",
UDF_MAX_LVID_NESTING);
+out_err:
brelse(sbi->s_lvid_bh);
sbi->s_lvid_bh = NULL;
+ return;
+check:
+ parts = le32_to_cpu(lvid->numOfPartitions);
+ impuselen = le32_to_cpu(lvid->lengthOfImpUse);
+ if (parts >= sb->s_blocksize || impuselen >= sb->s_blocksize ||
+ sizeof(struct logicalVolIntegrityDesc) + impuselen +
+ 2 * parts * sizeof(u32) > sb->s_blocksize) {
+ udf_warn(sb, "Corrupted LVID (parts=%u, impuselen=%u), "
+ "ignoring.\n", parts, impuselen);
+ goto out_err;
+ }
}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6440003f8ddc..e3f3a0df6027 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -99,7 +99,7 @@ static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gene
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct inode *inode;
- if (ino < UFS_ROOTINO || ino > uspi->s_ncg * uspi->s_ipg)
+ if (ino < UFS_ROOTINO || ino > (u64)uspi->s_ncg * uspi->s_ipg)
return ERR_PTR(-ESTALE);
inode = ufs_iget(sb, ino);
diff --git a/fs/xattr.c b/fs/xattr.c
index ee34b72567d1..2086a96dfa09 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -204,10 +204,22 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
return error;
}
-
+/**
+ * __vfs_setxattr_locked: set an extended attribute while holding the inode
+ * lock
+ *
+ * @dentry - object to perform setxattr on
+ * @name - xattr name to set
+ * @value - value to set @name to
+ * @size - size of @value
+ * @flags - flags to pass into filesystem operations
+ * @delegated_inode - on return, will contain an inode pointer that
+ * a delegation was broken on, NULL if none.
+ */
int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
+__vfs_setxattr_locked(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags,
+ struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
@@ -216,15 +228,40 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (error)
return error;
- inode_lock(inode);
error = security_inode_setxattr(dentry, name, value, size, flags);
if (error)
goto out;
+ error = try_break_deleg(inode, delegated_inode);
+ if (error)
+ goto out;
+
error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
out:
+ return error;
+}
+EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
+
+int
+vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags)
+{
+ struct inode *inode = dentry->d_inode;
+ struct inode *delegated_inode = NULL;
+ int error;
+
+retry_deleg:
+ inode_lock(inode);
+ error = __vfs_setxattr_locked(dentry, name, value, size, flags,
+ &delegated_inode);
inode_unlock(inode);
+
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -381,8 +418,18 @@ __vfs_removexattr(struct dentry *dentry, const char *name)
}
EXPORT_SYMBOL(__vfs_removexattr);
+/**
+ * __vfs_removexattr_locked: set an extended attribute while holding the inode
+ * lock
+ *
+ * @dentry - object to perform setxattr on
+ * @name - name of xattr to remove
+ * @delegated_inode - on return, will contain an inode pointer that
+ * a delegation was broken on, NULL if none.
+ */
int
-vfs_removexattr(struct dentry *dentry, const char *name)
+__vfs_removexattr_locked(struct dentry *dentry, const char *name,
+ struct inode **delegated_inode)
{
struct inode *inode = dentry->d_inode;
int error;
@@ -391,11 +438,14 @@ vfs_removexattr(struct dentry *dentry, const char *name)
if (error)
return error;
- inode_lock(inode);
error = security_inode_removexattr(dentry, name);
if (error)
goto out;
+ error = try_break_deleg(inode, delegated_inode);
+ if (error)
+ goto out;
+
error = __vfs_removexattr(dentry, name);
if (!error) {
@@ -404,12 +454,32 @@ vfs_removexattr(struct dentry *dentry, const char *name)
}
out:
+ return error;
+}
+EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
+
+int
+vfs_removexattr(struct dentry *dentry, const char *name)
+{
+ struct inode *inode = dentry->d_inode;
+ struct inode *delegated_inode = NULL;
+ int error;
+
+retry_deleg:
+ inode_lock(inode);
+ error = __vfs_removexattr_locked(dentry, name, &delegated_inode);
inode_unlock(inode);
+
+ if (delegated_inode) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry_deleg;
+ }
+
return error;
}
EXPORT_SYMBOL_GPL(vfs_removexattr);
-
/*
* Extended attribute SET operations
*/
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 516e0c57cf9c..a10d9a3c181e 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2529,6 +2529,13 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)))
return false;
+ if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks)
+ return false;
+
+ if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
+ be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))
+ return false;
+
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
@@ -2540,6 +2547,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS))
return false;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length))
+ return false;
+
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
@@ -2554,6 +2565,11 @@ xfs_agf_verify(
return false;
if (xfs_sb_version_hasreflink(&mp->m_sb) &&
+ be32_to_cpu(agf->agf_refcount_blocks) >
+ be32_to_cpu(agf->agf_length))
+ return false;
+
+ if (xfs_sb_version_hasreflink(&mp->m_sb) &&
(be32_to_cpu(agf->agf_refcount_level) < 1 ||
be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
return false;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 73a541755d5b..facb83031ba7 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -520,8 +520,8 @@ xfs_attr_shortform_create(xfs_da_args_t *args)
ASSERT(ifp->if_flags & XFS_IFINLINE);
}
xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK);
- hdr = (xfs_attr_sf_hdr_t *)ifp->if_u1.if_data;
- hdr->count = 0;
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ memset(hdr, 0, sizeof(*hdr));
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -1335,7 +1335,9 @@ xfs_attr3_leaf_add_work(
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr->freemap[i].base == tmp) {
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
- ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+ ichdr->freemap[i].size -=
+ min_t(uint16_t, ichdr->freemap[i].size,
+ sizeof(xfs_attr_leaf_entry_t));
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 84245d210182..2b07dadc5916 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -761,12 +761,16 @@ xfs_bmap_extents_to_btree(
*logflagsp = 0;
if ((error = xfs_alloc_vextent(&args))) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return error;
}
if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
xfs_iroot_realloc(ip, -1, whichfork);
+ ASSERT(ifp->if_broot == NULL);
+ XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
return -ENOSPC;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 682e2bf370c7..ee4ebc2dd749 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -212,6 +212,7 @@ __xfs_dir3_free_read(
xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
xfs_verifier_error(*bpp);
xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 55c88a732690..1f16c2da2472 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -1319,7 +1319,7 @@ xfs_rmap_convert_shared(
* record for our insertion point. This will also give us the record for
* start block contiguity tests.
*/
- error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags,
+ error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext,
&PREV, &i);
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 43cfc07996a4..e7622e084186 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -273,6 +273,9 @@ xfs_getfsmap_helper(
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
+ if (info->head->fmh_entries == UINT_MAX)
+ return -ECANCELED;
+
if (rec_daddr > info->next_daddr)
info->head->fmh_entries++;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 79a9a0def7db..5b2b223f9285 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -715,7 +715,8 @@ xfs_ioc_space(
flags |= XFS_PREALLOC_CLEAR;
if (bf->l_start > XFS_ISIZE(ip)) {
error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
+ bf->l_start - XFS_ISIZE(ip),
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 4e6f2c8574f7..16d5a949fb11 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -835,7 +835,7 @@ xfs_setattr_size(
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
ASSERT(S_ISREG(inode->i_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
@@ -879,6 +879,16 @@ xfs_setattr_size(
if (newsize > oldsize) {
error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
} else {
+ /*
+ * iomap won't detect a dirty page over an unwritten block (or a
+ * cow block over a hole) and subsequently skips zeroing the
+ * newly post-EOF portion of the page. Flush the new EOF to
+ * convert the block before the pagecache truncate.
+ */
+ error = filemap_write_and_wait_range(inode->i_mapping, newsize,
+ newsize);
+ if (error)
+ return error;
error = iomap_truncate_page(inode, newsize, &did_zeroing,
&xfs_iomap_ops);
}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 360e32220f93..0c0f70e6c7d9 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2684,7 +2684,6 @@ xlog_state_do_callback(
int funcdidcallbacks; /* flag: function did callbacks */
int repeats; /* for issuing console warnings if
* looping too many times */
- int wake = 0;
spin_lock(&log->l_icloglock);
first_iclog = iclog = log->l_iclog;
@@ -2886,11 +2885,9 @@ xlog_state_do_callback(
#endif
if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
- wake = 1;
- spin_unlock(&log->l_icloglock);
-
- if (wake)
wake_up_all(&log->l_flush_wait);
+
+ spin_unlock(&log->l_icloglock);
}
@@ -4052,7 +4049,9 @@ xfs_log_force_umount(
* item committed callback functions will do this again under lock to
* avoid races.
*/
+ spin_lock(&log->l_cilp->xc_push_lock);
wake_up_all(&log->l_cilp->xc_commit_wait);
+ spin_unlock(&log->l_cilp->xc_push_lock);
xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
#ifdef XFSERRORDEBUG
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index aa6c5c193f45..8538916d255e 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -140,7 +140,7 @@ xfs_fs_map_blocks(
goto out_unlock;
error = invalidate_inode_pages2(inode->i_mapping);
if (WARN_ON_ONCE(error))
- return error;
+ goto out_unlock;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index db7f9fdd20a3..4d37f1b59436 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1076,6 +1076,7 @@ xfs_reflink_remap_extent(
xfs_filblks_t rlen;
xfs_filblks_t unmap_len;
xfs_off_t newlen;
+ int64_t qres;
int error;
unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
@@ -1098,13 +1099,19 @@ xfs_reflink_remap_extent(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- /* If we're not just clearing space, then do we have enough quota? */
- if (real_extent) {
- error = xfs_trans_reserve_quota_nblks(tp, ip,
- irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
- if (error)
- goto out_cancel;
- }
+ /*
+ * Reserve quota for this operation. We don't know if the first unmap
+ * in the dest file will cause a bmap btree split, so we always reserve
+ * at least enough blocks for that split. If the extent being mapped
+ * in is written, we need to reserve quota for that too.
+ */
+ qres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
+ if (real_extent)
+ qres += irec->br_blockcount;
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qres, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out_cancel;
trace_xfs_reflink_remap(ip, irec->br_startoff,
irec->br_blockcount, irec->br_startblock);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index cdcb7235e41a..f1cf83283710 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -257,6 +257,9 @@ xfs_rtallocate_extent_block(
end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
i <= end;
i++) {
+ /* Make sure we don't scan off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
@@ -448,6 +451,14 @@ xfs_rtallocate_extent_near(
*/
if (bno >= mp->m_sb.sb_rextents)
bno = mp->m_sb.sb_rextents - 1;
+
+ /* Make sure we don't run off the end of the rt volume. */
+ maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ if (maxlen < minlen) {
+ *rtblock = NULLRTBLOCK;
+ return 0;
+ }
+
/*
* Try the exact allocation first.
*/
@@ -1003,10 +1014,13 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
- * Update the bitmap inode's size.
+ * Update the bitmap inode's size ondisk and incore. We need
+ * to update the incore size so that inode inactivation won't
+ * punch what it thinks are "posteof" blocks.
*/
mp->m_rbmip->i_d.di_size =
nsbp->sb_rbmblocks * nsbp->sb_blocksize;
+ i_size_write(VFS_I(mp->m_rbmip), mp->m_rbmip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
/*
* Get the summary inode into the transaction.
@@ -1014,9 +1028,12 @@ xfs_growfs_rt(
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
- * Update the summary inode's size.
+ * Update the summary inode's size. We need to update the
+ * incore size so that inode inactivation won't punch what it
+ * thinks are "posteof" blocks.
*/
mp->m_rsumip->i_d.di_size = nmp->m_rsumsize;
+ i_size_write(VFS_I(mp->m_rsumip), mp->m_rsumip->i_d.di_size);
xfs_trans_log_inode(tp, mp->m_rsumip, XFS_ILOG_CORE);
/*
* Copy summary data from old to new sizes.
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index d04637181ef2..980c9429abec 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -44,9 +44,11 @@ xfs_sysfs_init(
struct xfs_kobj *parent_kobj,
const char *name)
{
+ struct kobject *parent;
+
+ parent = parent_kobj ? &parent_kobj->kobject : NULL;
init_completion(&kobj->complete);
- return kobject_init_and_add(&kobj->kobject, ktype,
- &parent_kobj->kobject, "%s", name);
+ return kobject_init_and_add(&kobj->kobject, ktype, parent, "%s", name);
}
static inline void
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index c3d547211d16..9c42e50a5cb7 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -669,7 +669,7 @@ xfs_trans_dqresv(
}
}
if (ninos > 0) {
- total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
+ total_count = dqp->q_res_icount + ninos;
timer = be32_to_cpu(dqp->q_core.d_itimer);
warns = be16_to_cpu(dqp->q_core.d_iwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;