aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/server.c7
-rw-r--r--fs/afs/volume.c4
-rw-r--r--fs/aio.c9
-rw-r--r--fs/btrfs/block-rsv.c2
-rw-r--r--fs/btrfs/block-rsv.h16
-rw-r--r--fs/btrfs/dev-replace.c24
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/qgroup.c14
-rw-r--r--fs/btrfs/ref-verify.c6
-rw-r--r--fs/btrfs/send.c2
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/cachefiles/bind.c3
-rw-r--r--fs/ceph/caps.c9
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/ecryptfs/inode.c8
-rw-r--r--fs/ext4/mballoc.c39
-rw-r--r--fs/ext4/move_extent.c6
-rw-r--r--fs/ext4/resize.c37
-rw-r--r--fs/f2fs/recovery.c23
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/hugetlbfs/inode.c6
-rw-r--r--fs/inode.c32
-rw-r--r--fs/io_uring.c65
-rw-r--r--fs/iomap/buffered-io.c3
-rw-r--r--fs/jfs/jfs_dmap.c57
-rw-r--r--fs/jfs/jfs_dtree.c7
-rw-r--r--fs/jfs/jfs_imap.c3
-rw-r--r--fs/jfs/jfs_mount.c6
-rw-r--r--fs/kernfs/dir.c12
-rw-r--r--fs/namei.c144
-rw-r--r--fs/nfs/nfsroot.c4
-rw-r--r--fs/nilfs2/dat.c27
-rw-r--r--fs/nilfs2/file.c8
-rw-r--r--fs/nilfs2/recovery.c7
-rw-r--r--fs/nilfs2/segment.c8
-rw-r--r--fs/ocfs2/namei.c1
-rw-r--r--fs/pstore/ram.c1
-rw-r--r--fs/quota/dquot.c226
-rw-r--r--fs/select.c2
-rw-r--r--fs/ubifs/dir.c2
42 files changed, 514 insertions, 342 deletions
diff --git a/fs/afs/server.c b/fs/afs/server.c
index d3a9288f7556..44985ca6602e 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -35,7 +35,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
const struct afs_addr_list *alist;
struct afs_server *server = NULL;
unsigned int i;
- int seq = 0, diff;
+ int seq = 1, diff;
rcu_read_lock();
@@ -43,6 +43,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
if (server)
afs_put_server(net, server, afs_server_trace_put_find_rsq);
server = NULL;
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
if (srx->transport.family == AF_INET6) {
@@ -98,7 +99,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
{
struct afs_server *server = NULL;
struct rb_node *p;
- int diff, seq = 0;
+ int diff, seq = 1;
_enter("%pU", uuid);
@@ -110,7 +111,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
if (server)
afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
-
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_lock, &seq);
p = net->fs_servers.rb_node;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 4310336b9bb8..4225bc766d46 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -221,7 +221,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
struct afs_server_list *new, *old, *discard;
struct afs_vldb_entry *vldb;
- char idbuf[16];
+ char idbuf[24];
int ret, idsz;
_enter("");
@@ -229,7 +229,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%llu", volume->vid);
+ idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
diff --git a/fs/aio.c b/fs/aio.c
index 1ec5a773d09c..7a50c97cffc0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -570,6 +570,13 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
+ /*
+ * kiocb didn't come from aio or is neither a read nor a write, hence
+ * ignore it.
+ */
+ if (!(iocb->ki_flags & IOCB_AIO_RW))
+ return;
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
@@ -1455,7 +1462,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
- req->ki_flags = iocb_flags(req->ki_filp);
+ req->ki_flags = iocb_flags(req->ki_filp) | IOCB_AIO_RW;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 36ef3228bac8..63205d2f4d84 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -392,7 +392,7 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
- if (unlikely(block_rsv->size == 0))
+ if (unlikely(btrfs_block_rsv_size(block_rsv) == 0))
goto try_reserve;
again:
ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
diff --git a/fs/btrfs/block-rsv.h b/fs/btrfs/block-rsv.h
index d1428bb73fc5..69770360917c 100644
--- a/fs/btrfs/block-rsv.h
+++ b/fs/btrfs/block-rsv.h
@@ -98,4 +98,20 @@ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
btrfs_block_rsv_release(fs_info, block_rsv, 0);
}
+/*
+ * Get the size of a block reserve in a context where getting a stale value is
+ * acceptable, instead of accessing it directly and trigger data race warning
+ * from KCSAN.
+ */
+static inline u64 btrfs_block_rsv_size(struct btrfs_block_rsv *rsv)
+{
+ u64 ret;
+
+ spin_lock(&rsv->lock);
+ ret = rsv->size;
+ spin_unlock(&rsv->lock);
+
+ return ret;
+}
+
#endif /* BTRFS_BLOCK_RSV_H */
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 444e1e5d012e..4abc0db6527e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -535,6 +535,23 @@ leave:
return ret;
}
+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+ if (args->start.srcdevid == 0) {
+ if (memchr(args->start.srcdev_name, 0,
+ sizeof(args->start.srcdev_name)) == NULL)
+ return -ENAMETOOLONG;
+ } else {
+ args->start.srcdev_name[0] = 0;
+ }
+
+ if (memchr(args->start.tgtdev_name, 0,
+ sizeof(args->start.tgtdev_name)) == NULL)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -547,10 +564,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
default:
return -EINVAL;
}
-
- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
- args->start.tgtdev_name[0] == '\0')
- return -EINVAL;
+ ret = btrfs_check_replace_dev_names(args);
+ if (ret < 0)
+ return ret;
ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
args->start.srcdevid,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f47316edcf42..a28b0eafb65a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1243,7 +1243,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << 9);
- if (WARN_ON(start != aligned_start)) {
+ /* Adjust the range to be aligned to 512B sectors if necessary. */
+ if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, 1 << 9);
start = aligned_start;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9f1efd5c24f1..874a441dc8b5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3102,6 +3102,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
kfree(range);
goto out;
}
+ if (range->flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
/* compression requires us to start the IO */
if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
@@ -4970,6 +4974,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
+ if (sa->create && is_fstree(sa->qgroupid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a743404dce0c..42412a4554d3 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1561,6 +1561,15 @@ out:
return ret;
}
+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+ return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+ qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1580,6 +1589,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}
+ if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* Check if there are no children of this qgroup */
if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index bbd63535965c..d59e89ef3251 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -888,8 +888,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
out_unlock:
spin_unlock(&fs_info->ref_verify_lock);
out:
- if (ret)
+ if (ret) {
+ btrfs_free_ref_cache(fs_info);
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ }
return ret;
}
@@ -1018,8 +1020,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
}
}
if (ret) {
- btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
btrfs_free_ref_cache(fs_info);
+ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
}
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 2e22da11e240..0dfa88ac0139 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7363,7 +7363,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
- ret = -EINVAL;
+ ret = -EOPNOTSUPP;
goto out;
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index d15de5abb562..597362eaf300 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1165,7 +1165,7 @@ static int check_extent_item(struct extent_buffer *leaf,
if (ptr + btrfs_extent_inline_ref_size(inline_type) > end) {
extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
- ptr, inline_type, end);
+ ptr, btrfs_extent_inline_ref_size(inline_type), end);
return -EUCLEAN;
}
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index dfb14dbddf51..3b39552c2365 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -245,6 +245,8 @@ error_open_root:
kmem_cache_free(cachefiles_object_jar, fsdef);
error_root_object:
cachefiles_end_secure(cache, saved_cred);
+ put_cred(cache->cache_cred);
+ cache->cache_cred = NULL;
pr_err("Failed to register: %d\n", ret);
return ret;
}
@@ -265,6 +267,7 @@ void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
dput(cache->graveyard);
mntput(cache->mnt);
+ put_cred(cache->cache_cred);
kfree(cache->rootdirname);
kfree(cache->secctx);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 4e88cb990723..45b8f6741f8d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4304,12 +4304,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
struct inode *dir,
int mds, int drop, int unless)
{
- struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
int ret;
+ /* This shouldn't happen */
+ BUG_ON(!dir);
+
/*
* force an record for the directory caps if we have a dentry lease.
* this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4319,14 +4321,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
- if (!dir) {
- parent = dget(dentry->d_parent);
- dir = d_inode(parent);
- }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
- dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8fcc53d83af2..22f7dc6688de 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -994,8 +994,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (!f.file)
goto out;
- /* RED-PEN how should LSM module know it's handling 32bit? */
- error = security_file_ioctl(f.file, cmd, arg);
+ error = security_file_ioctl_compat(f.file, cmd, arg);
if (error)
goto out_fput;
diff --git a/fs/dcache.c b/fs/dcache.c
index c08d8b922770..76517330103b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -740,12 +740,12 @@ static inline bool fast_dput(struct dentry *dentry)
*/
if (unlikely(ret < 0)) {
spin_lock(&dentry->d_lock);
- if (dentry->d_lockref.count > 1) {
- dentry->d_lockref.count--;
+ if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) {
spin_unlock(&dentry->d_lock);
return true;
}
- return false;
+ dentry->d_lockref.count--;
+ goto locked;
}
/*
@@ -796,6 +796,7 @@ static inline bool fast_dput(struct dentry *dentry)
* else could have killed it and marked it dead. Either way, we
* don't need to do anything else.
*/
+locked:
if (dentry->d_lockref.count) {
spin_unlock(&dentry->d_lock);
return true;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23752d9a79f..c867a0d62f36 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -76,6 +76,14 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode,
if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb))
return ERR_PTR(-EXDEV);
+
+ /* Reject dealing with casefold directories. */
+ if (IS_CASEFOLDED(lower_inode)) {
+ pr_err_ratelimited("%s: Can't handle casefolded directory.\n",
+ __func__);
+ return ERR_PTR(-EREMOTE);
+ }
+
if (!igrab(lower_inode))
return ERR_PTR(-ESTALE);
inode = iget5_locked(sb, (unsigned long)lower_inode,
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9e5aa625ab30..e823731110e3 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1802,6 +1802,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -1809,6 +1812,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -1835,12 +1839,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
- ext4_mb_unload_buddy(e4b);
- return 0;
- }
-
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -1873,6 +1875,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -5195,11 +5198,16 @@ __acquires(bitlock)
static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
ext4_group_t grp)
{
- if (grp < ext4_get_groups_count(sb))
- return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
- return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
- ext4_group_first_block_no(sb, grp) - 1) >>
- EXT4_CLUSTER_BITS(sb);
+ unsigned long nr_clusters_in_group;
+
+ if (grp < (ext4_get_groups_count(sb) - 1))
+ nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
+ else
+ nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+ ext4_group_first_block_no(sb, grp))
+ >> EXT4_CLUSTER_BITS(sb);
+
+ return nr_clusters_in_group - 1;
}
static bool ext4_trim_interrupted(void)
@@ -5211,13 +5219,15 @@ static int ext4_try_to_trim_range(struct super_block *sb,
struct ext4_buddy *e4b, ext4_grpblk_t start,
ext4_grpblk_t max, ext4_grpblk_t minblocks)
{
- ext4_grpblk_t next, count, free_count;
+ ext4_grpblk_t next, count, free_count, last, origin_start;
bool set_trimmed = false;
void *bitmap;
+ last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
- if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
+ if (start == 0 && max >= last)
set_trimmed = true;
+ origin_start = start;
start = max(e4b->bd_info->bb_first_free, start);
count = 0;
free_count = 0;
@@ -5226,7 +5236,10 @@ static int ext4_try_to_trim_range(struct super_block *sb,
start = mb_find_next_zero_bit(bitmap, max + 1, start);
if (start > max)
break;
- next = mb_find_next_bit(bitmap, max + 1, start);
+
+ next = mb_find_next_bit(bitmap, last + 1, start);
+ if (origin_start == 0 && next >= last)
+ set_trimmed = true;
if ((next - start) >= minblocks) {
int ret = ext4_trim_extent(sb, start, next - start, e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 30ce3dc69378..e71ab64f1df5 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -615,6 +615,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;
+ *moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@@ -670,7 +671,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
- move_extent_per_page(o_filp, donor_inode,
+ *moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@@ -680,9 +681,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
- *moved_len = o_start - orig_blk;
- if (*moved_len > len)
- *moved_len = len;
out:
if (*moved_len) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3616c437bea6..409b4ad28e71 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -227,17 +227,24 @@ struct ext4_new_flex_group_data {
in the flex group */
__u16 *bg_flags; /* block group flags of groups
in @groups */
+ ext4_group_t resize_bg; /* number of allocated
+ new_group_data */
ext4_group_t count; /* number of groups in @groups
*/
};
/*
+ * Avoiding memory allocation failures due to too many groups added each time.
+ */
+#define MAX_RESIZE_BG 16384
+
+/*
* alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
* @flexbg_size.
*
* Returns NULL on failure otherwise address of the allocated structure.
*/
-static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size)
{
struct ext4_new_flex_group_data *flex_gd;
@@ -245,17 +252,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
if (flex_gd == NULL)
goto out3;
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
- goto out2;
- flex_gd->count = flexbg_size;
+ if (unlikely(flexbg_size > MAX_RESIZE_BG))
+ flex_gd->resize_bg = MAX_RESIZE_BG;
+ else
+ flex_gd->resize_bg = flexbg_size;
- flex_gd->groups = kmalloc_array(flexbg_size,
+ flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
sizeof(struct ext4_new_group_data),
GFP_NOFS);
if (flex_gd->groups == NULL)
goto out2;
- flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
+ flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
GFP_NOFS);
if (flex_gd->bg_flags == NULL)
goto out1;
@@ -292,7 +300,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
*/
static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- int flexbg_size)
+ unsigned int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t start_blk;
@@ -393,12 +401,12 @@ next_group:
group = group_data[0].group;
printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
- "%d groups, flexbg size is %d:\n", flex_gd->count,
+ "%u groups, flexbg size is %u:\n", flex_gd->count,
flexbg_size);
for (i = 0; i < flex_gd->count; i++) {
ext4_debug(
- "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
+ "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
@@ -1584,8 +1592,7 @@ exit:
static int ext4_setup_next_flex_gd(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t n_blocks_count,
- unsigned long flexbg_size)
+ ext4_fsblk_t n_blocks_count)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
@@ -1609,7 +1616,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
BUG_ON(last);
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
- last_group = group | (flexbg_size - 1);
+ last_group = group | (flex_gd->resize_bg - 1);
if (last_group > n_group)
last_group = n_group;
@@ -1963,8 +1970,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0;
unsigned long last_update_time = 0;
- int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
+ int err = 0;
int meta_bg;
+ unsigned int flexbg_size = ext4_flex_bg_size(sbi);
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, n_blocks_count - 1);
@@ -2105,8 +2113,7 @@ retry:
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
- while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
- flexbg_size)) {
+ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
if (jiffies - last_update_time > HZ * 10) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index da123c6d3ce0..7e30326b296c 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -611,7 +611,16 @@ retry_dn:
*/
if (dest == NEW_ADDR) {
f2fs_truncate_data_blocks_range(&dn, 1);
- f2fs_reserve_new_block(&dn);
+ do {
+ err = f2fs_reserve_new_block(&dn);
+ if (err == -ENOSPC) {
+ f2fs_bug_on(sbi, 1);
+ break;
+ }
+ } while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+ if (err)
+ goto err;
continue;
}
@@ -619,12 +628,14 @@ retry_dn:
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
- err = f2fs_reserve_new_block(&dn);
- while (err &&
- IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
+ do {
err = f2fs_reserve_new_block(&dn);
- /* We should not get -ENOSPC */
- f2fs_bug_on(sbi, err);
+ if (err == -ENOSPC) {
+ f2fs_bug_on(sbi, 1);
+ break;
+ }
+ } while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
if (err)
goto err;
}
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 01263ffbc4c0..9a5f153c8919 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -37,7 +37,7 @@ static long do_sys_name_to_handle(struct path *path,
if (f_handle.handle_bytes > MAX_HANDLE_SZ)
return -EINVAL;
- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
GFP_KERNEL);
if (!handle)
return -ENOMEM;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b1d31c78fc9d..47b292f9b4f8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1205,6 +1205,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1249,11 +1250,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps >> 20);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;
case Opt_min_size:
diff --git a/fs/inode.c b/fs/inode.c
index b73e099a4b77..78d7aba27ebd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2101,10 +2101,6 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
/* Directories are special, and always inherit S_ISGID */
if (S_ISDIR(mode))
mode |= S_ISGID;
- else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
- !in_group_p(inode->i_gid) &&
- !capable_wrt_inode_uidgid(dir, CAP_FSETID))
- mode &= ~S_ISGID;
} else
inode->i_gid = current_fsgid();
inode->i_mode = mode;
@@ -2360,3 +2356,31 @@ int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
return 0;
}
EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
+
+/**
+ * mode_strip_sgid - handle the sgid bit for non-directories
+ * @dir: parent directory inode
+ * @mode: mode of the file to be created in @dir
+ *
+ * If the @mode of the new file has both the S_ISGID and S_IXGRP bit
+ * raised and @dir has the S_ISGID bit raised ensure that the caller is
+ * either in the group of the parent directory or they have CAP_FSETID
+ * in their user namespace and are privileged over the parent directory.
+ * In all other cases, strip the S_ISGID bit from @mode.
+ *
+ * Return: the new mode to use for the file
+ */
+umode_t mode_strip_sgid(const struct inode *dir, umode_t mode)
+{
+ if ((mode & (S_ISGID | S_IXGRP)) != (S_ISGID | S_IXGRP))
+ return mode;
+ if (S_ISDIR(mode) || !dir || !(dir->i_mode & S_ISGID))
+ return mode;
+ if (in_group_p(dir->i_gid))
+ return mode;
+ if (capable_wrt_inode_uidgid(dir, CAP_FSETID))
+ return mode;
+
+ return mode & ~S_ISGID;
+}
+EXPORT_SYMBOL(mode_strip_sgid);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9de8961763b0..2c793e4ccf09 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -63,7 +63,6 @@
#include <linux/net.h>
#include <net/sock.h>
#include <net/af_unix.h>
-#include <net/scm.h>
#include <linux/anon_inodes.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
@@ -264,10 +263,6 @@ struct io_ring_ctx {
struct async_list pending_async[2];
-#if defined(CONFIG_UNIX)
- struct socket *ring_sock;
-#endif
-
struct list_head task_list;
spinlock_t task_lock;
};
@@ -381,19 +376,6 @@ static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops;
-struct sock *io_uring_get_socket(struct file *file)
-{
-#if defined(CONFIG_UNIX)
- if (file->f_op == &io_uring_fops) {
- struct io_ring_ctx *ctx = file->private_data;
-
- return ctx->ring_sock->sk;
- }
-#endif
- return NULL;
-}
-EXPORT_SYMBOL(io_uring_get_socket);
-
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -3080,20 +3062,10 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
-#if defined(CONFIG_UNIX)
- if (ctx->ring_sock) {
- struct sock *sock = ctx->ring_sock->sk;
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
- kfree_skb(skb);
- }
-#else
int i;
for (i = 0; i < ctx->nr_user_files; i++)
fput(ctx->user_files[i]);
-#endif
}
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
@@ -3588,13 +3560,6 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
-#if defined(CONFIG_UNIX)
- if (ctx->ring_sock) {
- ctx->ring_sock->file = NULL; /* so that iput() is called */
- sock_release(ctx->ring_sock);
- }
-#endif
-
io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes);
@@ -3794,6 +3759,11 @@ static const struct file_operations io_uring_fops = {
.fasync = io_uring_fasync,
};
+bool io_is_uring_fops(struct file *file)
+{
+ return file->f_op == &io_uring_fops;
+}
+
static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
@@ -3841,45 +3811,26 @@ static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
/*
* Allocate an anonymous fd, this is what constitutes the application
* visible backing of an io_uring instance. The application mmaps this
- * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
- * we have to tie this fd to a socket for file garbage collection purposes.
+ * fd to gain access to the SQ/CQ ring details.
*/
static int io_uring_get_fd(struct io_ring_ctx *ctx)
{
struct file *file;
int ret;
-#if defined(CONFIG_UNIX)
- ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
- &ctx->ring_sock);
- if (ret)
- return ret;
-#endif
-
ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (ret < 0)
- goto err;
+ return ret;
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
if (IS_ERR(file)) {
put_unused_fd(ret);
- ret = PTR_ERR(file);
- goto err;
+ return PTR_ERR(file);
}
-#if defined(CONFIG_UNIX)
- ctx->ring_sock->file = file;
- ctx->ring_sock->sk->sk_user_data = ctx;
-#endif
fd_install(ret, file);
return ret;
-err:
-#if defined(CONFIG_UNIX)
- sock_release(ctx->ring_sock);
- ctx->ring_sock = NULL;
-#endif
- return ret;
}
static int io_uring_create(unsigned entries, struct io_uring_params *p)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 53cd7b2bb580..c28ba474a25e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -23,6 +23,7 @@ static struct iomap_page *
iomap_page_create(struct inode *inode, struct page *page)
{
struct iomap_page *iop = to_iomap_page(page);
+ unsigned int nr_blocks = PAGE_SIZE / i_blocksize(inode);
if (iop || i_blocksize(inode) == PAGE_SIZE)
return iop;
@@ -32,6 +33,8 @@ iomap_page_create(struct inode *inode, struct page *page)
atomic_set(&iop->write_count, 0);
spin_lock_init(&iop->uptodate_lock);
bitmap_zero(iop->uptodate, PAGE_SIZE / SECTOR_SIZE);
+ if (PageUptodate(page))
+ bitmap_fill(iop->uptodate, nr_blocks);
/*
* migrate_page_move_mapping() assumes that pages with private data have
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index ea330ce921b1..deb54efb5601 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -63,10 +63,10 @@
*/
static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
int nblocks);
-static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
-static int dbBackSplit(dmtree_t * tp, int leafno);
-static int dbJoin(dmtree_t * tp, int leafno, int newval);
-static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
+static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl);
+static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl);
+static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl);
+static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl);
static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
int level);
static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
@@ -2171,7 +2171,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
* system.
*/
if (dp->tree.stree[word] == NOFREE)
- dbBackSplit((dmtree_t *) & dp->tree, word);
+ dbBackSplit((dmtree_t *)&dp->tree, word, false);
dbAllocBits(bmp, dp, blkno, nblocks);
}
@@ -2257,7 +2257,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* the binary system of the leaves if need be.
*/
dbSplit(tp, word, BUDMIN,
- dbMaxBud((u8 *) & dp->wmap[word]));
+ dbMaxBud((u8 *)&dp->wmap[word]), false);
word += 1;
} else {
@@ -2297,7 +2297,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* system of the leaves to reflect the current
* allocation (size).
*/
- dbSplit(tp, word, size, NOFREE);
+ dbSplit(tp, word, size, NOFREE, false);
/* get the number of dmap words handled */
nw = BUDSIZE(size, BUDMIN);
@@ -2404,7 +2404,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
/* update the leaf for this dmap word.
*/
rc = dbJoin(tp, word,
- dbMaxBud((u8 *) & dp->wmap[word]));
+ dbMaxBud((u8 *)&dp->wmap[word]), false);
if (rc)
return rc;
@@ -2437,7 +2437,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
/* update the leaf.
*/
- rc = dbJoin(tp, word, size);
+ rc = dbJoin(tp, word, size, false);
if (rc)
return rc;
@@ -2589,14 +2589,14 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
* that it is at the front of a binary buddy system.
*/
if (oldval == NOFREE) {
- rc = dbBackSplit((dmtree_t *) dcp, leafno);
+ rc = dbBackSplit((dmtree_t *)dcp, leafno, true);
if (rc)
return rc;
oldval = dcp->stree[ti];
}
- dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
+ dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true);
} else {
- rc = dbJoin((dmtree_t *) dcp, leafno, newval);
+ rc = dbJoin((dmtree_t *) dcp, leafno, newval, true);
if (rc)
return rc;
}
@@ -2625,7 +2625,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*/
if (alloc) {
dbJoin((dmtree_t *) dcp, leafno,
- oldval);
+ oldval, true);
} else {
/* the dbJoin() above might have
* caused a larger binary buddy system
@@ -2635,9 +2635,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*/
if (dcp->stree[ti] == NOFREE)
dbBackSplit((dmtree_t *)
- dcp, leafno);
+ dcp, leafno, true);
dbSplit((dmtree_t *) dcp, leafno,
- dcp->budmin, oldval);
+ dcp->budmin, oldval, true);
}
/* release the buffer and return the error.
@@ -2685,7 +2685,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
-static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
+static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl)
{
int budsz;
int cursz;
@@ -2707,7 +2707,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
while (cursz >= splitsz) {
/* update the buddy's leaf with its new value.
*/
- dbAdjTree(tp, leafno ^ budsz, cursz);
+ dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl);
/* on to the next size and buddy.
*/
@@ -2719,7 +2719,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
/* adjust the dmap tree to reflect the specified leaf's new
* value.
*/
- dbAdjTree(tp, leafno, newval);
+ dbAdjTree(tp, leafno, newval, is_ctl);
}
@@ -2750,7 +2750,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
-static int dbBackSplit(dmtree_t * tp, int leafno)
+static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
{
int budsz, bud, w, bsz, size;
int cursz;
@@ -2801,7 +2801,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
* system in two.
*/
cursz = leaf[bud] - 1;
- dbSplit(tp, bud, cursz, cursz);
+ dbSplit(tp, bud, cursz, cursz, is_ctl);
break;
}
}
@@ -2829,7 +2829,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
*
* RETURN VALUES: none
*/
-static int dbJoin(dmtree_t * tp, int leafno, int newval)
+static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
{
int budsz, buddy;
s8 *leaf;
@@ -2884,12 +2884,12 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
if (leafno < buddy) {
/* leafno is the left buddy.
*/
- dbAdjTree(tp, buddy, NOFREE);
+ dbAdjTree(tp, buddy, NOFREE, is_ctl);
} else {
/* buddy is the left buddy and becomes
* leafno.
*/
- dbAdjTree(tp, leafno, NOFREE);
+ dbAdjTree(tp, leafno, NOFREE, is_ctl);
leafno = buddy;
}
@@ -2902,7 +2902,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
/* update the leaf value.
*/
- dbAdjTree(tp, leafno, newval);
+ dbAdjTree(tp, leafno, newval, is_ctl);
return 0;
}
@@ -2923,15 +2923,20 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
*
* RETURN VALUES: none
*/
-static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
+static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl)
{
int lp, pp, k;
- int max;
+ int max, size;
+
+ size = is_ctl ? CTLTREESIZE : TREESIZE;
/* pick up the index of the leaf for this leafno.
*/
lp = leafno + le32_to_cpu(tp->dmt_leafidx);
+ if (WARN_ON_ONCE(lp >= size || lp < 0))
+ return;
+
/* is the current value the same as the old value ? if so,
* there is nothing to do.
*/
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 3acc954f7c04..077a87e53020 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -633,6 +633,11 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
index = base + (lim >> 1);
+ if (stbl[index] < 0) {
+ rc = -EIO;
+ goto out;
+ }
+
if (p->header.flag & BT_LEAF) {
/* uppercase leaf name to compare */
cmp =
@@ -1970,7 +1975,7 @@ static int dtSplitRoot(tid_t tid,
do {
f = &rp->slot[fsi];
fsi = f->next;
- } while (fsi != -1);
+ } while (fsi >= 0);
f->next = n;
}
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 14f918a4831d..b0965f3ef186 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2181,6 +2181,9 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
/* get the ag and iag numbers for this iag.
*/
agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+ if (agno >= MAXAG || agno < 0)
+ return -EIO;
+
iagno = le32_to_cpu(iagp->iagnum);
/* check if this is the last free extent within the
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index d41733540df9..459324f3570a 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -171,15 +171,15 @@ int jfs_mount(struct super_block *sb)
}
jfs_info("jfs_mount: ipimap:0x%p", ipimap);
- /* map further access of per fileset inodes by the fileset inode */
- sbi->ipimap = ipimap;
-
/* initialize fileset inode allocation map */
if ((rc = diMount(ipimap))) {
jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
goto err_ipimap;
}
+ /* map further access of per fileset inodes by the fileset inode */
+ sbi->ipimap = ipimap;
+
return rc;
/*
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d2068566c0b8..d3a602ea795b 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -702,6 +702,18 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
{
struct kernfs_node *kn;
+ if (parent->mode & S_ISGID) {
+ /* this code block imitates inode_init_owner() for
+ * kernfs
+ */
+
+ if (parent->iattr)
+ gid = parent->iattr->ia_gid;
+
+ if (flags & KERNFS_DIR)
+ mode |= S_ISGID;
+ }
+
kn = __kernfs_new_node(kernfs_root(parent), parent,
name, mode, uid, gid, flags);
if (kn) {
diff --git a/fs/namei.c b/fs/namei.c
index f6708ab8ec7e..a4cba6991a4d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -52,8 +52,8 @@
* The new code replaces the old recursive symlink resolution with
* an iterative one (in case of non-nested symlink chains). It does
* this with calls to <fs>_follow_link().
- * As a side effect, dir_namei(), _namei() and follow_link() are now
- * replaced with a single function lookup_dentry() that can handle all
+ * As a side effect, dir_namei(), _namei() and follow_link() are now
+ * replaced with a single function lookup_dentry() that can handle all
* the special cases of the former code.
*
* With the new dcache, the pathname is stored at each inode, at least as
@@ -2879,20 +2879,14 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
p = d_ancestor(p2, p1);
if (p) {
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}
p = d_ancestor(p1, p2);
- if (p) {
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
- return p;
- }
-
- lock_two_inodes(p1->d_inode, p2->d_inode,
- I_MUTEX_PARENT, I_MUTEX_PARENT2);
- return NULL;
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ return p;
}
EXPORT_SYMBOL(lock_rename);
@@ -2906,6 +2900,63 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
+/**
+ * mode_strip_umask - handle vfs umask stripping
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode to be created in @dir
+ *
+ * Umask stripping depends on whether or not the filesystem supports POSIX
+ * ACLs. If the filesystem doesn't support it umask stripping is done directly
+ * in here. If the filesystem does support POSIX ACLs umask stripping is
+ * deferred until the filesystem calls posix_acl_create().
+ *
+ * Returns: mode
+ */
+static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
+{
+ if (!IS_POSIXACL(dir))
+ mode &= ~current_umask();
+ return mode;
+}
+
+/**
+ * vfs_prepare_mode - prepare the mode to be used for a new inode
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode
+ * @mask_perms: allowed permission by the vfs
+ * @type: type of file to be created
+ *
+ * This helper consolidates and enforces vfs restrictions on the @mode of a new
+ * object to be created.
+ *
+ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
+ * the kernel documentation for mode_strip_umask()). Moving umask stripping
+ * after setgid stripping allows the same ordering for both non-POSIX ACL and
+ * POSIX ACL supporting filesystems.
+ *
+ * Note that it's currently valid for @type to be 0 if a directory is created.
+ * Filesystems raise that flag individually and we need to check whether each
+ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
+ * non-zero type.
+ *
+ * Returns: mode to be passed to the filesystem
+ */
+static inline umode_t vfs_prepare_mode(const struct inode *dir, umode_t mode,
+ umode_t mask_perms, umode_t type)
+{
+ mode = mode_strip_sgid(dir, mode);
+ mode = mode_strip_umask(dir, mode);
+
+ /*
+ * Apply the vfs mandated allowed permission mask and set the type of
+ * file to be created before we call into the filesystem.
+ */
+ mode &= (mask_perms & ~S_IFMT);
+ mode |= (type & S_IFMT);
+
+ return mode;
+}
+
int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool want_excl)
{
@@ -2915,8 +2966,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (!dir->i_op->create)
return -EACCES; /* shouldn't it be ENOSYS? */
- mode &= S_IALLUGO;
- mode |= S_IFREG;
+
+ mode = vfs_prepare_mode(dir, mode, S_IALLUGO, S_IFREG);
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
@@ -3186,8 +3237,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
* O_EXCL open we want to return EEXIST not EROFS).
*/
if (open_flag & O_CREAT) {
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
+ mode = vfs_prepare_mode(dir->d_inode, mode, mode, mode);
if (unlikely(!got_write)) {
create_error = -EROFS;
open_flag &= ~O_CREAT;
@@ -3463,8 +3513,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
child = d_alloc(dentry, &slash_name);
if (unlikely(!child))
goto out_err;
- if (!IS_POSIXACL(dir))
- mode &= ~current_umask();
+ mode = vfs_prepare_mode(dir, mode, mode, mode);
error = dir->i_op->tmpfile(dir, child, mode);
if (error)
goto out_err;
@@ -3723,6 +3772,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
if (!dir->i_op->mknod)
return -EPERM;
+ mode = vfs_prepare_mode(dir, mode, mode, mode);
error = devcgroup_inode_mknod(mode, dev);
if (error)
return error;
@@ -3771,9 +3821,8 @@ retry:
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&path, dentry, mode, dev);
+ error = security_path_mknod(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode), dev);
if (error)
goto out;
switch (mode & S_IFMT) {
@@ -3821,7 +3870,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!dir->i_op->mkdir)
return -EPERM;
- mode &= (S_IRWXUGO|S_ISVTX);
+ mode = vfs_prepare_mode(dir, mode, S_IRWXUGO | S_ISVTX, 0);
error = security_inode_mkdir(dir, dentry, mode);
if (error)
return error;
@@ -3848,9 +3897,8 @@ retry:
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mkdir(&path, dentry, mode);
+ error = security_path_mkdir(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode));
if (!error)
error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
done_path_create(&path, dentry);
@@ -4383,11 +4431,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
*
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
- * That's where 4.4 screws up. Current fix: serialization on
+ * That's where 4.4BSD screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source.
+ * c) we may have to lock up to _four_ objects - parents and victim (if it exists),
+ * and source (if it's a non-directory or a subdirectory that moves to
+ * different parent).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4416,6 +4465,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
struct name_snapshot old_name;
+ bool lock_old_subdir, lock_new_subdir;
if (source == target)
return 0;
@@ -4465,15 +4515,32 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
/*
- * Lock all moved children. Moved directories may need to change parent
- * pointer so they need the lock to prevent against concurrent
- * directory changes moving parent pointer. For regular files we've
- * historically always done this. The lockdep locking subclasses are
- * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
- * regular files and directories so it's difficult to tell which
- * subclasses to use.
+ * Lock children.
+ * The source subdirectory needs to be locked on cross-directory
+ * rename or cross-directory exchange since its parent changes.
+ * The target subdirectory needs to be locked on cross-directory
+ * exchange due to parent change and on any rename due to becoming
+ * a victim.
+ * Non-directories need locking in all cases (for NFS reasons);
+ * they get locked after any subdirectories (in inode address order).
+ *
+ * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE.
+ * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex.
*/
- lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+ lock_old_subdir = new_dir != old_dir;
+ lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE);
+ if (is_dir) {
+ if (lock_old_subdir)
+ inode_lock_nested(source, I_MUTEX_CHILD);
+ if (target && (!new_is_dir || lock_new_subdir))
+ inode_lock(target);
+ } else if (new_is_dir) {
+ if (lock_new_subdir)
+ inode_lock_nested(target, I_MUTEX_CHILD);
+ inode_lock(source);
+ } else {
+ lock_two_nondirectories(source, target);
+ }
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4517,8 +4584,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
d_exchange(old_dentry, new_dentry);
}
out:
- inode_unlock(source);
- if (target)
+ if (!is_dir || lock_old_subdir)
+ inode_unlock(source);
+ if (target && (!new_is_dir || lock_new_subdir))
inode_unlock(target);
dput(new_dentry);
if (!error) {
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index effaa4247b91..c0f2e1751c33 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -169,10 +169,10 @@ static int __init root_nfs_cat(char *dest, const char *src,
size_t len = strlen(dest);
if (len && dest[len - 1] != ',')
- if (strlcat(dest, ",", destlen) > destlen)
+ if (strlcat(dest, ",", destlen) >= destlen)
return -1;
- if (strlcat(dest, src, destlen) > destlen)
+ if (strlcat(dest, src, destlen) >= destlen)
return -1;
return 0;
}
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index e2a5320f2718..b9c759addd50 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
- return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
- create, &req->pr_entry_bh);
+ int ret;
+
+ ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
+ create, &req->pr_entry_bh);
+ if (unlikely(ret == -ENOENT)) {
+ nilfs_msg(dat->i_sb, KERN_ERR,
+ "DAT doesn't have a block to manage vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ /*
+ * Return internal code -EINVAL to notify bmap layer of
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
static void nilfs_dat_commit_entry(struct inode *dat,
@@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat,
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
- int ret;
-
- ret = nilfs_dat_prepare_entry(dat, req, 0);
- WARN_ON(ret == -ENOENT);
- return ret;
+ return nilfs_dat_prepare_entry(dat, req, 0);
}
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
@@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
int ret;
ret = nilfs_dat_prepare_entry(dat, req, 0);
- if (ret < 0) {
- WARN_ON(ret == -ENOENT);
+ if (ret < 0)
return ret;
- }
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 64bc81363c6c..3802b42e1cb4 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ wait_on_page_writeback(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 140b663e91c7..18feb9c7c706 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index fdcbed6ee832..7d1860d33723 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1702,7 +1702,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1713,6 +1712,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_begin_page_io(fs_page);
fs_page = bh->b_page;
@@ -1798,7 +1798,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
@@ -1807,6 +1806,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
fs_page = bh->b_page;
@@ -1894,8 +1894,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
@@ -1903,6 +1904,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, 0);
fs_page = bh->b_page;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index cd6a21439826..ba574fade6f0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -198,6 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
* callers. */
if (S_ISDIR(mode))
set_nlink(inode, 2);
+ mode = mode_strip_sgid(dir, mode);
inode_init_owner(inode, dir, mode);
status = dquot_initialize(inode);
if (status)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 65cbc8a60ca3..9f83d8eba0e6 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -563,6 +563,7 @@ static int ramoops_init_przs(const char *name,
}
zone_sz = mem_sz / *cnt;
+ zone_sz = ALIGN_DOWN(zone_sz, 2);
if (!zone_sz) {
dev_err(dev, "%s zone size == 0\n", name);
goto fail;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d9da4a8c4317..a7ddb874912d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -399,15 +399,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
/* Dirtify all the dquots - this can block when journalling */
-static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots)
{
int ret, err, cnt;
+ struct dquot *dquot;
ret = err = 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquot[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot)
/* Even in case of error we have to continue */
- ret = mark_dquot_dirty(dquot[cnt]);
+ ret = mark_dquot_dirty(dquot);
if (!err)
err = ret;
}
@@ -995,14 +997,15 @@ out:
}
EXPORT_SYMBOL(dqget);
-static inline struct dquot **i_dquot(struct inode *inode)
+static inline struct dquot __rcu **i_dquot(struct inode *inode)
{
- return inode->i_sb->s_op->get_dquots(inode);
+ /* Force __rcu for now until filesystems are fixed */
+ return (struct dquot __rcu **)inode->i_sb->s_op->get_dquots(inode);
}
static int dqinit_needed(struct inode *inode, int type)
{
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
@@ -1075,59 +1078,7 @@ out:
return err;
}
-/*
- * Remove references to dquots from inode and add dquot to list for freeing
- * if we have the last reference to dquot
- */
-static void remove_inode_dquot_ref(struct inode *inode, int type,
- struct list_head *tofree_head)
-{
- struct dquot **dquots = i_dquot(inode);
- struct dquot *dquot = dquots[type];
-
- if (!dquot)
- return;
-
- dquots[type] = NULL;
- if (list_empty(&dquot->dq_free)) {
- /*
- * The inode still has reference to dquot so it can't be in the
- * free list
- */
- spin_lock(&dq_list_lock);
- list_add(&dquot->dq_free, tofree_head);
- spin_unlock(&dq_list_lock);
- } else {
- /*
- * Dquot is already in a list to put so we won't drop the last
- * reference here.
- */
- dqput(dquot);
- }
-}
-
-/*
- * Free list of dquots
- * Dquots are removed from inodes and no new references can be got so we are
- * the only ones holding reference
- */
-static void put_dquot_list(struct list_head *tofree_head)
-{
- struct list_head *act_head;
- struct dquot *dquot;
-
- act_head = tofree_head->next;
- while (act_head != tofree_head) {
- dquot = list_entry(act_head, struct dquot, dq_free);
- act_head = act_head->next;
- /* Remove dquot from the list so we won't have problems... */
- list_del_init(&dquot->dq_free);
- dqput(dquot);
- }
-}
-
-static void remove_dquot_ref(struct super_block *sb, int type,
- struct list_head *tofree_head)
+static void remove_dquot_ref(struct super_block *sb, int type)
{
struct inode *inode;
#ifdef CONFIG_QUOTA_DEBUG
@@ -1144,11 +1095,18 @@ static void remove_dquot_ref(struct super_block *sb, int type,
*/
spin_lock(&dq_data_lock);
if (!IS_NOQUOTA(inode)) {
+ struct dquot __rcu **dquots = i_dquot(inode);
+ struct dquot *dquot = srcu_dereference_check(
+ dquots[type], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+
#ifdef CONFIG_QUOTA_DEBUG
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
#endif
- remove_inode_dquot_ref(inode, type, tofree_head);
+ rcu_assign_pointer(dquots[type], NULL);
+ if (dquot)
+ dqput(dquot);
}
spin_unlock(&dq_data_lock);
}
@@ -1165,13 +1123,8 @@ static void remove_dquot_ref(struct super_block *sb, int type,
/* Gather all references from inodes and drop them */
static void drop_dquot_ref(struct super_block *sb, int type)
{
- LIST_HEAD(tofree_head);
-
- if (sb->dq_op) {
- remove_dquot_ref(sb, type, &tofree_head);
- synchronize_srcu(&dquot_srcu);
- put_dquot_list(&tofree_head);
- }
+ if (sb->dq_op)
+ remove_dquot_ref(sb, type);
}
static inline
@@ -1504,7 +1457,8 @@ static int inode_quota_active(const struct inode *inode)
static int __dquot_initialize(struct inode *inode, int type)
{
int cnt, init_needed = 0;
- struct dquot **dquots, *got[MAXQUOTAS] = {};
+ struct dquot __rcu **dquots;
+ struct dquot *got[MAXQUOTAS] = {};
struct super_block *sb = inode->i_sb;
qsize_t rsv;
int ret = 0;
@@ -1579,7 +1533,7 @@ static int __dquot_initialize(struct inode *inode, int type)
if (!got[cnt])
continue;
if (!dquots[cnt]) {
- dquots[cnt] = got[cnt];
+ rcu_assign_pointer(dquots[cnt], got[cnt]);
got[cnt] = NULL;
/*
* Make quota reservation system happy if someone
@@ -1587,12 +1541,16 @@ static int __dquot_initialize(struct inode *inode, int type)
*/
rsv = inode_get_rsv_space(inode);
if (unlikely(rsv)) {
+ struct dquot *dquot = srcu_dereference_check(
+ dquots[cnt], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+
spin_lock(&inode->i_lock);
/* Get reservation again under proper lock */
rsv = __inode_get_rsv_space(inode);
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- dquots[cnt]->dq_dqb.dqb_rsvspace += rsv;
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
+ dquot->dq_dqb.dqb_rsvspace += rsv;
+ spin_unlock(&dquot->dq_dqb_lock);
spin_unlock(&inode->i_lock);
}
}
@@ -1614,7 +1572,7 @@ EXPORT_SYMBOL(dquot_initialize);
bool dquot_initialize_needed(struct inode *inode)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
int i;
if (!inode_quota_active(inode))
@@ -1639,13 +1597,14 @@ EXPORT_SYMBOL(dquot_initialize_needed);
static void __dquot_drop(struct inode *inode)
{
int cnt;
- struct dquot **dquots = i_dquot(inode);
+ struct dquot __rcu **dquots = i_dquot(inode);
struct dquot *put[MAXQUOTAS];
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- put[cnt] = dquots[cnt];
- dquots[cnt] = NULL;
+ put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+ rcu_assign_pointer(dquots[cnt], NULL);
}
spin_unlock(&dq_data_lock);
dqput_all(put);
@@ -1653,7 +1612,7 @@ static void __dquot_drop(struct inode *inode)
void dquot_drop(struct inode *inode)
{
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
@@ -1726,7 +1685,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
int reserve = flags & DQUOT_SPACE_RESERVE;
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
if (!inode_quota_active(inode)) {
if (reserve) {
@@ -1746,27 +1706,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
index = srcu_read_lock(&dquot_srcu);
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
if (reserve) {
- ret = dquot_add_space(dquots[cnt], 0, number, flags,
- &warn[cnt]);
+ ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]);
} else {
- ret = dquot_add_space(dquots[cnt], number, 0, flags,
- &warn[cnt]);
+ ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]);
}
if (ret) {
/* Back out changes we already did */
for (cnt--; cnt >= 0; cnt--) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
if (reserve)
- dquot_free_reserved_space(dquots[cnt],
- number);
+ dquot_free_reserved_space(dquot, number);
else
- dquot_decr_space(dquots[cnt], number);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ dquot_decr_space(dquot, number);
+ spin_unlock(&dquot->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
goto out_flush_warn;
@@ -1796,7 +1755,8 @@ int dquot_alloc_inode(struct inode *inode)
{
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
+ struct dquot *dquot;
if (!inode_quota_active(inode))
return 0;
@@ -1807,17 +1767,19 @@ int dquot_alloc_inode(struct inode *inode)
index = srcu_read_lock(&dquot_srcu);
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]);
+ ret = dquot_add_inodes(dquot, 1, &warn[cnt]);
if (ret) {
for (cnt--; cnt >= 0; cnt--) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
/* Back out changes we already did */
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- dquot_decr_inodes(dquots[cnt], 1);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
+ dquot_decr_inodes(dquot, 1);
+ spin_unlock(&dquot->dq_dqb_lock);
}
goto warn_put_all;
}
@@ -1838,7 +1800,8 @@ EXPORT_SYMBOL(dquot_alloc_inode);
*/
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int cnt, index;
if (!inode_quota_active(inode)) {
@@ -1854,9 +1817,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt]) {
- struct dquot *dquot = dquots[cnt];
-
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot) {
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number))
number = dquot->dq_dqb.dqb_rsvspace;
@@ -1880,7 +1842,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int cnt, index;
if (!inode_quota_active(inode)) {
@@ -1896,9 +1859,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt]) {
- struct dquot *dquot = dquots[cnt];
-
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot) {
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
number = dquot->dq_dqb.dqb_curspace;
@@ -1924,7 +1886,8 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
if (!inode_quota_active(inode)) {
@@ -1945,17 +1908,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN;
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- wtype = info_bdq_free(dquots[cnt], number);
+ spin_lock(&dquot->dq_dqb_lock);
+ wtype = info_bdq_free(dquot, number);
if (wtype != QUOTA_NL_NOWARN)
- prepare_warning(&warn[cnt], dquots[cnt], wtype);
+ prepare_warning(&warn[cnt], dquot, wtype);
if (reserve)
- dquot_free_reserved_space(dquots[cnt], number);
+ dquot_free_reserved_space(dquot, number);
else
- dquot_decr_space(dquots[cnt], number);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ dquot_decr_space(dquot, number);
+ spin_unlock(&dquot->dq_dqb_lock);
}
if (reserve)
*inode_reserved_space(inode) -= number;
@@ -1979,7 +1943,8 @@ void dquot_free_inode(struct inode *inode)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
+ struct dquot *dquot;
int index;
if (!inode_quota_active(inode))
@@ -1990,16 +1955,16 @@ void dquot_free_inode(struct inode *inode)
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
-
warn[cnt].w_type = QUOTA_NL_NOWARN;
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- wtype = info_idq_free(dquots[cnt], 1);
+ spin_lock(&dquot->dq_dqb_lock);
+ wtype = info_idq_free(dquot, 1);
if (wtype != QUOTA_NL_NOWARN)
- prepare_warning(&warn[cnt], dquots[cnt], wtype);
- dquot_decr_inodes(dquots[cnt], 1);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ prepare_warning(&warn[cnt], dquot, wtype);
+ dquot_decr_inodes(dquot, 1);
+ spin_unlock(&dquot->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots);
@@ -2025,8 +1990,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
qsize_t cur_space;
qsize_t rsv_space = 0;
qsize_t inode_usage = 1;
+ struct dquot __rcu **dquots;
struct dquot *transfer_from[MAXQUOTAS] = {};
- int cnt, ret = 0;
+ int cnt, index, ret = 0;
char is_valid[MAXQUOTAS] = {};
struct dquot_warn warn_to[MAXQUOTAS];
struct dquot_warn warn_from_inodes[MAXQUOTAS];
@@ -2057,6 +2023,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
}
cur_space = __inode_get_bytes(inode);
rsv_space = __inode_get_rsv_space(inode);
+ dquots = i_dquot(inode);
/*
* Build the transfer_from list, check limits, and update usage in
* the target structures.
@@ -2071,7 +2038,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (!sb_has_quota_active(inode->i_sb, cnt))
continue;
is_valid[cnt] = 1;
- transfer_from[cnt] = i_dquot(inode)[cnt];
+ transfer_from[cnt] = srcu_dereference_check(dquots[cnt],
+ &dquot_srcu, lockdep_is_held(&dq_data_lock));
ret = dquot_add_inodes(transfer_to[cnt], inode_usage,
&warn_to[cnt]);
if (ret)
@@ -2110,13 +2078,21 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
rsv_space);
spin_unlock(&transfer_from[cnt]->dq_dqb_lock);
}
- i_dquot(inode)[cnt] = transfer_to[cnt];
+ rcu_assign_pointer(dquots[cnt], transfer_to[cnt]);
}
spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(transfer_from);
- mark_all_dquot_dirty(transfer_to);
+ /*
+ * These arrays are local and we hold dquot references so we don't need
+ * the srcu protection but still take dquot_srcu to avoid warning in
+ * mark_all_dquot_dirty().
+ */
+ index = srcu_read_lock(&dquot_srcu);
+ mark_all_dquot_dirty((struct dquot __rcu **)transfer_from);
+ mark_all_dquot_dirty((struct dquot __rcu **)transfer_to);
+ srcu_read_unlock(&dquot_srcu, index);
+
flush_warnings(warn_to);
flush_warnings(warn_from_inodes);
flush_warnings(warn_from_space);
diff --git a/fs/select.c b/fs/select.c
index 7716d9d5be1e..f405dc5adf3c 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -475,7 +475,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in,
wait->_key |= POLLOUT_SET;
}
-static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
ktime_t expire, *to = NULL;
struct poll_wqueues table;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 88beae18e3c4..c63baff39ba9 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1206,6 +1206,8 @@ out_cancel:
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
out_inode:
+ /* Free inode->i_link before inode is marked as bad. */
+ fscrypt_free_inode(inode);
make_bad_inode(inode);
iput(inode);
out_fname: