diff options
Diffstat (limited to 'fs')
36 files changed, 238 insertions, 111 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index e98d6ea35ea8..bcf19dfb0af3 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -2388,7 +2388,6 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) struct btrfs_path *path = NULL; LIST_HEAD(dirty); struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; int loops = 0; spin_lock(&cur_trans->dirty_bgs_lock); @@ -2455,7 +2454,6 @@ again: cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; /* @@ -2556,7 +2554,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) int should_put; struct btrfs_path *path; struct list_head *io = &cur_trans->io_bgs; - int num_started = 0; path = btrfs_alloc_path(); if (!path) @@ -2614,7 +2611,6 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans) cache->io_ctl.inode = NULL; ret = btrfs_write_out_cache(trans, cache, path); if (ret == 0 && cache->io_ctl.inode) { - num_started++; should_put = 0; list_add_tail(&cache->io_list, io); } else { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index fcf1807cc8dd..c8def2bdf247 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -202,7 +202,7 @@ struct extent_buffer { */ struct extent_changeset { /* How many bytes are set/cleared in this operation */ - unsigned int bytes_changed; + u64 bytes_changed; /* Changed ranges */ struct ulist range_changed; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 344d18de1f08..8898682c9103 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4320,10 +4320,12 @@ static int balance_kthread(void *data) struct btrfs_fs_info *fs_info = data; int ret = 0; + sb_start_write(fs_info->sb); mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL); mutex_unlock(&fs_info->balance_mutex); + sb_end_write(fs_info->sb); return ret; } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b736acd3917b..a24bcbbb5033 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -97,6 +97,9 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len, if (rc != 1) return -EINVAL; + if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) + return -EINVAL; + rc = symlink_hash(link_len, link_str, md5_hash); if (rc) { cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 065cd2d1bdc6..db403c01d4d5 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -770,8 +770,12 @@ static loff_t ext2_max_size(int bits) res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); res += 1LL << (3*(bits-2)); + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; /* Does block tree limit file size? */ - if (res < upper_limit) + if (res + meta_blocks <= upper_limit) goto check_lfs; res = upper_limit; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index dcbd8ac8d471..0d62f05f8925 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2161,6 +2161,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2710,6 +2719,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 03dce3980d90..54f0d2c4c7d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -848,6 +848,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -855,15 +856,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1679f9c0b63b..773028921c48 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2467,8 +2467,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 16abb017e497..68d5c73c5ed1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -633,8 +633,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } - if (f2fs_check_nid_range(sbi, dni->ino)) + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); return false; + } *nofs = ofs_of_node(node_page); source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2383d52b1f42..264c19e17779 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -777,6 +777,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0cd1d51dde06..3dc7cc3d6ac6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1995,8 +1995,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a6c9c9fdf21d..6bd8a944902e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2066,7 +2066,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2077,8 +2077,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c056ed5c6df3..d7ec0ac87fc0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1429,7 +1429,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp) start = r.start >> bs_shift; end = start + (r.len >> bs_shift); - minlen = max_t(u64, r.minlen, + minlen = max_t(u64, r.minlen, sdp->sd_sb.sb_bsize); + minlen = max_t(u64, minlen, q->limits.discard_granularity) >> bs_shift; if (end <= start || minlen > sdp->sd_max_rg_data) diff --git a/fs/io_uring.c b/fs/io_uring.c index 478df7e10767..e73969fa96bc 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -438,6 +438,22 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) return ctx; } +static void io_req_put_fs(struct io_kiocb *req) +{ + struct fs_struct *fs = req->fs; + + if (!fs) + return; + + spin_lock(&req->fs->lock); + if (--fs->users) + fs = NULL; + spin_unlock(&req->fs->lock); + if (fs) + free_fs_struct(fs); + req->fs = NULL; +} + static inline bool __io_sequence_defer(struct io_ring_ctx *ctx, struct io_kiocb *req) { @@ -695,6 +711,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) static void __io_free_req(struct io_kiocb *req) { + io_req_put_fs(req); if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); percpu_ref_put(&req->ctx->refs); @@ -1701,16 +1718,7 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, ret = -EINTR; } - if (req->fs) { - struct fs_struct *fs = req->fs; - - spin_lock(&req->fs->lock); - if (--fs->users) - fs = NULL; - spin_unlock(&req->fs->lock); - if (fs) - free_fs_struct(fs); - } + io_req_put_fs(req); io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index b288c8ae1236..837cd55fd4c5 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -415,13 +415,15 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c) jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); ret = -EIO; - goto out_free; + goto out_sum_exit; } jffs2_calc_trigger_levels(c); return 0; + out_sum_exit: + jffs2_sum_exit(c); out_free: kvfree(c->blocks); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index ab8cdd9e9325..ad1eba809e7e 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -602,8 +602,8 @@ out_root: jffs2_free_ino_caches(c); jffs2_free_raw_node_refs(c); kvfree(c->blocks); - out_inohash: jffs2_clear_xattr_subsystem(c); + out_inohash: kfree(c->inocache_list); out_wbuf: jffs2_flash_cleanup(c); diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 0b1a7f68b712..f73904c08b39 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -136,7 +136,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (!s) { JFFS2_WARNING("Can't allocate memory for summary\n"); ret = -ENOMEM; - goto out; + goto out_buf; } } @@ -274,13 +274,15 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } ret = 0; out: + jffs2_sum_reset_collected(s); + kfree(s); + out_buf: if (buf_size) kfree(flashbuf); #ifndef __ECOS else mtd_unpoint(c->mtd, 0, c->mtd->size); #endif - kfree(s); return ret; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index d862cfc3d3a8..62c4a5450cda 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -146,12 +146,13 @@ void jfs_evict_inode(struct inode *inode) dquot_initialize(inode); if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + struct inode *ipimap = JFS_SBI(inode->i_sb)->ipimap; truncate_inode_pages_final(&inode->i_data); if (test_cflag(COMMIT_Freewmap, inode)) jfs_free_zero_link(inode); - if (JFS_SBI(inode->i_sb)->ipimap) + if (ipimap && JFS_IP(ipimap)->i_imap) diFree(inode); /* diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 6fe82ce8663e..79f3440e204b 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -148,6 +148,7 @@ static const s8 budtab[256] = { * 0 - success * -ENOMEM - insufficient memory * -EIO - i/o error + * -EINVAL - wrong bmap data */ int dbMount(struct inode *ipbmap) { @@ -179,6 +180,12 @@ int dbMount(struct inode *ipbmap) bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); + if (!bmp->db_numag) { + release_metapage(mp); + kfree(bmp); + return -EINVAL; + } + bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 7b09a9158e40..3fffc709afd4 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -447,7 +447,8 @@ static const struct address_space_operations minix_aops = { .writepage = minix_writepage, .write_begin = minix_write_begin, .write_end = generic_write_end, - .bmap = minix_bmap + .bmap = minix_bmap, + .direct_IO = noop_direct_IO }; static const struct inode_operations minix_symlink_inode_operations = { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b8a7b223b5b1..31922657e836 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -364,12 +364,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; + const struct pnfs_layoutdriver_type *ld = NULL; uint32_t i; __be32 res = 0; - struct nfs_client *clp = cps->clp; - struct nfs_server *server = NULL; - if (!clp) { + if (!cps->clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; } @@ -377,23 +376,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, for (i = 0; i < args->ndevs; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - if (!server || - server->pnfs_curr_ld->id != dev->cbd_layout_type) { - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - if (server->pnfs_curr_ld && - server->pnfs_curr_ld->id == dev->cbd_layout_type) { - rcu_read_unlock(); - goto found; - } - rcu_read_unlock(); - continue; + if (!ld || ld->id != dev->cbd_layout_type) { + pnfs_put_layoutdriver(ld); + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); + if (!ld) + continue; } - - found: - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); } - + pnfs_put_layoutdriver(ld); out: kfree(args->devs); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 90b5511c4c44..04d27f0ed39a 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -271,10 +271,6 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, n = ntohl(*p++); if (n == 0) goto out; - if (n > ULONG_MAX / sizeof(*args->devs)) { - status = htonl(NFS4ERR_BADXDR); - goto out; - } args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6b0bf4ebd812..0682037f972b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -272,8 +272,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_read(iocb, iter, true); + return nfs_file_direct_write(iocb, iter, true); } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -524,6 +524,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -539,7 +540,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -581,12 +583,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (iter_is_iovec(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -851,7 +855,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -859,7 +863,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -937,6 +941,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -953,7 +958,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { ssize_t result = -EINVAL, requested; size_t count; @@ -967,7 +973,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -997,16 +1007,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 387a2cfa7e17..73415970af38 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -161,7 +161,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to); + return nfs_file_direct_read(iocb, to, false); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -609,7 +609,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, false); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 887f9136a9db..af557dc2cfe1 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -953,7 +953,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 23d75cddbb2e..84369d51353a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1968,7 +1968,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, bool plus) { struct user_namespace *userns = rpc_userns(entry->server->client); - struct nfs_entry old = *entry; __be32 *p; int error; u64 new_cookie; @@ -1988,15 +1987,15 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_fileid3(xdr, &entry->ino); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return error; + return -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) - return error; + return -EAGAIN; entry->d_type = DT_UNKNOWN; @@ -2004,7 +2003,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->fattr->valid = 0; error = decode_post_op_attr(xdr, entry->fattr, userns); if (unlikely(error)) - return error; + return -EAGAIN; if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); @@ -2019,11 +2018,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, return -EAGAIN; if (*p != xdr_zero) { error = decode_nfs_fh3(xdr, entry->fh); - if (unlikely(error)) { - if (error == -E2BIG) - goto out_truncated; - return error; - } + if (unlikely(error)) + return -EAGAIN; } else zero_nfs_fh3(entry->fh); } @@ -2032,11 +2028,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, entry->cookie = new_cookie; return 0; - -out_truncated: - dprintk("NFS: directory entry contains invalid file handle\n"); - *entry = old; - return -EAGAIN; } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fb3d1532f11d..76baf7b441f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7918,6 +7918,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) case -NFS4ERR_DEADSESSION: nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + return; } if (args->dir == NFS4_CDFC4_FORE_OR_BOTH && res->dir != NFS4_CDFS4_BOTH) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index aa2caba38a01..1d2b81a233bb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/jiffies.h> +#include <linux/sched/mm.h> #include <linux/sunrpc/clnt.h> @@ -2504,9 +2505,17 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { + unsigned int memflags; int status = 0; const char *section = "", *section_sep = ""; + /* + * State recovery can deadlock if the direct reclaim code tries + * start NFS writeback. So ensure memory allocations are all + * GFP_NOFS. + */ + memflags = memalloc_nofs_save(); + /* Ensure exclusive access to NFSv4 state */ do { clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); @@ -2600,6 +2609,7 @@ static void nfs4_state_manager(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); @@ -2616,6 +2626,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; + memflags = memalloc_nofs_save(); } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; @@ -2627,6 +2638,7 @@ out_error: clp->cl_hostname, -status); ssleep(1); out_drain: + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b512df1003f..0471b6e0da16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) return local; } +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) +{ + return find_pnfs_driver(id); +} + +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) +{ + if (ld) + module_put(ld->owner); +} + void unset_pnfs_layoutdriver(struct nfs_server *nfss) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3d55edd6b25a..68339680bb7d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -226,6 +226,8 @@ struct pnfs_devicelist { extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); +extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 754c763374dd..4aca93e11af7 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -230,7 +230,7 @@ nfsd_proc_write(struct svc_rqst *rqstp) unsigned long cnt = argp->len; unsigned int nvecs; - dprintk("nfsd: WRITE %s %d bytes at %d\n", + dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h index ea7cca3a64b7..6251d8754c82 100644 --- a/fs/nfsd/xdr.h +++ b/fs/nfsd/xdr.h @@ -33,7 +33,7 @@ struct nfsd_readargs { struct nfsd_writeargs { svc_fh fh; __u32 offset; - int len; + __u32 len; struct kvec first; }; diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index ea18e4a2a691..cf222c9225d6 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1881,6 +1881,10 @@ int ntfs_read_inode_mount(struct inode *vi) } /* Now allocate memory for the attribute list. */ ni->attr_list_size = (u32)ntfs_attr_size(a); + if (!ni->attr_list_size) { + ntfs_error(sb, "Attr_list_size is zero"); + goto put_err_out; + } ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer " diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index eeb93f009b28..83a173feb698 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -361,15 +361,18 @@ static int do_tmpfile(struct inode *dir, struct dentry *dentry, { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); int err, instantiated = 0; struct fscrypt_name nm; /* - * Budget request settings: new dirty inode, new direntry, - * budget for dirtied inode will be released via writeback. + * Budget request settings: new inode, new direntry, changing the + * parent directory inode. + * Allocate budget separately for new dirtied inode, the budget will + * be released via writeback. */ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", @@ -439,6 +442,8 @@ out_inode: make_bad_inode(inode); if (!instantiated) iput(inode); + else if (whiteout) + iput(*whiteout); out_budg: ubifs_release_budget(c, &req); if (!instantiated) @@ -955,7 +960,8 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change; - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, + .dirtied_ino = 1}; struct fscrypt_name nm; /* @@ -1330,6 +1336,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, if (flags & RENAME_WHITEOUT) { union ubifs_dev_desc *dev = NULL; + struct ubifs_budget_req wht_req; dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); if (!dev) { @@ -1351,6 +1358,23 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, whiteout_ui->data = dev; whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); + wht_req.dirtied_ino = 1; + wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid deadlock between space budget (holds ui_mutex and + * waits wb work) and writeback work(waits ui_mutex), do space + * budget before ubifs inodes locked. + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { + iput(whiteout); + goto out_release; + } + + /* Add the old_dentry size to the old_dir size. */ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); } lock_4_inodes(old_dir, new_dir, new_inode, whiteout); @@ -1425,18 +1449,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, } if (whiteout) { - struct ubifs_budget_req wht_req = { .dirtied_ino = 1, - .dirtied_ino_d = \ - ALIGN(ubifs_inode(whiteout)->data_len, 8) }; - - err = ubifs_budget_space(c, &wht_req); - if (err) { - kfree(whiteout_ui->data); - whiteout_ui->data_len = 0; - iput(whiteout); - goto out_release; - } - inc_nlink(whiteout); mark_inode_dirty(whiteout); diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index eae9cf5a57b0..89b671ad0f9a 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -846,16 +846,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<<c->max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. + */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index eeb1be259888..2923d5a6a7c0 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -101,7 +101,7 @@ static int setflags(struct inode *inode, int flags) struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) |