diff options
Diffstat (limited to 'fs/nfs')
35 files changed, 471 insertions, 278 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 7ccc30a757ec..c66916d77b4a 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -140,7 +140,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate depends on NFS_V4_1 && NFS_V3 - default m + default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 06cb0c1d9aee..a2bca78b80ab 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -604,6 +604,8 @@ retry: nfs4_delete_deviceid(node->ld, node->nfs_client, id); goto retry; } + + nfs4_put_deviceid_node(node); return ERR_PTR(-ENODEV); } diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index dec5880ac6de..6e3a14fdff9c 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -422,7 +422,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->concat.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; @@ -451,7 +451,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->stripe.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 8f34daf85f70..5d5227ce4d91 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -168,7 +168,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index bcc51f131a49..f2a854805f0e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -364,12 +364,11 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + const struct pnfs_layoutdriver_type *ld = NULL; + uint32_t i; __be32 res = 0; - struct nfs_client *clp = cps->clp; - struct nfs_server *server = NULL; - if (!clp) { + if (!cps->clp) { res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); goto out; } @@ -377,23 +376,15 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, for (i = 0; i < args->ndevs; i++) { struct cb_devicenotifyitem *dev = &args->devs[i]; - if (!server || - server->pnfs_curr_ld->id != dev->cbd_layout_type) { - rcu_read_lock(); - list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) - if (server->pnfs_curr_ld && - server->pnfs_curr_ld->id == dev->cbd_layout_type) { - rcu_read_unlock(); - goto found; - } - rcu_read_unlock(); - continue; + if (!ld || ld->id != dev->cbd_layout_type) { + pnfs_put_layoutdriver(ld); + ld = pnfs_find_layoutdriver(dev->cbd_layout_type); + if (!ld) + continue; } - - found: - nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + nfs4_delete_deviceid(ld, cps->clp, &dev->cbd_dev_id); } - + pnfs_put_layoutdriver(ld); out: kfree(args->devs); return res; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a87a56273407..2f84c612838c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -268,11 +268,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = read_buf(xdr, sizeof(uint32_t)); @@ -281,12 +279,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) - goto out; - if (n > ULONG_MAX / sizeof(*args->devs)) { - status = htonl(NFS4ERR_BADXDR); + if (n == 0) goto out; - } args->devs = kmalloc_array(n, sizeof(*args->devs), GFP_KERNEL); if (!args->devs) { @@ -339,19 +333,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, dev->cbd_immediate = 0; } - args->ndevs++; - dprintk("%s: type %d layout 0x%x immediate %d\n", __func__, dev->cbd_notify_type, dev->cbd_layout_type, dev->cbd_immediate); } + args->ndevs = n; + dprintk("%s: ndevs %d\n", __func__, args->ndevs); + return 0; +err: + kfree(args->devs); out: + args->devs = NULL; + args->ndevs = 0; dprintk("%s: status %d ndevs %d\n", __func__, ntohl(status), args->ndevs); return status; -err: - kfree(args->devs); - goto out; } static __be32 decode_sessionid(struct xdr_stream *xdr, @@ -991,7 +987,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) out_invalidcred: pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n"); - return rpc_autherr_badcred; + return svc_return_autherr(rqstp, rpc_autherr_badcred); } /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8808d2ce6a9c..f3bfc4d8559e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -180,6 +180,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_flags = cl_init->init_flags; clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); @@ -410,7 +411,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) if (cl_init->hostname == NULL) { WARN_ON(1); - return NULL; + return ERR_PTR(-EINVAL); } /* see if the client already exists */ @@ -430,7 +431,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, cl_init); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 733fd9e4f0a1..10bc04af2882 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1626,6 +1626,24 @@ out: no_open: res = nfs_lookup(dir, dentry, lookup_flags); + if (!res) { + inode = d_inode(dentry); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) + res = ERR_PTR(-ENOTDIR); + else if (inode && S_ISREG(inode->i_mode)) + res = ERR_PTR(-EOPENSTALE); + } else if (!IS_ERR(res)) { + inode = d_inode(res); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { + dput(res); + res = ERR_PTR(-ENOTDIR); + } else if (inode && S_ISREG(inode->i_mode)) { + dput(res); + res = ERR_PTR(-EOPENSTALE); + } + } if (switched) { d_lookup_done(dentry); if (!res) @@ -2015,6 +2033,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { ihold(inode); @@ -2103,6 +2123,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + if (S_ISREG(old_inode->i_mode)) + nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e5da9d7fb69e..6a4083d550c6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -288,8 +288,8 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); if (iov_iter_rw(iter) == READ) - return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_read(iocb, iter, true); + return nfs_file_direct_write(iocb, iter, true); } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -553,6 +553,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers into which to read data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct reads instead of calling * generic_file_aio_read() in order to avoid gfar's check to see if @@ -568,7 +569,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -610,12 +612,14 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter) if (iter_is_iovec(iter)) dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; - nfs_start_io_direct(inode); + if (!swap) + nfs_start_io_direct(inode); NFS_I(inode)->read_io += count; requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); - nfs_end_io_direct(inode); + if (!swap) + nfs_end_io_direct(inode); if (requested > 0) { result = nfs_direct_wait(dreq); @@ -884,7 +888,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -892,7 +896,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -971,6 +975,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @swap: flag indicating this is swap IO, not O_DIRECT IO * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -987,7 +992,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + bool swap) { ssize_t result = -EINVAL, requested; size_t count; @@ -1001,7 +1007,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); + if (swap) + /* bypass generic checks */ + result = iov_iter_count(iter); + else + result = generic_write_checks(iocb, iter); if (result <= 0) return result; count = result; @@ -1031,16 +1041,22 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - nfs_start_io_direct(inode); + if (swap) { + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_STABLE); + } else { + nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, + FLUSH_COND_STABLE); - if (mapping->nrpages) { - invalidate_inode_pages2_range(mapping, - pos >> PAGE_SHIFT, end); - } + if (mapping->nrpages) { + invalidate_inode_pages2_range(mapping, + pos >> PAGE_SHIFT, end); + } - nfs_end_io_direct(inode); + nfs_end_io_direct(inode); + } if (requested > 0) { result = nfs_direct_wait(dreq); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 29553fdba8af..62a86c414391 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -157,7 +157,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) ssize_t result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_read(iocb, to); + return nfs_file_direct_read(iocb, to, false); dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, @@ -606,7 +606,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) return result; if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, false); dprintk("NFS: write(%pD2, %zu@%Ld)\n", file, iov_iter_count(from), (long long) iocb->ki_pos); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 2478a69da0f0..015d39ac2c8f 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -717,7 +717,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (unlikely(!p)) goto out_err; fl->fh_array[i]->size = be32_to_cpup(p++); - if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + if (fl->fh_array[i]->size > NFS_MAXFHSIZE) { printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err; @@ -837,6 +837,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -857,6 +863,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index d8cba46a9395..bb10f2b21cc1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -101,7 +101,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) if (unlikely(!p)) return -ENOBUFS; fh->size = be32_to_cpup(p++); - if (fh->size > sizeof(struct nfs_fh)) { + if (fh->size > NFS_MAXFHSIZE) { printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", fh->size); return -EOVERFLOW; @@ -1189,6 +1189,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -EPFNOSUPPORT: case -EPROTONOSUPPORT: case -EOPNOTSUPP: + case -EINVAL: case -ECONNREFUSED: case -ECONNRESET: case -EHOSTDOWN: diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 8da239b6cc16..f1f0519f1ece 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -429,10 +429,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, goto out_fail; ds = mirror->mirror_ds->ds; + if (READ_ONCE(ds->ds_clp)) + goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); - if (ds->ds_clp) - goto out; /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e4cd3a2fe698..592b95ab378b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -787,12 +787,9 @@ int nfs_getattr(const struct path *path, struct kstat *stat, goto out_no_update; /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. @@ -1038,6 +1035,7 @@ EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { filp->private_data = get_nfs_open_context(ctx); + set_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); if (list_empty(&ctx->list)) nfs_inode_attach_open_context(ctx); } @@ -1057,6 +1055,8 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; + if (!test_bit(NFS_CONTEXT_FILE_OPEN, &pos->flags)) + continue; ctx = get_nfs_open_context(pos); break; } @@ -1071,6 +1071,7 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx) { struct inode *inode = d_inode(ctx->dentry); + clear_bit(NFS_CONTEXT_FILE_OPEN, &ctx->flags); /* * We fatal error on write before. Try to writeback * every page again. @@ -1607,10 +1608,10 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle); */ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) { - const struct nfs_inode *nfsi = NFS_I(inode); + unsigned long attr_gencount = NFS_I(inode)->attr_gencount; - return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || - ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); + return (long)(fattr->gencount - attr_gencount) > 0 || + (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0; } static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) @@ -2034,7 +2035,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo_timestamp = now; } /* Set the barrier to be more recent than this fattr */ - if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + if ((long)(fattr->gencount - nfsi->attr_gencount) > 0) nfsi->attr_gencount = fattr->gencount; } @@ -2142,7 +2143,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a97b081ab843..de626edc0dde 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -578,12 +578,14 @@ extern int nfs4_test_session_trunk(struct rpc_clnt *, static inline struct inode *nfs_igrab_and_active(struct inode *inode) { - inode = igrab(inode); - if (inode != NULL && !nfs_sb_active(inode->i_sb)) { - iput(inode); - inode = NULL; + struct super_block *sb = inode->i_sb; + + if (sb && nfs_sb_active(sb)) { + if (igrab(inode)) + return inode; + nfs_sb_deactive(sb); } - return inode; + return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5686be67be8..d57d453aecc2 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen; rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ec8a9efa268f..e302f8370b9b 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -346,7 +346,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; case NFS3_CREATE_UNCHECKED: - goto out; + goto out_release_acls; } nfs_fattr_init(data->res.dir_attr); nfs_fattr_init(data->res.fattr); @@ -695,7 +695,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, break; default: status = -EINVAL; - goto out; + goto out_release_acls; } status = nfs3_do_create(dir, dentry, data); diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9956453aa6ff..0ed419bb02b0 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -34,6 +34,7 @@ */ #define NFS3_fhandle_sz (1+16) #define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_post_op_fh_sz (1+NFS3_fh_sz) #define NFS3_sattr_sz (15) #define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) #define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) @@ -71,7 +72,7 @@ #define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1) #define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) -#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 526441de89c1..5b1d452e640b 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -59,7 +59,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, loff_t offset, loff_t len) { - struct nfs_server *server = NFS_SERVER(file_inode(filep)); + struct inode *inode = file_inode(filep); + struct nfs_server *server = NFS_SERVER(inode); struct nfs4_exception exception = { }; struct nfs_lock_context *lock; int err; @@ -68,9 +69,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, if (IS_ERR(lock)) return PTR_ERR(lock); - exception.inode = file_inode(filep); + exception.inode = inode; exception.state = lock->open_context->state; + err = nfs_sync_inode(inode); + if (err) + goto out; + do { err = _nfs42_proc_fallocate(msg, filep, lock, offset, len); if (err == -ENOTSUPP) { @@ -79,7 +84,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); - +out: nfs_put_lock_context(lock); return err; } @@ -117,16 +122,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len) return -EOPNOTSUPP; inode_lock(inode); - err = nfs_sync_inode(inode); - if (err) - goto out_unlock; err = nfs42_proc_fallocate(&msg, filep, offset, len); if (err == 0) truncate_pagecache_range(inode, offset, (offset + len) -1); if (err == -EOPNOTSUPP) NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE; -out_unlock: + inode_unlock(inode); return err; } @@ -293,8 +295,9 @@ static ssize_t _nfs42_proc_copy(struct file *src, goto out; } - truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res->write_res.count); + WARN_ON_ONCE(invalidate_inode_pages2_range(dst_inode->i_mapping, + pos_dst >> PAGE_SHIFT, + (pos_dst + res->write_res.count - 1) >> PAGE_SHIFT)); status = res->write_res.count; out: @@ -498,7 +501,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep, if (status) return status; - return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); + if (whence == SEEK_DATA && res.sr_eof) + return -NFS4ERR_NXIO; + else + return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); } loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index ec9803088f6b..eee011de3f58 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -707,8 +707,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, status = decode_clone(xdr); if (status) goto out; - status = decode_getfattr(xdr, res->dst_fattr, res->server); - + decode_getfattr(xdr, res->dst_fattr, res->server); out: res->rpc_status = status; return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5b61520dce88..2d438318681a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -190,7 +190,7 @@ struct nfs4_state { unsigned int n_wronly; /* Number of write-only references */ unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ - atomic_t count; + refcount_t count; wait_queue_head_t waitq; }; @@ -201,6 +201,7 @@ struct nfs4_exception { struct inode *inode; nfs4_stateid *stateid; long timeout; + unsigned char task_is_privileged : 1; unsigned char delay : 1, recovering : 1, retry : 1; @@ -275,7 +276,8 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); - +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_async_handle_error(struct rpc_task *task, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index faaabbedc891..50d352011ea6 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -191,8 +191,11 @@ void nfs40_shutdown_client(struct nfs_client *clp) struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) { - int err; + char buf[INET6_ADDRSTRLEN + 1]; + const char *ip_addr = cl_init->ip_addr; struct nfs_client *clp = nfs_alloc_client(cl_init); + int err; + if (IS_ERR(clp)) return clp; @@ -216,6 +219,44 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) init_waitqueue_head(&clp->cl_lock_waitq); #endif INIT_LIST_HEAD(&clp->pending_cb_stateids); + + if (cl_init->minorversion != 0) + __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); + __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + + /* + * Set up the connection to the server before we add add to the + * global list. + */ + err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I); + if (err == -EINVAL) + err = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); + if (err < 0) + goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + err = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (err < 0) + goto error; + err = rpc_ntop(sap, buf, sizeof(buf)); + if (err < 0) + goto error; + ip_addr = (const char *)buf; + } + strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); + + err = nfs_idmap_new(clp); + if (err < 0) { + dprintk("%s: failed to create idmapper. Error = %d\n", + __func__, err); + goto error; + } + __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); return clp; error: @@ -299,6 +340,7 @@ int nfs40_init_client(struct nfs_client *clp) ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE, "NFSv4.0 transport Slot table"); if (ret) { + nfs4_shutdown_slot_table(tbl); kfree(tbl); return ret; } @@ -368,8 +410,6 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init) { - char buf[INET6_ADDRSTRLEN + 1]; - const char *ip_addr = cl_init->ip_addr; struct nfs_client *old; int error; @@ -377,43 +417,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, /* the client is initialised already */ return clp; - /* Check NFS protocol revision and initialize RPC op vector */ - clp->rpc_ops = &nfs_v4_clientops; - - if (clp->cl_minorversion != 0) - __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); - __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); - __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); - - error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_GSS_KRB5I); - if (error == -EINVAL) - error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); - if (error < 0) - goto error; - - /* If no clientaddr= option was specified, find a usable cb address */ - if (ip_addr == NULL) { - struct sockaddr_storage cb_addr; - struct sockaddr *sap = (struct sockaddr *)&cb_addr; - - error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); - if (error < 0) - goto error; - error = rpc_ntop(sap, buf, sizeof(buf)); - if (error < 0) - goto error; - ip_addr = (const char *)buf; - } - strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); - - error = nfs_idmap_new(clp); - if (error < 0) { - dprintk("%s: failed to create idmapper. Error = %d\n", - __func__, error); - goto error; - } - __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - error = nfs4_init_client_minor_version(clp); if (error < 0) goto error; @@ -431,8 +434,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, */ nfs_mark_client_ready(clp, -EPERM); } - nfs_put_client(clp); clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); + nfs_put_client(clp); return old; error: @@ -1271,8 +1274,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, } nfs_put_client(clp); - if (server->nfs_client->cl_hostname == NULL) + if (server->nfs_client->cl_hostname == NULL) { server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (server->nfs_client->cl_hostname == NULL) + return -ENOMEM; + } nfs_server_insert_lists(server); return nfs_probe_destination(server); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 75d3cf86f172..e053a883d08d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -148,7 +148,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence) case SEEK_HOLE: case SEEK_DATA: ret = nfs42_proc_llseek(filep, offset, whence); - if (ret != -ENOTSUPP) + if (ret != -EOPNOTSUPP) return ret; /* Fall through */ default: diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index bf34ddaa2ad7..c1c26b06764f 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -547,22 +547,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, return true; } -static void -nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) +static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data, + int ret) { - struct key *authkey = idmap->idmap_upcall_data->authkey; - - kfree(idmap->idmap_upcall_data); - idmap->idmap_upcall_data = NULL; - complete_request_key(authkey, ret); - key_put(authkey); + complete_request_key(data->authkey, ret); + key_put(data->authkey); + kfree(data); } -static void -nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) +static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, + struct idmap_legacy_upcalldata *data, + int ret) { - if (idmap->idmap_upcall_data != NULL) - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data) + nfs_idmap_complete_pipe_upcall(data, ret); } static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) @@ -599,7 +597,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - nfs_idmap_abort_pipe_upcall(idmap, ret); + nfs_idmap_abort_pipe_upcall(idmap, data, ret); return ret; out2: @@ -655,6 +653,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct request_key_auth *rka; struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data; struct key *authkey; struct idmap_msg im; size_t namelen_in; @@ -664,10 +663,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. */ - if (idmap->idmap_upcall_data == NULL) + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data == NULL) goto out_noupcall; - authkey = idmap->idmap_upcall_data->authkey; + authkey = data->authkey; rka = get_request_key_auth(authkey); if (mlen != sizeof(im)) { @@ -689,18 +689,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { ret = -EINVAL; goto out; -} + } - ret = nfs_idmap_read_and_verify_message(&im, - &idmap->idmap_upcall_data->idmap_msg, - rka->target_key, authkey); + ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg, + rka->target_key, authkey); if (ret >= 0) { key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); ret = mlen; } out: - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + nfs_idmap_complete_pipe_upcall(data, ret); out_noupcall: return ret; } @@ -714,7 +713,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct idmap *idmap = data->idmap; if (msg->errno) - nfs_idmap_abort_pipe_upcall(idmap, msg->errno); + nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno); } static void @@ -722,8 +721,11 @@ idmap_release_pipe(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data; - nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data) + nfs_idmap_complete_pipe_upcall(data, -EPIPE); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid) diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 24f06dcc2b08..936c412be28e 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,8 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct net *net) +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net) { ssize_t ret; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2a2ff3f22a4..c9db9a0fc733 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -163,6 +163,7 @@ static int nfs4_map_errors(int err) case -NFS4ERR_RESOURCE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: case -NFS4ERR_WRONG_CRED: @@ -509,6 +510,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_GRACE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: exception->delay = 1; return 0; @@ -550,6 +552,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ goto out_retry; } if (exception->recovering) { + if (exception->task_is_privileged) + return -EDEADLOCK; ret = nfs4_wait_clnt_recover(clp); if (test_bit(NFS_MIG_FAILED, &server->mig_status)) return -EIO; @@ -575,6 +579,8 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, goto out_retry; } if (exception->recovering) { + if (exception->task_is_privileged) + return -EDEADLOCK; rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); @@ -1788,7 +1794,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) out: return ERR_PTR(ret); out_return_state: - atomic_inc(&state->count); + refcount_inc(&state->count); return state; } @@ -1847,8 +1853,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (!data->rpc_done) { if (data->rpc_status) return ERR_PTR(data->rpc_status); - /* cached opens have already been processed */ - goto update; + return nfs4_try_open_cached(data); } ret = nfs_refresh_inode(inode, &data->f_attr); @@ -1857,10 +1862,11 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); -update: - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); - atomic_inc(&state->count); + + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + refcount_inc(&state->count); return state; } @@ -1898,7 +1904,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data) return ERR_CAST(inode); if (data->state != NULL && data->state->inode == inode) { state = data->state; - atomic_inc(&state->count); + refcount_inc(&state->count); } else state = nfs4_get_open_state(inode, data->owner); iput(inode); @@ -1924,8 +1930,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } out: nfs_release_seqid(data->o_arg.seqid); return state; @@ -1971,23 +1980,23 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; - atomic_inc(&state->count); + refcount_inc(&state->count); return opendata; } static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, - fmode_t fmode) + fmode_t fmode) { struct nfs4_state *newstate; + struct nfs_server *server = NFS_SB(opendata->dentry->d_sb); + int openflags = opendata->o_arg.open_flags; int ret; if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) return 0; - opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; - opendata->o_arg.share_access = nfs4_map_atomic_open_share( - NFS_SB(opendata->dentry->d_sb), - fmode, 0); + opendata->o_arg.share_access = + nfs4_map_atomic_open_share(server, fmode, openflags); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -2565,10 +2574,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_FH); + opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); + /* + * We're not recovering a delegation, so ask for no delegation. + * Otherwise the recovery thread could deadlock with an outstanding + * delegation return. + */ + opendata->o_arg.open_flags = O_DIRECT; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) d_drop(ctx->dentry); @@ -2916,8 +2930,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: - if (!opendata->cancelled) + if (!opendata->cancelled) { + if (opendata->lgp) { + nfs4_lgopen_release(opendata->lgp); + opendata->lgp = NULL; + } nfs4_sequence_free_slot(&opendata->o_res.seq_res); + } return ret; } @@ -4687,12 +4706,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), .pages = pages, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask, .plus = plus, }; struct nfs4_readdir_res res; @@ -4707,9 +4726,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, dentry, (unsigned long long)cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; @@ -5120,7 +5145,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); - nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); + nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr); } static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) @@ -5161,7 +5186,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); - nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg); + nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client, + NFS_SP4_MACH_CRED_COMMIT, clnt, msg); } static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args, @@ -5529,6 +5555,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; + /* You can't remove system.nfs4_acl: */ + if (buflen == 0) + return -EINVAL; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) @@ -5567,6 +5596,14 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen do { err = __nfs4_proc_set_acl(inode, buf, buflen); trace_nfs4_set_acl(inode, err); + if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) { + /* + * no need to retry since the kernel + * isn't involved in encoding the ACEs. + */ + err = -EINVAL; + break; + } err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); @@ -5605,7 +5642,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - return 0; + return label.len; } static int nfs4_get_security_label(struct inode *inode, void *buf, @@ -6008,6 +6045,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) struct nfs4_exception exception = { .inode = data->inode, .stateid = &data->stateid, + .task_is_privileged = data->args.seq_args.sa_privileged, }; if (!nfs4_sequence_done(task, &data->res.seq_res)) @@ -6151,7 +6189,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; - nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0); nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP, @@ -6181,6 +6218,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->lr.roc = false; } + if (!data->inode) + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, + 1); + else + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, + 0); task_setup_data.callback_data = data; msg.rpc_argp = &data->args; msg.rpc_resp = &data->res; @@ -6581,6 +6624,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; + struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); dprintk("%s: begin!\n", __func__); @@ -6590,8 +6634,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), - data->timestamp); + renew_lease(server, data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -6612,6 +6655,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; + else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) + goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart; @@ -6715,9 +6760,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.new_lock_owner, ret); } else data->cancelled = true; + trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); rpc_put_task(task); dprintk("%s: done, ret = %d!\n", __func__, ret); - trace_nfs4_set_lock(fl, state, &data->res.stateid, cmd, ret); return ret; } @@ -7600,9 +7645,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -7997,7 +8044,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status != 0) goto out; - status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out; @@ -8667,6 +8715,9 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf rpc_delay(task, NFS4_POLL_RETRY_MAX); /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: + case -EACCES: + dprintk("%s: failed to reclaim complete error %d for server %s, retrying\n", + __func__, task->tk_status, clp->cl_hostname); return -EAGAIN; case -NFS4ERR_BADSESSION: case -NFS4ERR_DEADSESSION: @@ -8827,6 +8878,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = -EBUSY; break; case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: status = -ERECALLCONFLICT; break; case -NFS4ERR_DELEG_REVOKED: @@ -9045,15 +9097,20 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) &task_setup_data.rpc_client, &msg); dprintk("--> %s\n", __func__); + lrp->inode = nfs_igrab_and_active(lrp->args.inode); if (!sync) { - lrp->inode = nfs_igrab_and_active(lrp->args.inode); if (!lrp->inode) { nfs4_layoutreturn_release(lrp); return -EAGAIN; } task_setup_data.flags |= RPC_TASK_ASYNC; } - nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0); + if (!lrp->inode) + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, + 1); + else + nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, + 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b3086e99420c..f0f0fb7499e3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -49,6 +49,7 @@ #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/jiffies.h> +#include <linux/sched/mm.h> #include <linux/sunrpc/clnt.h> @@ -65,6 +66,8 @@ #define OPENOWNER_POOL_SIZE 8 +static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp); + const nfs4_stateid zero_stateid = { { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, @@ -337,6 +340,8 @@ do_confirm: status = nfs4_proc_create_session(clp, cred); if (status != 0) goto out; + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R)) + nfs4_state_start_reclaim_reboot(clp); nfs41_finish_session_reset(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: @@ -674,7 +679,7 @@ nfs4_alloc_open_state(void) state = kzalloc(sizeof(*state), GFP_NOFS); if (!state) return NULL; - atomic_set(&state->count, 1); + refcount_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); @@ -708,7 +713,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) continue; if (!nfs4_valid_open_stateid(state)) continue; - if (atomic_inc_not_zero(&state->count)) + if (refcount_inc_not_zero(&state->count)) return state; } return NULL; @@ -762,7 +767,7 @@ void nfs4_put_open_state(struct nfs4_state *state) struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) + if (!refcount_dec_and_lock(&state->count, &owner->so_lock)) return; spin_lock(&inode->i_lock); list_del(&state->inode_states); @@ -1246,6 +1251,8 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); + if (!nfs_client_init_is_complete(clp)) + nfs_mark_client_ready(clp, PTR_ERR(task)); nfs4_clear_state_manager_bit(clp); nfs_put_client(clp); module_put(THIS_MODULE); @@ -1593,7 +1600,7 @@ restart: continue; if (state->state == 0) continue; - atomic_inc(&state->count); + refcount_inc(&state->count); spin_unlock(&sp->so_lock); status = ops->recover_open(sp, state); if (status >= 0) { @@ -1735,6 +1742,7 @@ static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp, static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) { + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); @@ -2066,6 +2074,9 @@ static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) } result = -NFS4ERR_NXIO; + if (!locations->nlocations) + goto out; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); @@ -2502,9 +2513,17 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp) static void nfs4_state_manager(struct nfs_client *clp) { + unsigned int memflags; int status = 0; const char *section = "", *section_sep = ""; + /* + * State recovery can deadlock if the direct reclaim code tries + * start NFS writeback. So ensure memory allocations are all + * GFP_NOFS. + */ + memflags = memalloc_nofs_save(); + /* Ensure exclusive access to NFSv4 state */ do { clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); @@ -2577,6 +2596,7 @@ static void nfs4_state_manager(struct nfs_client *clp) if (status < 0) goto out_error; nfs4_state_end_reclaim_reboot(clp); + continue; } /* Detect expired delegations... */ @@ -2597,6 +2617,7 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); @@ -2613,6 +2634,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; + memflags = memalloc_nofs_save(); } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; @@ -2624,6 +2646,7 @@ out_error: clp->cl_hostname, -status); ssleep(1); out_drain: + memalloc_nofs_restore(memflags); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 6fb7cb6b3f4b..e7a10f5f5405 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -95,7 +95,7 @@ static void nfs4_evict_inode(struct inode *inode) nfs_inode_return_delegation_noreclaim(inode); /* Note that above delegreturn would trigger pnfs return-on-close */ pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); + pnfs_destroy_layout_final(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0a5cae8f8aff..f0021e3b8efd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3747,8 +3747,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_overflow; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; @@ -4279,12 +4277,10 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, if (unlikely(!p)) goto out_overflow; if (len < NFS4_MAXLABELLEN) { - if (label) { - if (label->len) { - if (label->len < len) - return -ERANGE; - memcpy(label->label, p, len); - } + if (label && label->len) { + if (label->len < len) + return -ERANGE; + memcpy(label->label, p, len); label->len = len; label->pi = pi; label->lfs = lfs; @@ -4294,10 +4290,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; out_overflow: diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2c7d76b4c5e1..a9e1bcdd9394 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -987,17 +987,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; - else + if (list_empty(&mirror->pg_list)) { mirror->pg_bytes_written += mirror->pg_count; - } - if (list_empty(&mirror->pg_list)) { - mirror->pg_count = 0; - mirror->pg_base = 0; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + } } } @@ -1095,7 +1094,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) do { list_splice_init(&mirror->pg_list, &head); - mirror->pg_bytes_written -= mirror->pg_count; mirror->pg_count = 0; mirror->pg_base = 0; mirror->pg_recoalesce = 0; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2b9e139a2997..cfb1fe5dfb1e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -92,6 +92,17 @@ find_pnfs_driver(u32 id) return local; } +const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id) +{ + return find_pnfs_driver(id); +} + +void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld) +{ + if (ld) + module_put(ld->owner); +} + void unset_pnfs_layoutdriver(struct nfs_server *nfss) { @@ -294,6 +305,7 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; + unsigned long i_state; if (!lo) return; @@ -304,8 +316,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); + i_state = inode->i_state; spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); + /* Notify pnfs_destroy_layout_final() that we're done */ + if (i_state & (I_FREEING | I_CLEAR)) + wake_up_var(lo); } } @@ -713,8 +729,7 @@ pnfs_free_lseg_list(struct list_head *free_me) } } -void -pnfs_destroy_layout(struct nfs_inode *nfsi) +static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); @@ -732,9 +747,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_put_layout_hdr(lo); } else spin_unlock(&nfsi->vfs_inode.i_lock); + return lo; +} + +void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + __pnfs_destroy_layout(nfsi); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); +static bool pnfs_layout_removed(struct nfs_inode *nfsi, + struct pnfs_layout_hdr *lo) +{ + bool ret; + + spin_lock(&nfsi->vfs_inode.i_lock); + ret = nfsi->layout != lo; + spin_unlock(&nfsi->vfs_inode.i_lock); + return ret; +} + +void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + + if (lo) + wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); +} + static bool pnfs_layout_add_bulk_destroy_list(struct inode *inode, struct list_head *layout_list) @@ -1239,6 +1279,11 @@ _pnfs_return_layout(struct inode *ino) { struct pnfs_layout_hdr *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; LIST_HEAD(tmp_list); nfs4_stateid stateid; int status = 0; @@ -1265,16 +1310,10 @@ _pnfs_return_layout(struct inode *ino) } valid_layout = pnfs_layout_is_valid(lo); pnfs_clear_layoutcommit(ino, &tmp_list); - pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); + pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0); - if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { - struct pnfs_layout_range range = { - .iomode = IOMODE_ANY, - .offset = 0, - .length = NFS4_MAX_UINT64, - }; + if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range); - } /* Don't send a LAYOUTRETURN if list was initially empty */ if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) || @@ -1431,12 +1470,18 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, int ret) { struct pnfs_layout_hdr *lo = args->layout; + struct inode *inode = args->inode; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; struct nfs4_xdr_opaque_data *ld_private = args->ld_private; switch (ret) { case -NFS4ERR_NOMATCHING_LAYOUT: + spin_lock(&inode->i_lock); + if (pnfs_layout_is_valid(lo) && + nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid)) + pnfs_set_plh_return_info(lo, args->range.iomode, 0); + spin_unlock(&inode->i_lock); break; case 0: if (res->lrs_present) @@ -1852,6 +1897,7 @@ lookup_again: lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { spin_unlock(&ino->i_lock); + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_NOMEM); goto out; @@ -1979,6 +2025,7 @@ lookup_again: lgp = pnfs_alloc_init_layoutget_args(ino, ctx, &stateid, &arg, gfp_flags); if (!lgp) { + lseg = ERR_PTR(-ENOMEM); trace_pnfs_update_layout(ino, pos, count, iomode, lo, NULL, PNFS_UPDATE_LAYOUT_NOMEM); nfs_layoutget_end(lo); @@ -1998,6 +2045,12 @@ lookup_again: case -ERECALLCONFLICT: case -EAGAIN: break; + case -ENODATA: + /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */ + pnfs_layout_set_fail_bit( + lo, pnfs_iomode_to_fail_bit(iomode)); + lseg = NULL; + goto out_put_layout_hdr; default: if (!nfs_error_is_fatal(PTR_ERR(lseg))) { pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode)); @@ -2112,6 +2165,7 @@ static void _lgopen_prepare_attached(struct nfs4_opendata *data, &rng, GFP_KERNEL); if (!lgp) { pnfs_clear_first_layoutget(lo); + nfs_layoutget_end(lo); pnfs_put_layout_hdr(lo); return; } @@ -2265,7 +2319,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget */ - pnfs_mark_layout_stateid_invalid(lo, &free_me); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .length = NFS4_MAX_UINT64, + }; + pnfs_set_plh_return_info(lo, IOMODE_ANY, 0); + pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &range, 0); goto out_forget; } @@ -2284,6 +2344,7 @@ out_forget: spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + pnfs_free_lseg_list(&free_me); return ERR_PTR(-EAGAIN); } @@ -2316,6 +2377,9 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, assert_spin_locked(&lo->plh_inode->i_lock); + if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + tmp_list = &lo->plh_return_segs; + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (pnfs_match_lseg_recall(lseg, return_range, seq)) { dprintk("%s: marking lseg %p iomode %d " @@ -2323,6 +2387,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + tmp_list = &lo->plh_return_segs; if (mark_lseg_invalid(lseg, tmp_list)) continue; remaining++; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3ba44819a88a..d5d818b1ac9d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -225,6 +225,8 @@ struct pnfs_devicelist { extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); +extern const struct pnfs_layoutdriver_type *pnfs_find_layoutdriver(u32 id); +extern void pnfs_put_layoutdriver(const struct pnfs_layoutdriver_type *ld); /* nfs4proc.c */ extern size_t max_response_pages(struct nfs_server *server); @@ -254,6 +256,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, @@ -645,6 +648,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) { } +static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index e8a07b3f9aaa..ba67906d6b2c 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -152,7 +152,7 @@ nfs4_get_device_info(struct nfs_server *server, set_bit(NFS_DEVICEID_NOCACHE, &d->flags); out_free_pages: - for (i = 0; i < max_pages; i++) + while (--i >= 0) __free_page(pages[i]); kfree(pages); out_free_pdev: diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index acfb52bc0007..bd6190d794c4 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -555,19 +555,16 @@ out: } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); -static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) +static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) { might_sleep(); - wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, - TASK_KILLABLE); + return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE); } static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) { smp_mb__before_atomic(); - clear_bit(NFS4DS_CONNECTING, &ds->ds_state); - smp_mb__after_atomic(); - wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); + clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state); } static struct nfs_client *(*get_v3_ds_connect)( @@ -638,7 +635,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -711,7 +708,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -728,30 +725,33 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, { int err; -again: - err = 0; - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - if (version == 3) { - err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, - retrans); - } else if (version == 4) { - err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, - retrans, minor_version); - } else { - dprintk("%s: unsupported DS version %d\n", __func__, - version); - err = -EPROTONOSUPPORT; - } + do { + err = nfs4_wait_ds_connect(ds); + if (err || ds->ds_clp) + goto out; + if (nfs4_test_deviceid_unavailable(devid)) + return -ENODEV; + } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0); - nfs4_clear_ds_conn_bit(ds); - } else { - nfs4_wait_ds_connect(ds); + if (ds->ds_clp) + goto connect_done; - /* what was waited on didn't connect AND didn't mark unavail */ - if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) - goto again; + switch (version) { + case 3: + err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans); + break; + case 4: + err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans, + minor_version); + break; + default: + dprintk("%s: unsupported DS version %d\n", __func__, version); + err = -EPROTONOSUPPORT; } +connect_done: + nfs4_clear_ds_conn_bit(ds); +out: /* * At this point the ds->ds_clp should be ready, but it might have * hit an error. diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d419d89b91f7..65aaa6eaad2c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -598,9 +598,8 @@ release_request: static void nfs_write_error_remove_page(struct nfs_page *req) { + SetPageError(req->wb_page); nfs_end_page_writeback(req); - generic_error_remove_page(page_file_mapping(req->wb_page), - req->wb_page); nfs_release_request(req); } @@ -1045,25 +1044,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; -restart: list_for_each_entry_safe(req, tmp, src, wb_list) { kref_get(&req->wb_kref); if (!nfs_lock_request(req)) { - int status; - - /* Prevent deadlock with nfs_lock_and_join_requests */ - if (!list_empty(dst)) { - nfs_release_request(req); - continue; - } - /* Ensure we make progress to prevent livelock */ - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - status = nfs_wait_on_request(req); nfs_release_request(req); - mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (status < 0) - break; - goto restart; + continue; } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); @@ -1911,6 +1896,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, int may_wait = how & FLUSH_SYNC; int ret, nscan; + how &= ~FLUSH_SYNC; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); for (;;) { |