aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c159
1 files changed, 127 insertions, 32 deletions
diff --git a/fs/namei.c b/fs/namei.c
index b952ecbd49c2..a4cba6991a4d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -52,8 +52,8 @@
* The new code replaces the old recursive symlink resolution with
* an iterative one (in case of non-nested symlink chains). It does
* this with calls to <fs>_follow_link().
- * As a side effect, dir_namei(), _namei() and follow_link() are now
- * replaced with a single function lookup_dentry() that can handle all
+ * As a side effect, dir_namei(), _namei() and follow_link() are now
+ * replaced with a single function lookup_dentry() that can handle all
* the special cases of the former code.
*
* With the new dcache, the pathname is stored at each inode, at least as
@@ -2565,6 +2565,26 @@ struct dentry *lookup_one_len_unlocked(const char *name,
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
+/*
+ * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
+ * on negatives. Returns known positive or ERR_PTR(); that's what
+ * most of the users want. Note that pinned negative with unlocked parent
+ * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
+ * need to be very careful; pinned positives have ->d_inode stable, so
+ * this one avoids such problems.
+ */
+struct dentry *lookup_positive_unlocked(const char *name,
+ struct dentry *base, int len)
+{
+ struct dentry *ret = lookup_one_len_unlocked(name, base, len);
+ if (!IS_ERR(ret) && d_is_negative(ret)) {
+ dput(ret);
+ ret = ERR_PTR(-ENOENT);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(lookup_positive_unlocked);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
@@ -2583,7 +2603,7 @@ int path_pts(struct path *path)
this.name = "pts";
this.len = 3;
child = d_hash_and_lookup(parent, &this);
- if (!child)
+ if (IS_ERR_OR_NULL(child))
return -ENOENT;
path->dentry = child;
@@ -2859,20 +2879,14 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
p = d_ancestor(p2, p1);
if (p) {
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}
p = d_ancestor(p1, p2);
- if (p) {
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
- return p;
- }
-
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
- return NULL;
+ return p;
}
EXPORT_SYMBOL(lock_rename);
@@ -2886,6 +2900,63 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
+/**
+ * mode_strip_umask - handle vfs umask stripping
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode to be created in @dir
+ *
+ * Umask stripping depends on whether or not the filesystem supports POSIX
+ * ACLs. If the filesystem doesn't support it umask stripping is done directly
+ * in here. If the filesystem does support POSIX ACLs umask stripping is
+ * deferred until the filesystem calls posix_acl_create().
+ *
+ * Returns: mode
+ */
+static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
+{
+ if (!IS_POSIXACL(dir))
+ mode &= ~current_umask();
+ return mode;
+}
+
+/**
+ * vfs_prepare_mode - prepare the mode to be used for a new inode
+ * @dir: parent directory of the new inode
+ * @mode: mode of the new inode
+ * @mask_perms: allowed permission by the vfs
+ * @type: type of file to be created
+ *
+ * This helper consolidates and enforces vfs restrictions on the @mode of a new
+ * object to be created.
+ *
+ * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
+ * the kernel documentation for mode_strip_umask()). Moving umask stripping
+ * after setgid stripping allows the same ordering for both non-POSIX ACL and
+ * POSIX ACL supporting filesystems.
+ *
+ * Note that it's currently valid for @type to be 0 if a directory is created.
+ * Filesystems raise that flag individually and we need to check whether each
+ * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
+ * non-zero type.
+ *
+ * Returns: mode to be passed to the filesystem
+ */
+static inline umode_t vfs_prepare_mode(const struct inode *dir, umode_t mode,
+ umode_t mask_perms, umode_t type)
+{
+ mode = mode_strip_sgid(dir, mode);
+ mode = mode_strip_umask(dir, mode);
+
+ /*
+ * Apply the vfs mandated allowed permission mask and set the type of
+ * file to be created before we call into the filesystem.
+ */
+ mode &= (mask_perms & ~S_IFMT);
+ mode |= (type & S_IFMT);
+
+ return mode;
+}
+
int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool want_excl)
{
@@ -2895,8 +2966,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (!dir->i_op->create)
return -EACCES; /* shouldn't it be ENOSYS? */
- mode &= S_IALLUGO;
- mode |= S_IFREG;
+
+ mode = vfs_prepare_mode(dir, mode, S_IALLUGO, S_IFREG);
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
@@ -3166,8 +3237,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
* O_EXCL open we want to return EEXIST not EROFS).
*/
if (open_flag & O_CREAT) {
- if (!IS_POSIXACL(dir->d_inode))
- mode &= ~current_umask();
+ mode = vfs_prepare_mode(dir->d_inode, mode, mode, mode);
if (unlikely(!got_write)) {
create_error = -EROFS;
open_flag &= ~O_CREAT;
@@ -3443,6 +3513,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
child = d_alloc(dentry, &slash_name);
if (unlikely(!child))
goto out_err;
+ mode = vfs_prepare_mode(dir, mode, mode, mode);
error = dir->i_op->tmpfile(dir, child, mode);
if (error)
goto out_err;
@@ -3701,6 +3772,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
if (!dir->i_op->mknod)
return -EPERM;
+ mode = vfs_prepare_mode(dir, mode, mode, mode);
error = devcgroup_inode_mknod(mode, dev);
if (error)
return error;
@@ -3749,9 +3821,8 @@ retry:
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mknod(&path, dentry, mode, dev);
+ error = security_path_mknod(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode), dev);
if (error)
goto out;
switch (mode & S_IFMT) {
@@ -3799,7 +3870,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!dir->i_op->mkdir)
return -EPERM;
- mode &= (S_IRWXUGO|S_ISVTX);
+ mode = vfs_prepare_mode(dir, mode, S_IRWXUGO | S_ISVTX, 0);
error = security_inode_mkdir(dir, dentry, mode);
if (error)
return error;
@@ -3826,9 +3897,8 @@ retry:
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- if (!IS_POSIXACL(path.dentry->d_inode))
- mode &= ~current_umask();
- error = security_path_mkdir(&path, dentry, mode);
+ error = security_path_mkdir(&path, dentry,
+ mode_strip_umask(path.dentry->d_inode, mode));
if (!error)
error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
done_path_create(&path, dentry);
@@ -4361,11 +4431,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
*
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
- * That's where 4.4 screws up. Current fix: serialization on
+ * That's where 4.4BSD screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source (if it is not a directory).
+ * c) we may have to lock up to _four_ objects - parents and victim (if it exists),
+ * and source (if it's a non-directory or a subdirectory that moves to
+ * different parent).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4394,6 +4465,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
struct name_snapshot old_name;
+ bool lock_old_subdir, lock_new_subdir;
if (source == target)
return 0;
@@ -4442,10 +4514,33 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
- if (!is_dir || (flags & RENAME_EXCHANGE))
+ /*
+ * Lock children.
+ * The source subdirectory needs to be locked on cross-directory
+ * rename or cross-directory exchange since its parent changes.
+ * The target subdirectory needs to be locked on cross-directory
+ * exchange due to parent change and on any rename due to becoming
+ * a victim.
+ * Non-directories need locking in all cases (for NFS reasons);
+ * they get locked after any subdirectories (in inode address order).
+ *
+ * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE.
+ * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex.
+ */
+ lock_old_subdir = new_dir != old_dir;
+ lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE);
+ if (is_dir) {
+ if (lock_old_subdir)
+ inode_lock_nested(source, I_MUTEX_CHILD);
+ if (target && (!new_is_dir || lock_new_subdir))
+ inode_lock(target);
+ } else if (new_is_dir) {
+ if (lock_new_subdir)
+ inode_lock_nested(target, I_MUTEX_CHILD);
+ inode_lock(source);
+ } else {
lock_two_nondirectories(source, target);
- else if (target)
- inode_lock(target);
+ }
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4489,9 +4584,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
d_exchange(old_dentry, new_dentry);
}
out:
- if (!is_dir || (flags & RENAME_EXCHANGE))
- unlock_two_nondirectories(source, target);
- else if (target)
+ if (!is_dir || lock_old_subdir)
+ inode_unlock(source);
+ if (target && (!new_is_dir || lock_new_subdir))
inode_unlock(target);
dput(new_dentry);
if (!error) {
@@ -4817,7 +4912,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- void *fsdata;
+ void *fsdata = NULL;
int err;
unsigned int flags = 0;
if (nofs)