Diffstat (limited to 'fs/aufs/xino.c')
-rw-r--r--  fs/aufs/xino.c  1966
1 files changed, 1966 insertions, 0 deletions
diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c
new file mode 100644
index 000000000000..b3152c0ce0b5
--- /dev/null
+++ b/fs/aufs/xino.c
@@ -0,0 +1,1966 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2005-2020 Junjiro R. Okajima
+ *
+ * This program, aufs is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * external inode number translation table and bitmap
+ *
+ * things to consider
+ * - the lifetime
+ * + au_xino object
+ * + XINO files (xino, xib, xigen)
+ * + dynamic debugfs entries (xiN)
+ * + static debugfs entries (xib, xigen)
+ * + static sysfs entry (xi_path)
+ * - several entry points to handle them.
+ * + mount(2) without xino option (default)
+ * + mount(2) with xino option
+ * + mount(2) with noxino option
+ * + umount(2)
+ * + remount with add/del branches
+ * + remount with xino/noxino options
+ */
+
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
+#include "aufs.h"
+
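+/*
+ * find the branch in [@btop, @bbot] whose filesystem is @h_sb;
+ * return its index, or -1 when none is found.
+ */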
+static aufs_bindex_t sbr_find_shared(struct super_block *sb, aufs_bindex_t btop,
+ aufs_bindex_t bbot,
+ struct super_block *h_sb)
+{
+ /* todo: try binary-search if the branches are many */
+ for (; btop <= bbot; btop++)
+ if (h_sb == au_sbr_sb(sb, btop))
+ return btop;
+ return -1;
+}
+
+/*
+ * find another branch that is on the same filesystem as the specified
+ * branch{@btgt}. search up to @bbot.
+ */
+static aufs_bindex_t is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
+ aufs_bindex_t bbot)
+{
+ aufs_bindex_t bindex;
+ struct super_block *tgt_sb;
+
+ tgt_sb = au_sbr_sb(sb, btgt);
+ bindex = sbr_find_shared(sb, /*btop*/0, btgt - 1, tgt_sb);
+ if (bindex < 0)
+ bindex = sbr_find_shared(sb, btgt + 1, bbot, tgt_sb);
+
+ return bindex;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * suppress unnecessary notify events while creating xino files
+ */
+
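+/*
+ * return the index of the branch whose root dentry is the parent of @dentry,
+ * i.e. the xino file sits directly under that branch root; -1 otherwise.
+ */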
+aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry)
+{
+ aufs_bindex_t bfound, bindex, bbot;
+ struct dentry *parent;
+ struct au_branch *br;
+
+ bfound = -1;
+ parent = dentry->d_parent; /* safe d_parent access */
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
+ br = au_sbr(sb, bindex);
+ if (au_br_dentry(br) == parent) {
+ bfound = bindex;
+ break;
+ }
+ }
+
+ AuDbg("bfound b%d\n", bfound);
+ return bfound;
+}
+
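+/*
+ * record which kind of parent-dir lock was taken for a xino path: the
+ * hnotify-aware lock on a branch-root dir (->hdir), or a plain inode lock
+ * on any other parent dir (->parent and ->dir).
+ */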
+struct au_xino_lock_dir {
+ struct au_hinode *hdir;
+ struct dentry *parent;
+ struct inode *dir;
+};
+
+static struct dentry *au_dget_parent_lock(struct dentry *dentry,
+ unsigned int lsc)
+{
+ struct dentry *parent;
+ struct inode *dir;
+
+ parent = dget_parent(dentry);
+ dir = d_inode(parent);
+ inode_lock_nested(dir, lsc);
+#if 0 /* it should not happen */
+ spin_lock(&dentry->d_lock);
+ if (unlikely(dentry->d_parent != parent)) {
+ spin_unlock(&dentry->d_lock);
+ inode_unlock(dir);
+ dput(parent);
+ parent = NULL;
+ goto out;
+ }
+ spin_unlock(&dentry->d_lock);
+
+out:
+#endif
+ return parent;
+}
+
+static void au_xino_lock_dir(struct super_block *sb, struct path *xipath,
+ struct au_xino_lock_dir *ldir)
+{
+ aufs_bindex_t bindex;
+
+ ldir->hdir = NULL;
+ bindex = au_xi_root(sb, xipath->dentry);
+ if (bindex >= 0) {
+ /* rw branch root */
+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
+ } else {
+ /* other */
+ ldir->parent = au_dget_parent_lock(xipath->dentry,
+ AuLsc_I_PARENT);
+ ldir->dir = d_inode(ldir->parent);
+ }
+}
+
+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
+{
+ if (ldir->hdir)
+ au_hn_inode_unlock(ldir->hdir);
+ else {
+ inode_unlock(ldir->dir);
+ dput(ldir->parent);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * create and set a new xino file
+ */
+struct file *au_xino_create(struct super_block *sb, char *fpath, int silent,
+ int wbrtop)
+{
+ struct file *file;
+ struct dentry *h_parent, *d;
+ struct inode *h_dir, *inode;
+ int err;
+ static DEFINE_MUTEX(mtx);
+
+ /*
+ * at mount-time, when the xino file is at the default path,
+ * hnotify is disabled, so there are no notify events to ignore.
+ * when a user specifies the xino path, we cannot obtain the au_hdir
+ * whose events should be ignored.
+ */
+ if (!wbrtop)
+ mutex_lock(&mtx);
+ file = vfsub_filp_open(fpath, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+ /* | __FMODE_NONOTIFY */,
+ 0666);
+ if (IS_ERR(file)) {
+ if (!wbrtop)
+ mutex_unlock(&mtx);
+ if (!silent)
+ pr_err("open %s(%ld)\n", fpath, PTR_ERR(file));
+ return file;
+ }
+
+ /* keep file count */
+ err = 0;
+ d = file->f_path.dentry;
+ h_parent = au_dget_parent_lock(d, AuLsc_I_PARENT);
+ if (!wbrtop)
+ mutex_unlock(&mtx);
+ /* mnt_want_write() is unnecessary here */
+ h_dir = d_inode(h_parent);
+ inode = file_inode(file);
+ /* no delegation since it is just created */
+ if (inode->i_nlink)
+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
+ /*force*/0);
+ inode_unlock(h_dir);
+ dput(h_parent);
+ if (unlikely(err)) {
+ if (!silent)
+ pr_err("unlink %s(%d)\n", fpath, err);
+ goto out;
+ }
+
+ err = -EINVAL;
+ if (unlikely(sb == d->d_sb)) {
+ if (!silent)
+ pr_err("%s must be outside\n", fpath);
+ goto out;
+ }
+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
+ if (!silent)
+ pr_err("xino doesn't support %s(%s)\n",
+ fpath, au_sbtype(d->d_sb));
+ goto out;
+ }
+ return file; /* success */
+
+out:
+ fput(file);
+ file = ERR_PTR(err);
+ return file;
+}
+
+/*
+ * create a new xinofile at the same place/path as @base.
+ */
+struct file *au_xino_create2(struct super_block *sb, struct path *base,
+ struct file *copy_src)
+{
+ struct file *file;
+ struct dentry *dentry, *parent;
+ struct inode *dir, *delegated;
+ struct qstr *name;
+ struct path path;
+ int err, do_unlock;
+ struct au_xino_lock_dir ldir;
+
+ do_unlock = 1;
+ au_xino_lock_dir(sb, base, &ldir);
+ dentry = base->dentry;
+ parent = dentry->d_parent; /* dir inode is locked */
+ dir = d_inode(parent);
+ IMustLock(dir);
+
+ name = &dentry->d_name;
+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
+ if (IS_ERR(path.dentry)) {
+ file = (void *)path.dentry;
+ pr_err("%pd lookup err %ld\n", dentry, PTR_ERR(path.dentry));
+ goto out;
+ }
+
+ /* no need to mnt_want_write() since we call dentry_open() later */
+ err = vfs_create(dir, path.dentry, 0666, NULL);
+ if (unlikely(err)) {
+ file = ERR_PTR(err);
+ pr_err("%pd create err %d\n", dentry, err);
+ goto out_dput;
+ }
+
+ path.mnt = base->mnt;
+ file = vfsub_dentry_open(&path,
+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
+ /* | __FMODE_NONOTIFY */);
+ if (IS_ERR(file)) {
+ pr_err("%pd open err %ld\n", dentry, PTR_ERR(file));
+ goto out_dput;
+ }
+
+ delegated = NULL;
+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
+ au_xino_unlock_dir(&ldir);
+ do_unlock = 0;
+ if (unlikely(err == -EWOULDBLOCK)) {
+ pr_warn("cannot retry for NFSv4 delegation"
+ " for an internal unlink\n");
+ iput(delegated);
+ }
+ if (unlikely(err)) {
+ pr_err("%pd unlink err %d\n", dentry, err);
+ goto out_fput;
+ }
+
+ if (copy_src) {
+ /* no one can touch copy_src xino */
+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
+ if (unlikely(err)) {
+ pr_err("%pd copy err %d\n", dentry, err);
+ goto out_fput;
+ }
+ }
+ goto out_dput; /* success */
+
+out_fput:
+ fput(file);
+ file = ERR_PTR(err);
+out_dput:
+ dput(path.dentry);
+out:
+ if (do_unlock)
+ au_xino_unlock_dir(&ldir);
+ return file;
+}
+
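+/* return the first xino file assigned in @xi, if any */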
+struct file *au_xino_file1(struct au_xino *xi)
+{
+ struct file *file;
+ unsigned int u, nfile;
+
+ file = NULL;
+ nfile = xi->xi_nfile;
+ for (u = 0; u < nfile; u++) {
+ file = xi->xi_file[u];
+ if (file)
+ break;
+ }
+
+ return file;
+}
+
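+/*
+ * install @file at @idx in the xi_file array, growing the array when @idx
+ * is beyond the current xi_nfile.
+ */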
+static int au_xino_file_set(struct au_xino *xi, int idx, struct file *file)
+{
+ int err;
+ struct file *f;
+ void *p;
+
+ if (file)
+ get_file(file);
+
+ err = 0;
+ f = NULL;
+ if (idx < xi->xi_nfile) {
+ f = xi->xi_file[idx];
+ if (f)
+ fput(f);
+ } else {
+ p = au_kzrealloc(xi->xi_file,
+ sizeof(*xi->xi_file) * xi->xi_nfile,
+ sizeof(*xi->xi_file) * (idx + 1),
+ GFP_NOFS, /*may_shrink*/0);
+ if (p) {
+ MtxMustLock(&xi->xi_mtx);
+ xi->xi_file = p;
+ xi->xi_nfile = idx + 1;
+ } else {
+ err = -ENOMEM;
+ if (file)
+ fput(file);
+ goto out;
+ }
+ }
+ xi->xi_file[idx] = file;
+
+out:
+ return err;
+}
+
+/*
+ * if @xinew->xi is not set, then create a new xigen file.
+ */
+struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew)
+{
+ struct file *file;
+ int err;
+
+ SiMustAnyLock(sb);
+
+ file = au_xino_create2(sb, xinew->base, xinew->copy_src);
+ if (IS_ERR(file)) {
+ err = PTR_ERR(file);
+ pr_err("%s[%d], err %d\n",
+ xinew->xi ? "xino" : "xigen",
+ xinew->idx, err);
+ goto out;
+ }
+
+ if (xinew->xi)
+ err = au_xino_file_set(xinew->xi, xinew->idx, file);
+ else {
+ BUG();
+ /* todo: make xigen file an array */
+ /* err = au_xigen_file_set(sb, xinew->idx, file); */
+ }
+ fput(file);
+ if (unlikely(err))
+ file = ERR_PTR(err);
+
+out:
+ return file;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * truncate xino files
+ */
+static int au_xino_do_trunc(struct super_block *sb, aufs_bindex_t bindex,
+ int idx, struct kstatfs *st)
+{
+ int err;
+ blkcnt_t blocks;
+ struct file *file, *new_xino;
+ struct au_xi_new xinew = {
+ .idx = idx
+ };
+
+ err = 0;
+ xinew.xi = au_sbr(sb, bindex)->br_xino;
+ file = au_xino_file(xinew.xi, idx);
+ if (!file)
+ goto out;
+
+ xinew.base = &file->f_path;
+ err = vfs_statfs(xinew.base, st);
+ if (unlikely(err)) {
+ AuErr1("statfs err %d, ignored\n", err);
+ err = 0;
+ goto out;
+ }
+
+ blocks = file_inode(file)->i_blocks;
+ pr_info("begin truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
+ bindex, idx, (u64)blocks, st->f_bfree, st->f_blocks);
+
+ xinew.copy_src = file;
+ new_xino = au_xi_new(sb, &xinew);
+ if (IS_ERR(new_xino)) {
+ err = PTR_ERR(new_xino);
+ pr_err("xino(b%d-%d), err %d, ignored\n", bindex, idx, err);
+ goto out;
+ }
+
+ err = vfs_statfs(&new_xino->f_path, st);
+ if (!err)
+ pr_info("end truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
+ bindex, idx, (u64)file_inode(new_xino)->i_blocks,
+ st->f_bfree, st->f_blocks);
+ else {
+ AuErr1("statfs err %d, ignored\n", err);
+ err = 0;
+ }
+
+out:
+ return err;
+}
+
+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin)
+{
+ int err, i;
+ unsigned long jiffy;
+ aufs_bindex_t bbot;
+ struct kstatfs *st;
+ struct au_branch *br;
+ struct au_xino *xi;
+
+ err = -ENOMEM;
+ st = kmalloc(sizeof(*st), GFP_NOFS);
+ if (unlikely(!st))
+ goto out;
+
+ err = -EINVAL;
+ bbot = au_sbbot(sb);
+ if (unlikely(bindex < 0 || bbot < bindex))
+ goto out_st;
+
+ err = 0;
+ jiffy = jiffies;
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ for (i = idx_begin; !err && i < xi->xi_nfile; i++)
+ err = au_xino_do_trunc(sb, bindex, i, st);
+ if (!err)
+ au_sbi(sb)->si_xino_jiffy = jiffy;
+
+out_st:
+ au_kfree_rcu(st);
+out:
+ return err;
+}
+
+struct xino_do_trunc_args {
+ struct super_block *sb;
+ struct au_branch *br;
+ int idx;
+};
+
+static void xino_do_trunc(void *_args)
+{
+ struct xino_do_trunc_args *args = _args;
+ struct super_block *sb;
+ struct au_branch *br;
+ struct inode *dir;
+ int err, idx;
+ aufs_bindex_t bindex;
+
+ err = 0;
+ sb = args->sb;
+ dir = d_inode(sb->s_root);
+ br = args->br;
+ idx = args->idx;
+
+ si_noflush_write_lock(sb);
+ ii_read_lock_parent(dir);
+ bindex = au_br_index(sb, br->br_id);
+ err = au_xino_trunc(sb, bindex, idx);
+ ii_read_unlock(dir);
+ if (unlikely(err))
+ pr_warn("err b%d, (%d)\n", bindex, err);
+ atomic_dec(&br->br_xino->xi_truncating);
+ au_lcnt_dec(&br->br_count);
+ si_write_unlock(sb);
+ au_nwt_done(&au_sbi(sb)->si_nowait);
+ au_kfree_rcu(args);
+}
+
+/*
+ * returns the index in the xi_file array whose corresponding file needs
+ * truncating, or -1 when no truncation is necessary.
+ */
+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
+{
+ int err;
+ unsigned int u;
+ struct kstatfs st;
+ struct au_sbinfo *sbinfo;
+ struct au_xino *xi;
+ struct file *file;
+
+ /* todo: si_xino_expire and the ratio should be customizable */
+ sbinfo = au_sbi(sb);
+ if (time_before(jiffies,
+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
+ return -1;
+
+ /* truncation border */
+ xi = br->br_xino;
+ for (u = 0; u < xi->xi_nfile; u++) {
+ file = au_xino_file(xi, u);
+ if (!file)
+ continue;
+
+ err = vfs_statfs(&file->f_path, &st);
+ if (unlikely(err)) {
+ AuErr1("statfs err %d, ignored\n", err);
+ return -1;
+ }
+ if (div64_u64(st.f_bfree * 100, st.f_blocks)
+ >= AUFS_XINO_DEF_TRUNC)
+ return u;
+ }
+
+ return -1;
+}
+
+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
+{
+ int idx;
+ struct xino_do_trunc_args *args;
+ int wkq_err;
+
+ idx = xino_trunc_test(sb, br);
+ if (idx < 0)
+ return;
+
+ if (atomic_inc_return(&br->br_xino->xi_truncating) > 1)
+ goto out;
+
+ /* the lock is taken and @args is freed in xino_do_trunc() */
+ args = kmalloc(sizeof(*args), GFP_NOFS);
+ if (unlikely(!args)) {
+ AuErr1("no memory\n");
+ goto out;
+ }
+
+ au_lcnt_inc(&br->br_count);
+ args->sb = sb;
+ args->br = br;
+ args->idx = idx;
+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
+ if (!wkq_err)
+ return; /* success */
+
+ pr_err("wkq %d\n", wkq_err);
+ au_lcnt_dec(&br->br_count);
+ au_kfree_rcu(args);
+
+out:
+ atomic_dec(&br->br_xino->xi_truncating);
+}
+
+/* ---------------------------------------------------------------------- */
+
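+/*
+ * a single branch may need several xino files since one file holds at most
+ * au_xi_maxent() entries (derived from s_maxbytes of the filesystem the xino
+ * file lives on).  au_xi_calc() splits @h_ino into the index of the xino
+ * file and the byte offset within it, roughly:
+ *   idx = h_ino / maxent;
+ *   pos = (h_ino % maxent) * sizeof(ino_t);
+ */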
+struct au_xi_calc {
+ int idx;
+ loff_t pos;
+};
+
+static void au_xi_calc(struct super_block *sb, ino_t h_ino,
+ struct au_xi_calc *calc)
+{
+ loff_t maxent;
+
+ maxent = au_xi_maxent(sb);
+ calc->idx = div64_u64_rem(h_ino, maxent, &calc->pos);
+ calc->pos *= sizeof(ino_t);
+}
+
+static int au_xino_do_new_async(struct super_block *sb, struct au_branch *br,
+ struct au_xi_calc *calc)
+{
+ int err;
+ struct file *file;
+ struct au_xino *xi = br->br_xino;
+ struct au_xi_new xinew = {
+ .xi = xi
+ };
+
+ SiMustAnyLock(sb);
+
+ err = 0;
+ if (!xi)
+ goto out;
+
+ mutex_lock(&xi->xi_mtx);
+ file = au_xino_file(xi, calc->idx);
+ if (file)
+ goto out_mtx;
+
+ file = au_xino_file(xi, /*idx*/-1);
+ AuDebugOn(!file);
+ xinew.idx = calc->idx;
+ xinew.base = &file->f_path;
+ /* xinew.copy_src = NULL; */
+ file = au_xi_new(sb, &xinew);
+ if (IS_ERR(file))
+ err = PTR_ERR(file);
+
+out_mtx:
+ mutex_unlock(&xi->xi_mtx);
+out:
+ return err;
+}
+
+struct au_xino_do_new_async_args {
+ struct super_block *sb;
+ struct au_branch *br;
+ struct au_xi_calc calc;
+ ino_t ino;
+};
+
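+/*
+ * an inode-number pair recorded while its xino file is still being created
+ * asynchronously; au_xino_read() consults this list until the file exists,
+ * and au_xino_call_do_new_async() removes the entry once the pair is written.
+ */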
+struct au_xi_writing {
+ struct hlist_bl_node node;
+ ino_t h_ino, ino;
+};
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+ struct au_xi_calc *calc, ino_t ino);
+
+static void au_xino_call_do_new_async(void *args)
+{
+ struct au_xino_do_new_async_args *a = args;
+ struct au_branch *br;
+ struct super_block *sb;
+ struct au_sbinfo *sbi;
+ struct inode *root;
+ struct file *file;
+ struct au_xi_writing *del, *p;
+ struct hlist_bl_head *hbl;
+ struct hlist_bl_node *pos;
+ int err;
+
+ br = a->br;
+ sb = a->sb;
+ sbi = au_sbi(sb);
+ si_noflush_read_lock(sb);
+ root = d_inode(sb->s_root);
+ ii_read_lock_child(root);
+ err = au_xino_do_new_async(sb, br, &a->calc);
+ if (unlikely(err)) {
+ AuIOErr("err %d\n", err);
+ goto out;
+ }
+
+ file = au_xino_file(br->br_xino, a->calc.idx);
+ AuDebugOn(!file);
+ err = au_xino_do_write(sbi->si_xwrite, file, &a->calc, a->ino);
+ if (unlikely(err)) {
+ AuIOErr("err %d\n", err);
+ goto out;
+ }
+
+ del = NULL;
+ hbl = &br->br_xino->xi_writing;
+ hlist_bl_lock(hbl);
+ au_hbl_for_each(pos, hbl) {
+ p = container_of(pos, struct au_xi_writing, node);
+ if (p->ino == a->ino) {
+ del = p;
+ hlist_bl_del(&p->node);
+ break;
+ }
+ }
+ hlist_bl_unlock(hbl);
+ au_kfree_rcu(del);
+
+out:
+ au_lcnt_dec(&br->br_count);
+ ii_read_unlock(root);
+ si_read_unlock(sb);
+ au_nwt_done(&sbi->si_nowait);
+ au_kfree_rcu(a);
+}
+
+/*
+ * create a new xino file asynchronously
+ */
+static int au_xino_new_async(struct super_block *sb, struct au_branch *br,
+ struct au_xi_calc *calc, ino_t ino)
+{
+ int err;
+ struct au_xino_do_new_async_args *arg;
+
+ err = -ENOMEM;
+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
+ if (unlikely(!arg))
+ goto out;
+
+ arg->sb = sb;
+ arg->br = br;
+ arg->calc = *calc;
+ arg->ino = ino;
+ au_lcnt_inc(&br->br_count);
+ err = au_wkq_nowait(au_xino_call_do_new_async, arg, sb, AuWkq_NEST);
+ if (unlikely(err)) {
+ pr_err("wkq %d\n", err);
+ au_lcnt_dec(&br->br_count);
+ au_kfree_rcu(arg);
+ }
+
+out:
+ return err;
+}
+
+/*
+ * read @ino from xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ */
+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+ ino_t *ino)
+{
+ int err;
+ ssize_t sz;
+ struct au_xi_calc calc;
+ struct au_sbinfo *sbinfo;
+ struct file *file;
+ struct au_xino *xi;
+ struct hlist_bl_head *hbl;
+ struct hlist_bl_node *pos;
+ struct au_xi_writing *p;
+
+ *ino = 0;
+ if (!au_opt_test(au_mntflags(sb), XINO))
+ return 0; /* no xino */
+
+ err = 0;
+ au_xi_calc(sb, h_ino, &calc);
+ xi = au_sbr(sb, bindex)->br_xino;
+ file = au_xino_file(xi, calc.idx);
+ if (!file) {
+ hbl = &xi->xi_writing;
+ hlist_bl_lock(hbl);
+ au_hbl_for_each(pos, hbl) {
+ p = container_of(pos, struct au_xi_writing, node);
+ if (p->h_ino == h_ino) {
+ AuDbg("hi%llu, i%llu, found\n",
+ (u64)p->h_ino, (u64)p->ino);
+ *ino = p->ino;
+ break;
+ }
+ }
+ hlist_bl_unlock(hbl);
+ return 0;
+ } else if (vfsub_f_size_read(file) < calc.pos + sizeof(*ino))
+ return 0; /* no xino */
+
+ sbinfo = au_sbi(sb);
+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &calc.pos);
+ if (sz == sizeof(*ino))
+ return 0; /* success */
+
+ err = sz;
+ if (unlikely(sz >= 0)) {
+ err = -EIO;
+ AuIOErr("xino read error (%zd)\n", sz);
+ }
+ return err;
+}
+
+static int au_xino_do_write(vfs_writef_t write, struct file *file,
+ struct au_xi_calc *calc, ino_t ino)
+{
+ ssize_t sz;
+
+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &calc->pos);
+ if (sz == sizeof(ino))
+ return 0; /* success */
+
+ AuIOErr("write failed (%zd)\n", sz);
+ return -EIO;
+}
+
+/*
+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
+ * at the position of @h_ino.
+ * even if @ino is zero, it is written to the xinofile, meaning no entry exists.
+ * if the size of the xino file on a specific filesystem exceeds the watermark,
+ * try truncating it.
+ */
+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+ ino_t ino)
+{
+ int err;
+ unsigned int mnt_flags;
+ struct au_xi_calc calc;
+ struct file *file;
+ struct au_branch *br;
+ struct au_xino *xi;
+ struct au_xi_writing *p;
+
+ SiMustAnyLock(sb);
+
+ mnt_flags = au_mntflags(sb);
+ if (!au_opt_test(mnt_flags, XINO))
+ return 0;
+
+ au_xi_calc(sb, h_ino, &calc);
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ file = au_xino_file(xi, calc.idx);
+ if (!file) {
+ /* store the inum pair into the list */
+ p = kmalloc(sizeof(*p), GFP_NOFS | __GFP_NOFAIL);
+ p->h_ino = h_ino;
+ p->ino = ino;
+ au_hbl_add(&p->node, &xi->xi_writing);
+
+ /* create and write a new xino file asynchronously */
+ err = au_xino_new_async(sb, br, &calc, ino);
+ if (!err)
+ return 0; /* success */
+ goto out;
+ }
+
+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, file, &calc, ino);
+ if (!err) {
+ br = au_sbr(sb, bindex);
+ if (au_opt_test(mnt_flags, TRUNC_XINO)
+ && au_test_fs_trunc_xino(au_br_sb(br)))
+ xino_try_trunc(sb, br);
+ return 0; /* success */
+ }
+
+out:
+ AuIOErr("write failed (%d)\n", err);
+ return -EIO;
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos);
+
+/* todo: unnecessary to support mmap_sem since kernel-space? */
+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
+ loff_t *pos)
+{
+ ssize_t err;
+ mm_segment_t oldfs;
+ union {
+ void *k;
+ char __user *u;
+ } buf;
+ int i;
+ const int prevent_endless = 10;
+
+ i = 0;
+ buf.k = kbuf;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ do {
+ err = func(file, buf.u, size, pos);
+ if (err == -EINTR
+ && !au_wkq_test()
+ && fatal_signal_pending(current)) {
+ set_fs(oldfs);
+ err = xino_fread_wkq(func, file, kbuf, size, pos);
+ BUG_ON(err == -EINTR);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ }
+ } while (i++ < prevent_endless
+ && (err == -EAGAIN || err == -EINTR));
+ set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+ if (err > 0)
+ fsnotify_access(file->f_path.dentry);
+#endif
+
+ return err;
+}
+
+struct xino_fread_args {
+ ssize_t *errp;
+ vfs_readf_t func;
+ struct file *file;
+ void *buf;
+ size_t size;
+ loff_t *pos;
+};
+
+static void call_xino_fread(void *args)
+{
+ struct xino_fread_args *a = args;
+ *a->errp = xino_fread(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fread_wkq(vfs_readf_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ int wkq_err;
+ struct xino_fread_args args = {
+ .errp = &err,
+ .func = func,
+ .file = file,
+ .buf = buf,
+ .size = size,
+ .pos = pos
+ };
+
+ wkq_err = au_wkq_wait(call_xino_fread, &args);
+ if (unlikely(wkq_err))
+ err = wkq_err;
+
+ return err;
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos);
+
+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ mm_segment_t oldfs;
+ union {
+ void *k;
+ const char __user *u;
+ } buf;
+ int i;
+ const int prevent_endless = 10;
+
+ i = 0;
+ buf.k = kbuf;
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ do {
+ err = func(file, buf.u, size, pos);
+ if (err == -EINTR
+ && !au_wkq_test()
+ && fatal_signal_pending(current)) {
+ set_fs(oldfs);
+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
+ BUG_ON(err == -EINTR);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ }
+ } while (i++ < prevent_endless
+ && (err == -EAGAIN || err == -EINTR));
+ set_fs(oldfs);
+
+#if 0 /* reserved for future use */
+ if (err > 0)
+ fsnotify_modify(file->f_path.dentry);
+#endif
+
+ return err;
+}
+
+struct do_xino_fwrite_args {
+ ssize_t *errp;
+ vfs_writef_t func;
+ struct file *file;
+ void *buf;
+ size_t size;
+ loff_t *pos;
+};
+
+static void call_do_xino_fwrite(void *args)
+{
+ struct do_xino_fwrite_args *a = args;
+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
+}
+
+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+ int wkq_err;
+ struct do_xino_fwrite_args args = {
+ .errp = &err,
+ .func = func,
+ .file = file,
+ .buf = buf,
+ .size = size,
+ .pos = pos
+ };
+
+ /*
+ * writing via the workqueue bypasses RLIMIT_FSIZE and the normal user's
+ * limits; users should still care about quota and a real 'filesystem full.'
+ */
+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
+ if (unlikely(wkq_err))
+ err = wkq_err;
+
+ return err;
+}
+
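+/*
+ * write to a xino file; when the caller has a finite RLIMIT_FSIZE, delegate
+ * the write to the workqueue, which is not bound by that limit.
+ */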
+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
+ size_t size, loff_t *pos)
+{
+ ssize_t err;
+
+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
+ lockdep_off();
+ err = do_xino_fwrite(func, file, buf, size, pos);
+ lockdep_on();
+ } else {
+ lockdep_off();
+ err = xino_fwrite_wkq(func, file, buf, size, pos);
+ lockdep_on();
+ }
+
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * inode number bitmap
+ */
+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
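+
+/*
+ * an aufs inode number is AUFS_FIRST_INO plus its bit position in the xib
+ * bitmap file; every PAGE_SIZE chunk of that file covers page_bits numbers.
+ */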
+static ino_t xib_calc_ino(unsigned long pindex, int bit)
+{
+ ino_t ino;
+
+ AuDebugOn(bit < 0 || page_bits <= bit);
+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
+ return ino;
+}
+
+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
+{
+ AuDebugOn(ino < AUFS_FIRST_INO);
+ ino -= AUFS_FIRST_INO;
+ *pindex = ino / page_bits;
+ *bit = ino % page_bits;
+}
+
+static int xib_pindex(struct super_block *sb, unsigned long pindex)
+{
+ int err;
+ loff_t pos;
+ ssize_t sz;
+ struct au_sbinfo *sbinfo;
+ struct file *xib;
+ unsigned long *p;
+
+ sbinfo = au_sbi(sb);
+ MtxMustLock(&sbinfo->si_xib_mtx);
+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
+ || !au_opt_test(sbinfo->si_mntflags, XINO));
+
+ if (pindex == sbinfo->si_xib_last_pindex)
+ return 0;
+
+ xib = sbinfo->si_xib;
+ p = sbinfo->si_xib_buf;
+ pos = sbinfo->si_xib_last_pindex;
+ pos *= PAGE_SIZE;
+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+ if (unlikely(sz != PAGE_SIZE))
+ goto out;
+
+ pos = pindex;
+ pos *= PAGE_SIZE;
+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
+ else {
+ memset(p, 0, PAGE_SIZE);
+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
+ }
+ if (sz == PAGE_SIZE) {
+ sbinfo->si_xib_last_pindex = pindex;
+ return 0; /* success */
+ }
+
+out:
+ AuIOErr1("write failed (%zd)\n", sz);
+ err = sz;
+ if (sz >= 0)
+ err = -EIO;
+ return err;
+}
+
+static void au_xib_clear_bit(struct inode *inode)
+{
+ int err, bit;
+ unsigned long pindex;
+ struct super_block *sb;
+ struct au_sbinfo *sbinfo;
+
+ AuDebugOn(inode->i_nlink);
+
+ sb = inode->i_sb;
+ xib_calc_bit(inode->i_ino, &pindex, &bit);
+ AuDebugOn(page_bits <= bit);
+ sbinfo = au_sbi(sb);
+ mutex_lock(&sbinfo->si_xib_mtx);
+ err = xib_pindex(sb, pindex);
+ if (!err) {
+ clear_bit(bit, sbinfo->si_xib_buf);
+ sbinfo->si_xib_next_bit = bit;
+ }
+ mutex_unlock(&sbinfo->si_xib_mtx);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * truncate a xino bitmap file
+ */
+
+/* todo: slow */
+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
+{
+ int err, bit;
+ ssize_t sz;
+ unsigned long pindex;
+ loff_t pos, pend;
+ struct au_sbinfo *sbinfo;
+ vfs_readf_t func;
+ ino_t *ino;
+ unsigned long *p;
+
+ err = 0;
+ sbinfo = au_sbi(sb);
+ MtxMustLock(&sbinfo->si_xib_mtx);
+ p = sbinfo->si_xib_buf;
+ func = sbinfo->si_xread;
+ pend = vfsub_f_size_read(file);
+ pos = 0;
+ while (pos < pend) {
+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
+ err = sz;
+ if (unlikely(sz <= 0))
+ goto out;
+
+ err = 0;
+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
+ if (unlikely(*ino < AUFS_FIRST_INO))
+ continue;
+
+ xib_calc_bit(*ino, &pindex, &bit);
+ AuDebugOn(page_bits <= bit);
+ err = xib_pindex(sb, pindex);
+ if (!err)
+ set_bit(bit, p);
+ else
+ goto out;
+ }
+ }
+
+out:
+ return err;
+}
+
+static int xib_restore(struct super_block *sb)
+{
+ int err, i;
+ unsigned int nfile;
+ aufs_bindex_t bindex, bbot;
+ void *page;
+ struct au_branch *br;
+ struct au_xino *xi;
+ struct file *file;
+
+ err = -ENOMEM;
+ page = (void *)__get_free_page(GFP_NOFS);
+ if (unlikely(!page))
+ goto out;
+
+ err = 0;
+ bbot = au_sbbot(sb);
+ for (bindex = 0; !err && bindex <= bbot; bindex++)
+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) {
+ br = au_sbr(sb, bindex);
+ xi = br->br_xino;
+ nfile = xi->xi_nfile;
+ for (i = 0; i < nfile; i++) {
+ file = au_xino_file(xi, i);
+ if (file)
+ err = do_xib_restore(sb, file, page);
+ }
+ } else
+ AuDbg("skip shared b%d\n", bindex);
+ free_page((unsigned long)page);
+
+out:
+ return err;
+}
+
+int au_xib_trunc(struct super_block *sb)
+{
+ int err;
+ ssize_t sz;
+ loff_t pos;
+ struct au_sbinfo *sbinfo;
+ unsigned long *p;
+ struct file *file;
+
+ SiMustWriteLock(sb);
+
+ err = 0;
+ sbinfo = au_sbi(sb);
+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
+ goto out;
+
+ file = sbinfo->si_xib;
+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
+ goto out;
+
+ file = au_xino_create2(sb, &sbinfo->si_xib->f_path, NULL);
+ err = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+ fput(sbinfo->si_xib);
+ sbinfo->si_xib = file;
+
+ p = sbinfo->si_xib_buf;
+ memset(p, 0, PAGE_SIZE);
+ pos = 0;
+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
+ if (unlikely(sz != PAGE_SIZE)) {
+ err = sz;
+ AuIOErr("err %d\n", err);
+ if (sz >= 0)
+ err = -EIO;
+ goto out;
+ }
+
+ mutex_lock(&sbinfo->si_xib_mtx);
+ /* mnt_want_write() is unnecessary here */
+ err = xib_restore(sb);
+ mutex_unlock(&sbinfo->si_xib_mtx);
+
+out:
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
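+/* allocate and initialize an au_xino object which can hold @nfile xino files */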
+struct au_xino *au_xino_alloc(unsigned int nfile)
+{
+ struct au_xino *xi;
+
+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
+ if (unlikely(!xi))
+ goto out;
+ xi->xi_nfile = nfile;
+ xi->xi_file = kcalloc(nfile, sizeof(*xi->xi_file), GFP_NOFS);
+ if (unlikely(!xi->xi_file))
+ goto out_free;
+
+ xi->xi_nondir.total = 8; /* initial size */
+ xi->xi_nondir.array = kcalloc(xi->xi_nondir.total, sizeof(ino_t),
+ GFP_NOFS);
+ if (unlikely(!xi->xi_nondir.array))
+ goto out_file;
+
+ spin_lock_init(&xi->xi_nondir.spin);
+ init_waitqueue_head(&xi->xi_nondir.wqh);
+ mutex_init(&xi->xi_mtx);
+ INIT_HLIST_BL_HEAD(&xi->xi_writing);
+ atomic_set(&xi->xi_truncating, 0);
+ kref_init(&xi->xi_kref);
+ goto out; /* success */
+
+out_file:
+ au_kfree_try_rcu(xi->xi_file);
+out_free:
+ au_kfree_rcu(xi);
+ xi = NULL;
+out:
+ return xi;
+}
+
+static int au_xino_init(struct au_branch *br, int idx, struct file *file)
+{
+ int err;
+ struct au_xino *xi;
+
+ err = 0;
+ xi = au_xino_alloc(idx + 1);
+ if (unlikely(!xi)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (file)
+ get_file(file);
+ xi->xi_file[idx] = file;
+ AuDebugOn(br->br_xino);
+ br->br_xino = xi;
+
+out:
+ return err;
+}
+
+static void au_xino_release(struct kref *kref)
+{
+ struct au_xino *xi;
+ int i;
+ unsigned long ul;
+ struct hlist_bl_head *hbl;
+ struct hlist_bl_node *pos, *n;
+ struct au_xi_writing *p;
+
+ xi = container_of(kref, struct au_xino, xi_kref);
+ for (i = 0; i < xi->xi_nfile; i++)
+ if (xi->xi_file[i])
+ fput(xi->xi_file[i]);
+ for (i = xi->xi_nondir.total - 1; i >= 0; i--)
+ AuDebugOn(xi->xi_nondir.array[i]);
+ mutex_destroy(&xi->xi_mtx);
+ hbl = &xi->xi_writing;
+ ul = au_hbl_count(hbl);
+ if (unlikely(ul)) {
+ pr_warn("xi_writing %lu\n", ul);
+ hlist_bl_lock(hbl);
+ hlist_bl_for_each_entry_safe(p, pos, n, hbl, node) {
+ hlist_bl_del(&p->node);
+ /* kmemleak reported au_kfree_rcu() doesn't free it */
+ kfree(p);
+ }
+ hlist_bl_unlock(hbl);
+ }
+ au_kfree_try_rcu(xi->xi_file);
+ au_kfree_try_rcu(xi->xi_nondir.array);
+ au_kfree_rcu(xi);
+}
+
+int au_xino_put(struct au_branch *br)
+{
+ int ret;
+ struct au_xino *xi;
+
+ ret = 0;
+ xi = br->br_xino;
+ if (xi) {
+ br->br_xino = NULL;
+ ret = kref_put(&xi->xi_kref, au_xino_release);
+ }
+
+ return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * xino mount option handlers
+ */
+
+/* xino bitmap */
+static void xino_clear_xib(struct super_block *sb)
+{
+ struct au_sbinfo *sbinfo;
+
+ SiMustWriteLock(sb);
+
+ sbinfo = au_sbi(sb);
+ /* unnecessary to clear sbinfo->si_xread and ->si_xwrite */
+ if (sbinfo->si_xib)
+ fput(sbinfo->si_xib);
+ sbinfo->si_xib = NULL;
+ if (sbinfo->si_xib_buf)
+ free_page((unsigned long)sbinfo->si_xib_buf);
+ sbinfo->si_xib_buf = NULL;
+}
+
+static int au_xino_set_xib(struct super_block *sb, struct path *path)
+{
+ int err;
+ loff_t pos;
+ struct au_sbinfo *sbinfo;
+ struct file *file;
+ struct super_block *xi_sb;
+
+ SiMustWriteLock(sb);
+
+ sbinfo = au_sbi(sb);
+ file = au_xino_create2(sb, path, sbinfo->si_xib);
+ err = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+ if (sbinfo->si_xib)
+ fput(sbinfo->si_xib);
+ sbinfo->si_xib = file;
+ sbinfo->si_xread = vfs_readf(file);
+ sbinfo->si_xwrite = vfs_writef(file);
+ xi_sb = file_inode(file)->i_sb;
+ sbinfo->si_ximaxent = xi_sb->s_maxbytes;
+ if (unlikely(sbinfo->si_ximaxent < PAGE_SIZE)) {
+ err = -EIO;
+ pr_err("s_maxbytes(%llu) on %s is too small\n",
+ (u64)sbinfo->si_ximaxent, au_sbtype(xi_sb));
+ goto out_unset;
+ }
+ sbinfo->si_ximaxent /= sizeof(ino_t);
+
+ err = -ENOMEM;
+ if (!sbinfo->si_xib_buf)
+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
+ if (unlikely(!sbinfo->si_xib_buf))
+ goto out_unset;
+
+ sbinfo->si_xib_last_pindex = 0;
+ sbinfo->si_xib_next_bit = 0;
+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
+ pos = 0;
+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
+ PAGE_SIZE, &pos);
+ if (unlikely(err != PAGE_SIZE))
+ goto out_free;
+ }
+ err = 0;
+ goto out; /* success */
+
+out_free:
+ if (sbinfo->si_xib_buf)
+ free_page((unsigned long)sbinfo->si_xib_buf);
+ sbinfo->si_xib_buf = NULL;
+ if (err >= 0)
+ err = -EIO;
+out_unset:
+ fput(sbinfo->si_xib);
+ sbinfo->si_xib = NULL;
+out:
+ AuTraceErr(err);
+ return err;
+}
+
+/* xino for each branch */
+static void xino_clear_br(struct super_block *sb)
+{
+ aufs_bindex_t bindex, bbot;
+ struct au_branch *br;
+
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
+ br = au_sbr(sb, bindex);
+ AuDebugOn(!br);
+ au_xino_put(br);
+ }
+}
+
+static void au_xino_set_br_shared(struct super_block *sb, struct au_branch *br,
+ aufs_bindex_t bshared)
+{
+ struct au_branch *brshared;
+
+ brshared = au_sbr(sb, bshared);
+ AuDebugOn(!brshared->br_xino);
+ AuDebugOn(!brshared->br_xino->xi_file);
+ if (br->br_xino != brshared->br_xino) {
+ au_xino_get(brshared);
+ au_xino_put(br);
+ br->br_xino = brshared->br_xino;
+ }
+}
+
+struct au_xino_do_set_br {
+ vfs_writef_t writef;
+ struct au_branch *br;
+ ino_t h_ino;
+ aufs_bindex_t bshared;
+};
+
+static int au_xino_do_set_br(struct super_block *sb, struct path *path,
+ struct au_xino_do_set_br *args)
+{
+ int err;
+ struct au_xi_calc calc;
+ struct file *file;
+ struct au_branch *br;
+ struct au_xi_new xinew = {
+ .base = path
+ };
+
+ br = args->br;
+ xinew.xi = br->br_xino;
+ au_xi_calc(sb, args->h_ino, &calc);
+ xinew.copy_src = au_xino_file(xinew.xi, calc.idx);
+ if (args->bshared >= 0)
+ /* shared xino */
+ au_xino_set_br_shared(sb, br, args->bshared);
+ else if (!xinew.xi) {
+ /* new xino */
+ err = au_xino_init(br, calc.idx, xinew.copy_src);
+ if (unlikely(err))
+ goto out;
+ }
+
+ /* force re-creating */
+ xinew.xi = br->br_xino;
+ xinew.idx = calc.idx;
+ mutex_lock(&xinew.xi->xi_mtx);
+ file = au_xi_new(sb, &xinew);
+ mutex_unlock(&xinew.xi->xi_mtx);
+ err = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+ AuDebugOn(!file);
+
+ err = au_xino_do_write(args->writef, file, &calc, AUFS_ROOT_INO);
+ if (unlikely(err))
+ au_xino_put(br);
+
+out:
+ AuTraceErr(err);
+ return err;
+}
+
+static int au_xino_set_br(struct super_block *sb, struct path *path)
+{
+ int err;
+ aufs_bindex_t bindex, bbot;
+ struct au_xino_do_set_br args;
+ struct inode *inode;
+
+ SiMustWriteLock(sb);
+
+ bbot = au_sbbot(sb);
+ inode = d_inode(sb->s_root);
+ args.writef = au_sbi(sb)->si_xwrite;
+ for (bindex = 0; bindex <= bbot; bindex++) {
+ args.h_ino = au_h_iptr(inode, bindex)->i_ino;
+ args.br = au_sbr(sb, bindex);
+ args.bshared = is_sb_shared(sb, bindex, bindex - 1);
+ err = au_xino_do_set_br(sb, path, &args);
+ if (unlikely(err))
+ break;
+ }
+
+ AuTraceErr(err);
+ return err;
+}
+
+void au_xino_clr(struct super_block *sb)
+{
+ struct au_sbinfo *sbinfo;
+
+ au_xigen_clr(sb);
+ xino_clear_xib(sb);
+ xino_clear_br(sb);
+ dbgaufs_brs_del(sb, 0);
+ sbinfo = au_sbi(sb);
+ /* lvalue, do not call au_mntflags() */
+ au_opt_clr(sbinfo->si_mntflags, XINO);
+}
+
+int au_xino_set(struct super_block *sb, struct au_opt_xino *xiopt, int remount)
+{
+ int err, skip;
+ struct dentry *dentry, *parent, *cur_dentry, *cur_parent;
+ struct qstr *dname, *cur_name;
+ struct file *cur_xino;
+ struct au_sbinfo *sbinfo;
+ struct path *path, *cur_path;
+
+ SiMustWriteLock(sb);
+
+ err = 0;
+ sbinfo = au_sbi(sb);
+ path = &xiopt->file->f_path;
+ dentry = path->dentry;
+ parent = dget_parent(dentry);
+ if (remount) {
+ skip = 0;
+ cur_xino = sbinfo->si_xib;
+ if (cur_xino) {
+ cur_path = &cur_xino->f_path;
+ cur_dentry = cur_path->dentry;
+ cur_parent = dget_parent(cur_dentry);
+ cur_name = &cur_dentry->d_name;
+ dname = &dentry->d_name;
+ skip = (cur_parent == parent
+ && au_qstreq(dname, cur_name));
+ dput(cur_parent);
+ }
+ if (skip)
+ goto out;
+ }
+
+ au_opt_set(sbinfo->si_mntflags, XINO);
+ err = au_xino_set_xib(sb, path);
+ /* si_x{read,write} are set */
+ if (!err)
+ err = au_xigen_set(sb, path);
+ if (!err)
+ err = au_xino_set_br(sb, path);
+ if (!err) {
+ dbgaufs_brs_add(sb, 0, /*topdown*/1);
+ goto out; /* success */
+ }
+
+ /* reset all */
+ AuIOErr("failed setting xino(%d).\n", err);
+ au_xino_clr(sb);
+
+out:
+ dput(parent);
+ return err;
+}
+
+/*
+ * create a xinofile at the default place/path.
+ */
+struct file *au_xino_def(struct super_block *sb)
+{
+ struct file *file;
+ char *page, *p;
+ struct au_branch *br;
+ struct super_block *h_sb;
+ struct path path;
+ aufs_bindex_t bbot, bindex, bwr;
+
+ br = NULL;
+ bbot = au_sbbot(sb);
+ bwr = -1;
+ for (bindex = 0; bindex <= bbot; bindex++) {
+ br = au_sbr(sb, bindex);
+ if (au_br_writable(br->br_perm)
+ && !au_test_fs_bad_xino(au_br_sb(br))) {
+ bwr = bindex;
+ break;
+ }
+ }
+
+ if (bwr >= 0) {
+ file = ERR_PTR(-ENOMEM);
+ page = (void *)__get_free_page(GFP_NOFS);
+ if (unlikely(!page))
+ goto out;
+ path.mnt = au_br_mnt(br);
+ path.dentry = au_h_dptr(sb->s_root, bwr);
+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
+ file = (void *)p;
+ if (!IS_ERR(p)) {
+ strcat(p, "/" AUFS_XINO_FNAME);
+ AuDbg("%s\n", p);
+ file = au_xino_create(sb, p, /*silent*/0, /*wbrtop*/1);
+ }
+ free_page((unsigned long)page);
+ } else {
+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0,
+ /*wbrtop*/0);
+ if (IS_ERR(file))
+ goto out;
+ h_sb = file->f_path.dentry->d_sb;
+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
+ pr_err("xino doesn't support %s(%s)\n",
+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
+ fput(file);
+ file = ERR_PTR(-EINVAL);
+ }
+ }
+
+out:
+ return file;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * initialize the xinofile for the specified branch @br
+ * at the place/path that @base indicates.
+ * test whether another branch is on the same filesystem;
+ * if one is found, share its xinofile.
+ */
+int au_xino_init_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
+ struct path *base)
+{
+ int err;
+ struct au_xino_do_set_br args = {
+ .h_ino = h_ino,
+ .br = br
+ };
+
+ args.writef = au_sbi(sb)->si_xwrite;
+ args.bshared = sbr_find_shared(sb, /*btop*/0, au_sbbot(sb),
+ au_br_sb(br));
+ err = au_xino_do_set_br(sb, base, &args);
+ if (unlikely(err))
+ au_xino_put(br);
+
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/*
+ * get an unused inode number from the bitmap
+ */
+ino_t au_xino_new_ino(struct super_block *sb)
+{
+ ino_t ino;
+ unsigned long *p, pindex, ul, pend;
+ struct au_sbinfo *sbinfo;
+ struct file *file;
+ int free_bit, err;
+
+ if (!au_opt_test(au_mntflags(sb), XINO))
+ return iunique(sb, AUFS_FIRST_INO);
+
+ sbinfo = au_sbi(sb);
+ mutex_lock(&sbinfo->si_xib_mtx);
+ p = sbinfo->si_xib_buf;
+ free_bit = sbinfo->si_xib_next_bit;
+ if (free_bit < page_bits && !test_bit(free_bit, p))
+ goto out; /* success */
+ free_bit = find_first_zero_bit(p, page_bits);
+ if (free_bit < page_bits)
+ goto out; /* success */
+
+ pindex = sbinfo->si_xib_last_pindex;
+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
+ err = xib_pindex(sb, ul);
+ if (unlikely(err))
+ goto out_err;
+ free_bit = find_first_zero_bit(p, page_bits);
+ if (free_bit < page_bits)
+ goto out; /* success */
+ }
+
+ file = sbinfo->si_xib;
+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
+ for (ul = pindex + 1; ul <= pend; ul++) {
+ err = xib_pindex(sb, ul);
+ if (unlikely(err))
+ goto out_err;
+ free_bit = find_first_zero_bit(p, page_bits);
+ if (free_bit < page_bits)
+ goto out; /* success */
+ }
+ BUG();
+
+out:
+ set_bit(free_bit, p);
+ sbinfo->si_xib_next_bit = free_bit + 1;
+ pindex = sbinfo->si_xib_last_pindex;
+ mutex_unlock(&sbinfo->si_xib_mtx);
+ ino = xib_calc_ino(pindex, free_bit);
+ AuDbg("i%lu\n", (unsigned long)ino);
+ return ino;
+out_err:
+ mutex_unlock(&sbinfo->si_xib_mtx);
+ AuDbg("i0\n");
+ return 0;
+}
+
+/* for s_op->delete_inode() */
+void au_xino_delete_inode(struct inode *inode, const int unlinked)
+{
+ int err;
+ unsigned int mnt_flags;
+ aufs_bindex_t bindex, bbot, bi;
+ unsigned char try_trunc;
+ struct au_iinfo *iinfo;
+ struct super_block *sb;
+ struct au_hinode *hi;
+ struct inode *h_inode;
+ struct au_branch *br;
+ vfs_writef_t xwrite;
+ struct au_xi_calc calc;
+ struct file *file;
+
+ AuDebugOn(au_is_bad_inode(inode));
+
+ sb = inode->i_sb;
+ mnt_flags = au_mntflags(sb);
+ if (!au_opt_test(mnt_flags, XINO)
+ || inode->i_ino == AUFS_ROOT_INO)
+ return;
+
+ if (unlinked) {
+ au_xigen_inc(inode);
+ au_xib_clear_bit(inode);
+ }
+
+ iinfo = au_ii(inode);
+ bindex = iinfo->ii_btop;
+ if (bindex < 0)
+ return;
+
+ xwrite = au_sbi(sb)->si_xwrite;
+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
+ hi = au_hinode(iinfo, bindex);
+ bbot = iinfo->ii_bbot;
+ for (; bindex <= bbot; bindex++, hi++) {
+ h_inode = hi->hi_inode;
+ if (!h_inode
+ || (!unlinked && h_inode->i_nlink))
+ continue;
+
+ /* inode may not be revalidated */
+ bi = au_br_index(sb, hi->hi_id);
+ if (bi < 0)
+ continue;
+
+ br = au_sbr(sb, bi);
+ au_xi_calc(sb, h_inode->i_ino, &calc);
+ file = au_xino_file(br->br_xino, calc.idx);
+ if (IS_ERR_OR_NULL(file))
+ continue;
+
+ err = au_xino_do_write(xwrite, file, &calc, /*ino*/0);
+ if (!err && try_trunc
+ && au_test_fs_trunc_xino(au_br_sb(br)))
+ xino_try_trunc(sb, br);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
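+/*
+ * xi_nondir.array gives per-inode-number exclusion for non-directories:
+ * au_xinondir_enter() records @h_ino, waiting while another task already
+ * holds the same number, and au_xinondir_leave() clears the slot and wakes
+ * the waiters.
+ */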
+static int au_xinondir_find(struct au_xino *xi, ino_t h_ino)
+{
+ int found, total, i;
+
+ found = -1;
+ total = xi->xi_nondir.total;
+ for (i = 0; i < total; i++) {
+ if (xi->xi_nondir.array[i] != h_ino)
+ continue;
+ found = i;
+ break;
+ }
+
+ return found;
+}
+
+static int au_xinondir_expand(struct au_xino *xi)
+{
+ int err, sz;
+ ino_t *p;
+
+ BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
+
+ err = -ENOMEM;
+ sz = xi->xi_nondir.total * sizeof(ino_t);
+ if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
+ goto out;
+ p = au_kzrealloc(xi->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
+ /*may_shrink*/0);
+ if (p) {
+ xi->xi_nondir.array = p;
+ xi->xi_nondir.total <<= 1;
+ AuDbg("xi_nondir.total %d\n", xi->xi_nondir.total);
+ err = 0;
+ }
+
+out:
+ return err;
+}
+
+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
+ ino_t h_ino, int idx)
+{
+ struct au_xino *xi;
+
+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
+ xi = au_sbr(sb, bindex)->br_xino;
+ AuDebugOn(idx < 0 || xi->xi_nondir.total <= idx);
+
+ spin_lock(&xi->xi_nondir.spin);
+ AuDebugOn(xi->xi_nondir.array[idx] != h_ino);
+ xi->xi_nondir.array[idx] = 0;
+ spin_unlock(&xi->xi_nondir.spin);
+ wake_up_all(&xi->xi_nondir.wqh);
+}
+
+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
+ int *idx)
+{
+ int err, found, empty;
+ struct au_xino *xi;
+
+ err = 0;
+ *idx = -1;
+ if (!au_opt_test(au_mntflags(sb), XINO))
+ goto out; /* no xino */
+
+ xi = au_sbr(sb, bindex)->br_xino;
+
+again:
+ spin_lock(&xi->xi_nondir.spin);
+ found = au_xinondir_find(xi, h_ino);
+ if (found == -1) {
+ empty = au_xinondir_find(xi, /*h_ino*/0);
+ if (empty == -1) {
+ empty = xi->xi_nondir.total;
+ err = au_xinondir_expand(xi);
+ if (unlikely(err))
+ goto out_unlock;
+ }
+ xi->xi_nondir.array[empty] = h_ino;
+ *idx = empty;
+ } else {
+ spin_unlock(&xi->xi_nondir.spin);
+ wait_event(xi->xi_nondir.wqh,
+ xi->xi_nondir.array[found] != h_ino);
+ goto again;
+ }
+
+out_unlock:
+ spin_unlock(&xi->xi_nondir.spin);
+out:
+ return err;
+}
+
+/* ---------------------------------------------------------------------- */
+
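+/*
+ * show the path of a xino file; since the file is kept unlinked on purpose,
+ * drop the "\040(deleted)" suffix that au_seq_path() leaves in the seq buffer.
+ */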
+int au_xino_path(struct seq_file *seq, struct file *file)
+{
+ int err;
+
+ err = au_seq_path(seq, &file->f_path);
+ if (unlikely(err))
+ goto out;
+
+#define Deleted "\\040(deleted)"
+ seq->count -= sizeof(Deleted) - 1;
+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
+ sizeof(Deleted) - 1));
+#undef Deleted
+
+out:
+ return err;
+}