about summary refs log tree commit diff stats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- fs/xfs/xfs_inode.c 110
1 files changed, 64 insertions, 46 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5ed84d6c7059..cd81d6d9848d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1772,10 +1772,31 @@ xfs_inactive_ifree(
return error;
}
+ /*
+ * We do not hold the inode locked across the entire rolling transaction
+ * here. We only need to hold it for the first transaction that
+ * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
+ * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
+ * here breaks the relationship between cluster buffer invalidation and
+ * stale inode invalidation on cluster buffer item journal commit
+ * completion, and can result in leaving dirty stale inodes hanging
+ * around in memory.
+ *
+ * We have no need for serialising this inode operation against other
+ * operations - we freed the inode and hence reallocation is required
+ * and that will serialise on reallocating the space the deferops need
+ * to free. Hence we can unlock the inode on the first commit of
+ * the transaction rather than roll it right through the deferops. This
+ * avoids relogging the XFS_ISTALE inode.
+ *
+ * We check that xfs_ifree() hasn't grown an internal transaction roll
+ * by asserting that the inode is still locked when it returns.
+ */
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
error = xfs_ifree(tp, ip);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (error) {
/*
* If we fail to free the inode, shut down. The cancel
@@ -1788,7 +1809,6 @@ xfs_inactive_ifree(
xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
}
xfs_trans_cancel(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
@@ -1806,7 +1826,6 @@ xfs_inactive_ifree(
xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
__func__, error);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
}
@@ -2949,7 +2968,8 @@ xfs_rename(
spaceres);
/*
- * Set up the target.
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
*/
if (target_ip == NULL) {
/*
@@ -2961,6 +2981,46 @@ xfs_rename(
if (error)
goto out_trans_cancel;
}
+ } else {
+ /*
+ * If target exists and it's a directory, check whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2))) {
+ error = -EEXIST;
+ goto out_trans_cancel;
+ }
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. Once that is done we have a real link, so clear the tmpfile
+ * state flag from the inode to keep it from being accidentally misused
+ * in future.
+ */
+ if (wip) {
+ ASSERT(VFS_I(wip)->i_nlink == 0);
+ error = xfs_iunlink_remove(tp, wip);
+ if (error)
+ goto out_trans_cancel;
+
+ xfs_bumplink(tp, wip);
+ xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
+ VFS_I(wip)->i_state &= ~I_LINKABLE;
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
/*
* If target does not exist and the rename crosses
* directories, adjust the target directory link count
@@ -2981,22 +3041,6 @@ xfs_rename(
}
} else { /* target_ip != NULL */
/*
- * If target exists and it's a directory, check that both
- * target and source are directories and that target can be
- * destroyed, or that neither is a directory.
- */
- if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
- /*
- * Make sure target dir is empty.
- */
- if (!(xfs_dir_isempty(target_ip)) ||
- (VFS_I(target_ip)->i_nlink > 2)) {
- error = -EEXIST;
- goto out_trans_cancel;
- }
- }
-
- /*
* Link the source inode under the target name.
* If the source inode is a directory and we are moving
* it across directories, its ".." entry will be
@@ -3086,32 +3130,6 @@ xfs_rename(
if (error)
goto out_trans_cancel;
- /*
- * For whiteouts, we need to bump the link count on the whiteout inode.
- * This means that failures all the way up to this point leave the inode
- * on the unlinked list and so cleanup is a simple matter of dropping
- * the remaining reference to it. If we fail here after bumping the link
- * count, we're shutting down the filesystem so we'll never see the
- * intermediate state on disk.
- */
- if (wip) {
- ASSERT(VFS_I(wip)->i_nlink == 0);
- error = xfs_bumplink(tp, wip);
- if (error)
- goto out_trans_cancel;
- error = xfs_iunlink_remove(tp, wip);
- if (error)
- goto out_trans_cancel;
- xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
-
- /*
- * Now we have a real link, clear the "I'm a tmpfile" state
- * flag from the inode so it doesn't accidentally get misused in
- * future.
- */
- VFS_I(wip)->i_state &= ~I_LINKABLE;
- }
-
xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
if (new_parent)