aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/ntfs/attrib.c8
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/slot_map.c46
-rw-r--r--fs/ocfs2/super.c21
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_types.h1
-rw-r--r--fs/xfs/xfs_buf_item.c60
-rw-r--r--fs/xfs/xfs_buf_item_recover.c1
-rw-r--r--fs/xfs/xfs_dquot_item.c2
-rw-r--r--fs/xfs/xfs_file.c81
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_inode_item.c4
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_inode_item_recover.c39
-rw-r--r--fs/xfs/xfs_log.c30
-rw-r--r--fs/xfs/xfs_log.h4
-rw-r--r--fs/xfs/xfs_log_cil.c32
-rw-r--r--fs/xfs/xfs_log_priv.h15
-rw-r--r--fs/xfs/xfs_log_recover.c5
-rw-r--r--fs/xfs/xfs_mount.c10
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans.h4
22 files changed, 205 insertions, 191 deletions
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index d563abc3e136..914e99173130 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
a = (ATTR_RECORD*)((u8*)ctx->attr +
le32_to_cpu(ctx->attr->length));
for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated))
+ u8 *mrec_end = (u8 *)ctx->mrec +
+ le32_to_cpu(ctx->mrec->bytes_allocated);
+ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
+ a->name_length * sizeof(ntfschar);
+ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
+ name_end > mrec_end)
break;
ctx->attr = a;
if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7993d527edae..0a8cd8e59a92 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -279,7 +279,6 @@ enum ocfs2_mount_options
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
+ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 4da0e4b1e79b..8caeceeaeda7 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid ||
- !si->si_slots[preferred].sl_node_num) {
+ if (!si->si_slots[preferred].sl_valid) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid ||
- !si->si_slots[i].sl_node_num) {
+ if (!si->si_slots[i].sl_valid) {
ret = i;
break;
}
@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- if (ocfs2_mount_local(osb))
- /* use slot 0 directly in local mode */
- slot = 0;
- else {
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+ * need to warn to the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ if (slot < 0) {
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
- if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
- "already allocated to this node!\n",
- slot, osb->dev_str);
- }
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
+ "allocated to this node!\n", slot, osb->dev_str);
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 477ad05a34ea..c0e5f1bad499 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -175,7 +175,6 @@ enum {
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
- Opt_nocluster,
Opt_err,
};
@@ -209,7 +208,6 @@ static const match_table_t tokens = {
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
- {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto out;
}
- tmp = OCFS2_MOUNT_NOCLUSTER;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
- ret = -EINVAL;
- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
- goto out;
- }
-
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
}
if (ocfs2_userspace_stack(osb) &&
- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
@@ -1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
- "without cluster aware mode.\n", osb->dev_str);
-
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
- case Opt_nocluster:
- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
- break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
- if (opts & OCFS2_MOUNT_NOCLUSTER)
- seq_printf(s, ",nocluster");
-
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 8bd00da6d2a4..2f46ef3800aa 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -414,7 +414,16 @@ struct xfs_log_dinode {
/* start of the extended dinode, writable fields */
uint32_t di_crc; /* CRC of the inode */
uint64_t di_changecount; /* number of attribute changes */
- xfs_lsn_t di_lsn; /* flush sequence */
+
+ /*
+ * The LSN we write to this field during formatting is not a reflection
+ * of the current on-disk LSN. It should never be used for recovery
+ * sequencing, nor should it be recovered into the on-disk inode at all.
+ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
+ * for details.
+ */
+ xfs_lsn_t di_lsn;
+
uint64_t di_flags2; /* more random flags */
uint32_t di_cowextsize; /* basic cow extent size for file */
uint8_t di_pad2[12]; /* more padding for future expansion */
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 397d94775440..1ce06173c2f5 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
typedef int64_t xfs_lsn_t; /* log sequence number */
+typedef int64_t xfs_csn_t; /* CIL sequence number */
typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 8c6e26d62ef2..a3d5ecccfc2c 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -393,17 +393,8 @@ xfs_buf_item_pin(
}
/*
- * This is called to unpin the buffer associated with the buf log
- * item which was previously pinned with a call to xfs_buf_item_pin().
- *
- * Also drop the reference to the buf item for the current transaction.
- * If the XFS_BLI_STALE flag is set and we are the last reference,
- * then free up the buf log item and unlock the buffer.
- *
- * If the remove flag is set we are called from uncommit in the
- * forced-shutdown path. If that is true and the reference count on
- * the log item is going to drop to zero we need to free the item's
- * descriptor in the transaction.
+ * This is called to unpin the buffer associated with the buf log item which
+ * was previously pinned with a call to xfs_buf_item_pin().
*/
STATIC void
xfs_buf_item_unpin(
@@ -420,38 +411,35 @@ xfs_buf_item_unpin(
trace_xfs_buf_item_unpin(bip);
+ /*
+ * Drop the bli ref associated with the pin and grab the hold required
+ * for the I/O simulation failure in the abort case. We have to do this
+ * before the pin count drops because the AIL doesn't acquire a bli
+ * reference. Therefore if the refcount drops to zero, the bli could
+ * still be AIL resident and the buffer submitted for I/O (and freed on
+ * completion) at any point before we return. This can be removed once
+ * the AIL properly holds a reference on the bli.
+ */
freed = atomic_dec_and_test(&bip->bli_refcount);
-
+ if (freed && !stale && remove)
+ xfs_buf_hold(bp);
if (atomic_dec_and_test(&bp->b_pin_count))
wake_up_all(&bp->b_waiters);
- if (freed && stale) {
+ /* nothing to do but drop the pin count if the bli is active */
+ if (!freed)
+ return;
+
+ if (stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
ASSERT(bp->b_flags & XBF_STALE);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(list_empty(&lip->li_trans));
+ ASSERT(!bp->b_transp);
trace_xfs_buf_item_unpin_stale(bip);
- if (remove) {
- /*
- * If we are in a transaction context, we have to
- * remove the log item from the transaction as we are
- * about to release our reference to the buffer. If we
- * don't, the unlock that occurs later in
- * xfs_trans_uncommit() will try to reference the
- * buffer which we no longer have a hold on.
- */
- if (!list_empty(&lip->li_trans))
- xfs_trans_del_item(lip);
-
- /*
- * Since the transaction no longer refers to the buffer,
- * the buffer should no longer refer to the transaction.
- */
- bp->b_transp = NULL;
- }
-
/*
* If we get called here because of an IO error, we may or may
* not have the item on the AIL. xfs_trans_ail_delete() will
@@ -468,13 +456,13 @@ xfs_buf_item_unpin(
ASSERT(bp->b_log_item == NULL);
}
xfs_buf_relse(bp);
- } else if (freed && remove) {
+ } else if (remove) {
/*
* The buffer must be locked and held by the caller to simulate
- * an async I/O failure.
+ * an async I/O failure. We acquired the hold for this case
+ * before the buffer was unpinned.
*/
xfs_buf_lock(bp);
- xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioend_fail(bp);
}
@@ -632,7 +620,7 @@ xfs_buf_item_release(
STATIC void
xfs_buf_item_committing(
struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
+ xfs_csn_t seq)
{
return xfs_buf_item_release(lip);
}
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 1d649462d731..b374c9cee117 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
switch (magicda) {
case XFS_DIR3_LEAF1_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
case XFS_DA3_NODE_MAGIC:
lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 8c1fdf37ee8f..8ed47b739b6c 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release(
STATIC void
xfs_qm_dquot_logitem_committing(
struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
+ xfs_csn_t seq)
{
return xfs_qm_dquot_logitem_release(lip);
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5b0f93f73837..4d6bf8d4974f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -118,6 +118,54 @@ xfs_dir_fsync(
return xfs_log_force_inode(ip);
}
+static xfs_csn_t
+xfs_fsync_seq(
+ struct xfs_inode *ip,
+ bool datasync)
+{
+ if (!xfs_ipincount(ip))
+ return 0;
+ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+ return 0;
+ return ip->i_itemp->ili_commit_seq;
+}
+
+/*
+ * All metadata updates are logged, which means that we just have to flush the
+ * log up to the latest LSN that touched the inode.
+ *
+ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
+ * the log force before we clear the ili_fsync_fields field. This ensures that
+ * we don't get a racing sync operation that does not wait for the metadata to
+ * hit the journal before returning. If we race with clearing ili_fsync_fields,
+ * then all that will happen is the log force will do nothing as the lsn will
+ * already be on disk. We can't race with setting ili_fsync_fields because that
+ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
+ * shared until after the ili_fsync_fields is cleared.
+ */
+static int
+xfs_fsync_flush_log(
+ struct xfs_inode *ip,
+ bool datasync,
+ int *log_flushed)
+{
+ int error = 0;
+ xfs_csn_t seq;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ seq = xfs_fsync_seq(ip, datasync);
+ if (seq) {
+ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
+ log_flushed);
+
+ spin_lock(&ip->i_itemp->ili_lock);
+ ip->i_itemp->ili_fsync_fields = 0;
+ spin_unlock(&ip->i_itemp->ili_lock);
+ }
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ return error;
+}
+
STATIC int
xfs_file_fsync(
struct file *file,
@@ -125,13 +173,10 @@ xfs_file_fsync(
loff_t end,
int datasync)
{
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_inode_log_item *iip = ip->i_itemp;
+ struct xfs_inode *ip = XFS_I(file->f_mapping->host);
struct xfs_mount *mp = ip->i_mount;
int error = 0;
int log_flushed = 0;
- xfs_lsn_t lsn = 0;
trace_xfs_file_fsync(ip);
@@ -155,33 +200,7 @@ xfs_file_fsync(
else if (mp->m_logdev_targp != mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
- /*
- * All metadata updates are logged, which means that we just have to
- * flush the log up to the latest LSN that touched the inode. If we have
- * concurrent fsync/fdatasync() calls, we need them to all block on the
- * log force before we clear the ili_fsync_fields field. This ensures
- * that we don't get a racing sync operation that does not wait for the
- * metadata to hit the journal before returning. If we race with
- * clearing the ili_fsync_fields, then all that will happen is the log
- * force will do nothing as the lsn will already be on disk. We can't
- * race with setting ili_fsync_fields because that is done under
- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
- * until after the ili_fsync_fields is cleared.
- */
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (xfs_ipincount(ip)) {
- if (!datasync ||
- (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
- lsn = iip->ili_last_lsn;
- }
-
- if (lsn) {
- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
- spin_lock(&iip->ili_lock);
- iip->ili_fsync_fields = 0;
- spin_unlock(&iip->ili_lock);
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
/*
* If we only have a single device, and the log force about was
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 03497741aef7..1f61e085676b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2754,7 +2754,7 @@ xfs_iunpin(
trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
/* Give the log a push to start the unpinning I/O */
- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
+ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
}
@@ -3716,16 +3716,16 @@ int
xfs_log_force_inode(
struct xfs_inode *ip)
{
- xfs_lsn_t lsn = 0;
+ xfs_csn_t seq = 0;
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip))
- lsn = ip->i_itemp->ili_last_lsn;
+ seq = ip->i_itemp->ili_commit_seq;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (!lsn)
+ if (!seq)
return 0;
- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
+ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
}
/*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 6ff91e5bf3cd..3aba4559469f 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -617,9 +617,9 @@ xfs_inode_item_committed(
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
+ xfs_csn_t seq)
{
- INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
+ INODE_ITEM(lip)->ili_commit_seq = seq;
return xfs_inode_item_release(lip);
}
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 4b926e32831c..403b45ab9aa2 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -33,7 +33,7 @@ struct xfs_inode_log_item {
unsigned int ili_fields; /* fields to be logged */
unsigned int ili_fsync_fields; /* logged since last fsync */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
- xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
+ xfs_csn_t ili_commit_seq; /* last transaction commit */
};
static inline int xfs_inode_clean(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index cb44f7653f03..538724f9f85c 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
STATIC void
xfs_log_dinode_to_disk(
struct xfs_log_dinode *from,
- struct xfs_dinode *to)
+ struct xfs_dinode *to,
+ xfs_lsn_t lsn)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);
@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
to->di_ino = cpu_to_be64(from->di_ino);
- to->di_lsn = cpu_to_be64(from->di_lsn);
+ to->di_lsn = cpu_to_be64(lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
}
/*
- * If the inode has an LSN in it, recover the inode only if it's less
- * than the lsn of the transaction we are replaying. Note: we still
- * need to replay an owner change even though the inode is more recent
- * than the transaction as there is no guarantee that all the btree
- * blocks are more recent than this transaction, too.
+ * If the inode has an LSN in it, recover the inode only if the on-disk
+ * inode's LSN is older than the lsn of the transaction we are
+ * replaying. We can have multiple checkpoints with the same start LSN,
+ * so the current LSN being equal to the on-disk LSN doesn't necessarily
+ * mean that the on-disk inode is more recent than the change being
+ * replayed.
+ *
+ * We must check the current_lsn against the on-disk inode
+ * here because the we can't trust the log dinode to contain a valid LSN
+ * (see comment below before replaying the log dinode for details).
+ *
+ * Note: we still need to replay an owner change even though the inode
+ * is more recent than the transaction as there is no guarantee that all
+ * the btree blocks are more recent than this transaction, too.
*/
if (dip->di_version >= 3) {
xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
trace_xfs_log_recover_inode_skip(log, in_f);
error = 0;
goto out_owner_change;
@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
goto out_release;
}
- /* recover the log dinode inode into the on disk inode */
- xfs_log_dinode_to_disk(ldip, dip);
+ /*
+ * Recover the log dinode inode into the on disk inode.
+ *
+ * The LSN in the log dinode is garbage - it can be zero or reflect
+ * stale in-memory runtime state that isn't coherent with the changes
+ * logged in this transaction or the changes written to the on-disk
+ * inode. Hence we write the current lSN into the inode because that
+ * matches what xfs_iflush() would write inode the inode when flushing
+ * the changes in this transaction.
+ */
+ xfs_log_dinode_to_disk(ldip, dip, current_lsn);
fields = in_f->ilf_fields;
if (fields & XFS_ILOG_DEV)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b445e63cbc3c..22d7d74231d4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -765,6 +765,9 @@ xfs_log_mount_finish(
if (readonly)
mp->m_flags |= XFS_MOUNT_RDONLY;
+ /* Make sure the log is dead if we're returning failure. */
+ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
+
return error;
}
@@ -3210,14 +3213,13 @@ out_error:
}
static int
-__xfs_log_force_lsn(
- struct xfs_mount *mp,
+xlog_force_lsn(
+ struct xlog *log,
xfs_lsn_t lsn,
uint flags,
int *log_flushed,
bool already_slept)
{
- struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
spin_lock(&log->l_icloglock);
@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn(
if (!already_slept &&
(iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
- XFS_STATS_INC(mp, xs_log_force_sleep);
-
xlog_wait(&iclog->ic_prev->ic_write_wait,
&log->l_icloglock);
return -EAGAIN;
@@ -3289,25 +3289,29 @@ out_error:
* to disk, that thread will wake up all threads waiting on the queue.
*/
int
-xfs_log_force_lsn(
+xfs_log_force_seq(
struct xfs_mount *mp,
- xfs_lsn_t lsn,
+ xfs_csn_t seq,
uint flags,
int *log_flushed)
{
+ struct xlog *log = mp->m_log;
+ xfs_lsn_t lsn;
int ret;
- ASSERT(lsn != 0);
+ ASSERT(seq != 0);
XFS_STATS_INC(mp, xs_log_force);
- trace_xfs_log_force(mp, lsn, _RET_IP_);
+ trace_xfs_log_force(mp, seq, _RET_IP_);
- lsn = xlog_cil_force_lsn(mp->m_log, lsn);
+ lsn = xlog_cil_force_seq(log, seq);
if (lsn == NULLCOMMITLSN)
return 0;
- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
- if (ret == -EAGAIN)
- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
+ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
+ if (ret == -EAGAIN) {
+ XFS_STATS_INC(mp, xs_log_force_sleep);
+ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
+ }
return ret;
}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 98c913da7587..a1089f8b7169 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -106,7 +106,7 @@ struct xfs_item_ops;
struct xfs_trans;
int xfs_log_force(struct xfs_mount *mp, uint flags);
-int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
+int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
int *log_forced);
int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
@@ -132,8 +132,6 @@ bool xfs_log_writable(struct xfs_mount *mp);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn, bool regrant);
void xlog_cil_process_committed(struct list_head *list);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index cd5c04dabe2e..fbe160d5e9b9 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -777,7 +777,7 @@ xlog_cil_push_work(
* that higher sequences will wait for us to write out a commit record
* before they do.
*
- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * xfs_log_force_seq requires us to mirror the new sequence into the cil
* structure atomically with the addition of this sequence to the
* committing list. This also ensures that we can do unlocked checks
* against the current sequence in log forces without risking
@@ -1020,16 +1020,14 @@ xlog_cil_empty(
* allowed again.
*/
void
-xfs_log_commit_cil(
- struct xfs_mount *mp,
+xlog_cil_commit(
+ struct xlog *log,
struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn,
+ xfs_csn_t *commit_seq,
bool regrant)
{
- struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
struct xfs_log_item *lip, *next;
- xfs_lsn_t xc_commit_lsn;
/*
* Do all necessary memory allocation before we lock the CIL.
@@ -1043,10 +1041,6 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- xc_commit_lsn = cil->xc_ctx->sequence;
- if (commit_lsn)
- *commit_lsn = xc_commit_lsn;
-
if (regrant && !XLOG_FORCED_SHUTDOWN(log))
xfs_log_ticket_regrant(log, tp->t_ticket);
else
@@ -1069,8 +1063,10 @@ xfs_log_commit_cil(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
if (lip->li_ops->iop_committing)
- lip->li_ops->iop_committing(lip, xc_commit_lsn);
+ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
}
+ if (commit_seq)
+ *commit_seq = cil->xc_ctx->sequence;
/* xlog_cil_push_background() releases cil->xc_ctx_lock */
xlog_cil_push_background(log);
@@ -1087,9 +1083,9 @@ xfs_log_commit_cil(
* iclog flush is necessary following this call.
*/
xfs_lsn_t
-xlog_cil_force_lsn(
+xlog_cil_force_seq(
struct xlog *log,
- xfs_lsn_t sequence)
+ xfs_csn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx;
@@ -1183,23 +1179,19 @@ out_shutdown:
*/
bool
xfs_log_item_in_current_chkpt(
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip)
{
- struct xfs_cil_ctx *ctx;
+ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
if (list_empty(&lip->li_cil))
return false;
- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
-
/*
* li_seq is written on the first commit of a log item to record the
* first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint.
*/
- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
- return false;
- return true;
+ return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
}
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 1c6fdbf3d506..42cd1602ac25 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -230,7 +230,7 @@ struct xfs_cil;
struct xfs_cil_ctx {
struct xfs_cil *cil;
- xfs_lsn_t sequence; /* chkpt sequence # */
+ xfs_csn_t sequence; /* chkpt sequence # */
xfs_lsn_t start_lsn; /* first LSN of chkpt commit */
xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
struct xlog_ticket *ticket; /* chkpt ticket */
@@ -268,10 +268,10 @@ struct xfs_cil {
struct xfs_cil_ctx *xc_ctx;
spinlock_t xc_push_lock ____cacheline_aligned_in_smp;
- xfs_lsn_t xc_push_seq;
+ xfs_csn_t xc_push_seq;
struct list_head xc_committing;
wait_queue_head_t xc_commit_wait;
- xfs_lsn_t xc_current_sequence;
+ xfs_csn_t xc_current_sequence;
struct work_struct xc_push_work;
wait_queue_head_t xc_push_wait; /* background push throttle */
} ____cacheline_aligned_in_smp;
@@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log);
void xlog_cil_init_post_recovery(struct xlog *log);
void xlog_cil_destroy(struct xlog *log);
bool xlog_cil_empty(struct xlog *log);
+void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp,
+ xfs_csn_t *commit_seq, bool regrant);
/*
* CIL force routines
*/
-xfs_lsn_t
-xlog_cil_force_lsn(
- struct xlog *log,
- xfs_lsn_t sequence);
+xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
static inline void
xlog_cil_force(struct xlog *log)
{
- xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+ xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence);
}
/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 87886b7f77da..69408782019e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops(
error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
- if (error)
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return error;
+ }
/*
* Transfer to this new transaction all the dfops we captured
@@ -3454,6 +3456,7 @@ xlog_recover_finish(
* this) before we get around to xfs_log_mount_cancel.
*/
xlog_recover_cancel_intents(log);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
xfs_alert(log->l_mp, "Failed to recover intents");
return error;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 44b05e1d5d32..a2a5a0fd9233 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -968,9 +968,17 @@ xfs_mountfs(
/*
* Finish recovering the file system. This part needed to be delayed
* until after the root and real-time bitmap inodes were consistently
- * read in.
+ * read in. Temporarily create per-AG space reservations for metadata
+ * btree shape changes because space freeing transactions (for inode
+ * inactivation) require the per-AG reservation in lieu of reserving
+ * blocks.
*/
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error == -ENOSPC)
+ xfs_warn(mp,
+ "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
error = xfs_log_mount_finish(mp);
+ xfs_fs_unreserve_ag_blocks(mp);
if (error) {
xfs_warn(mp, "log mount finish failed");
goto out_rtunmount;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 36166bae24a6..73a1de7ceefc 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -832,7 +832,7 @@ __xfs_trans_commit(
bool regrant)
{
struct xfs_mount *mp = tp->t_mountp;
- xfs_lsn_t commit_lsn = -1;
+ xfs_csn_t commit_seq = 0;
int error = 0;
int sync = tp->t_flags & XFS_TRANS_SYNC;
@@ -874,7 +874,7 @@ __xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
+ xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
xfs_trans_free(tp);
@@ -883,7 +883,7 @@ __xfs_trans_commit(
* log out now and wait for it.
*/
if (sync) {
- error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
+ error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
XFS_STATS_INC(mp, xs_trans_sync);
} else {
XFS_STATS_INC(mp, xs_trans_async);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 075eeade4f7d..97485559008b 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -43,7 +43,7 @@ struct xfs_log_item {
struct list_head li_cil; /* CIL pointers */
struct xfs_log_vec *li_lv; /* active log vector */
struct xfs_log_vec *li_lv_shadow; /* standby vector */
- xfs_lsn_t li_seq; /* CIL commit seq */
+ xfs_csn_t li_seq; /* CIL commit seq */
};
/*
@@ -69,7 +69,7 @@ struct xfs_item_ops {
void (*iop_pin)(struct xfs_log_item *);
void (*iop_unpin)(struct xfs_log_item *, int remove);
uint (*iop_push)(struct xfs_log_item *, struct list_head *);
- void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
+ void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq);
void (*iop_release)(struct xfs_log_item *);
xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
int (*iop_recover)(struct xfs_log_item *lip,