aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/libxfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r--fs/xfs/libxfs/xfs_ag.c91
-rw-r--r--fs/xfs/libxfs/xfs_ag.h18
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c24
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c262
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c191
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.h10
-rw-r--r--fs/xfs/libxfs/xfs_attr.c234
-rw-r--r--fs/xfs/libxfs/xfs_attr.h46
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c176
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h4
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c129
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c732
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h32
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c152
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.h5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c1078
-rw-r--r--fs/xfs/libxfs/xfs_btree.h274
-rw-r--r--fs/xfs/libxfs/xfs_btree_mem.c347
-rw-r--r--fs/xfs/libxfs/xfs_btree_mem.h75
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.c133
-rw-r--r--fs/xfs/libxfs/xfs_btree_staging.h10
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c248
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h34
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h48
-rw-r--r--fs/xfs/libxfs/xfs_defer.c37
-rw-r--r--fs/xfs/libxfs/xfs_defer.h10
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c332
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h36
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c50
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c21
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c103
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c51
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h15
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c16
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h4
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.c1235
-rw-r--r--fs/xfs/libxfs/xfs_exchmaps.h124
-rw-r--r--fs/xfs/libxfs/xfs_format.h55
-rw-r--r--fs/xfs/libxfs/xfs_fs.h164
-rw-r--r--fs/xfs/libxfs/xfs_health.h99
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c288
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h5
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c157
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h11
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c26
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c20
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c102
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h7
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h93
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h4
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c46
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h6
-rw-r--r--fs/xfs/libxfs/xfs_parent.c379
-rw-r--r--fs/xfs/libxfs/xfs_parent.h110
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c69
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c78
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.h2
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c284
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h31
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c231
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.h8
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c68
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h17
-rw-r--r--fs/xfs/libxfs/xfs_sb.c51
-rw-r--r--fs/xfs/libxfs/xfs_sb.h5
-rw-r--r--fs/xfs/libxfs/xfs_shared.h73
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c205
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.h28
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c6
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c326
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.c121
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h29
-rw-r--r--fs/xfs/libxfs/xfs_types.h26
76 files changed, 7194 insertions, 2434 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 39d9525270b7..240e079cb3fb 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -194,7 +194,7 @@ xfs_initialize_perag_data(
pag = xfs_perag_get(mp, index);
error = xfs_alloc_read_agf(pag, NULL, 0, NULL);
if (!error)
- error = xfs_ialloc_read_agi(pag, NULL, NULL);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, NULL);
if (error) {
xfs_perag_put(pag);
return error;
@@ -217,6 +217,7 @@ xfs_initialize_perag_data(
*/
if (fdblocks > sbp->sb_dblocks || ifree > ialloc) {
xfs_alert(mp, "AGF corruption. Please run xfs_repair.");
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
error = -EFSCORRUPTED;
goto out;
}
@@ -241,7 +242,7 @@ __xfs_free_perag(
struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
- kmem_free(pag);
+ kfree(pag);
}
/*
@@ -263,7 +264,7 @@ xfs_free_perag(
xfs_defer_drain_free(&pag->pag_intents_drain);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
- xfs_buf_hash_destroy(pag);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
/* drop the mount's active reference */
xfs_perag_rele(pag);
@@ -351,9 +352,9 @@ xfs_free_unused_perag_range(
spin_unlock(&mp->m_perag_lock);
if (!pag)
break;
- xfs_buf_hash_destroy(pag);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
xfs_defer_drain_free(&pag->pag_intents_drain);
- kmem_free(pag);
+ kfree(pag);
}
}
@@ -381,7 +382,7 @@ xfs_initialize_perag(
continue;
}
- pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+ pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!pag) {
error = -ENOMEM;
goto out_unwind_new_pags;
@@ -389,7 +390,7 @@ xfs_initialize_perag(
pag->pag_agno = index;
pag->pag_mount = mp;
- error = radix_tree_preload(GFP_NOFS);
+ error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (error)
goto out_free_pag;
@@ -416,9 +417,10 @@ xfs_initialize_perag(
init_waitqueue_head(&pag->pag_active_wq);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
+ xfs_hooks_init(&pag->pag_rmap_update_hooks);
#endif /* __KERNEL__ */
- error = xfs_buf_hash_init(pag);
+ error = xfs_buf_cache_init(&pag->pag_bcache);
if (error)
goto out_remove_pag;
@@ -453,7 +455,7 @@ out_remove_pag:
radix_tree_delete(&mp->m_perag_tree, index);
spin_unlock(&mp->m_perag_lock);
out_free_pag:
- kmem_free(pag);
+ kfree(pag);
out_unwind_new_pags:
/* unwind any prior newly initialized pags */
xfs_free_unused_perag_range(mp, first_initialised, agcount);
@@ -491,7 +493,7 @@ xfs_btroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
+ xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
}
/* Finish initializing a free space btree. */
@@ -549,7 +551,7 @@ xfs_freesp_init_recs(
}
/*
- * Alloc btree root block init functions
+ * bnobt/cntbt btree root block init functions
*/
static void
xfs_bnoroot_init(
@@ -557,17 +559,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
- xfs_freesp_init_recs(mp, bp, id);
-}
-
-static void
-xfs_cntroot_init(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- struct aghdr_init_data *id)
-{
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
+ xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id);
}
@@ -583,7 +575,7 @@ xfs_rmaproot_init(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_rmap_rec *rrec;
- xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
+ xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno);
/*
* mark the AG header regions as static metadata The BNO
@@ -678,14 +670,13 @@ xfs_agfblock_init(
agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
agf->agf_seqno = cpu_to_be32(id->agno);
agf->agf_length = cpu_to_be32(id->agsize);
- agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
- agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
- agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
- agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
+ agf->agf_bno_root = cpu_to_be32(XFS_BNO_BLOCK(mp));
+ agf->agf_cnt_root = cpu_to_be32(XFS_CNT_BLOCK(mp));
+ agf->agf_bno_level = cpu_to_be32(1);
+ agf->agf_cnt_level = cpu_to_be32(1);
if (xfs_has_rmapbt(mp)) {
- agf->agf_roots[XFS_BTNUM_RMAPi] =
- cpu_to_be32(XFS_RMAP_BLOCK(mp));
- agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+ agf->agf_rmap_root = cpu_to_be32(XFS_RMAP_BLOCK(mp));
+ agf->agf_rmap_level = cpu_to_be32(1);
agf->agf_rmap_blocks = cpu_to_be32(1);
}
@@ -796,7 +787,7 @@ struct xfs_aghdr_grow_data {
size_t numblks;
const struct xfs_buf_ops *ops;
aghdr_init_work_f work;
- xfs_btnum_t type;
+ const struct xfs_btree_ops *bc_ops;
bool need_init;
};
@@ -850,13 +841,15 @@ xfs_ag_init_headers(
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_bnobt_buf_ops,
.work = &xfs_bnoroot_init,
+ .bc_ops = &xfs_bnobt_ops,
.need_init = true
},
{ /* CNT root block */
.daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)),
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_cntbt_buf_ops,
- .work = &xfs_cntroot_init,
+ .work = &xfs_bnoroot_init,
+ .bc_ops = &xfs_cntbt_ops,
.need_init = true
},
{ /* INO root block */
@@ -864,7 +857,7 @@ xfs_ag_init_headers(
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_inobt_buf_ops,
.work = &xfs_btroot_init,
- .type = XFS_BTNUM_INO,
+ .bc_ops = &xfs_inobt_ops,
.need_init = true
},
{ /* FINO root block */
@@ -872,7 +865,7 @@ xfs_ag_init_headers(
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_finobt_buf_ops,
.work = &xfs_btroot_init,
- .type = XFS_BTNUM_FINO,
+ .bc_ops = &xfs_finobt_ops,
.need_init = xfs_has_finobt(mp)
},
{ /* RMAP root block */
@@ -880,6 +873,7 @@ xfs_ag_init_headers(
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_rmapbt_buf_ops,
.work = &xfs_rmaproot_init,
+ .bc_ops = &xfs_rmapbt_ops,
.need_init = xfs_has_rmapbt(mp)
},
{ /* REFC root block */
@@ -887,7 +881,7 @@ xfs_ag_init_headers(
.numblks = BTOBB(mp->m_sb.sb_blocksize),
.ops = &xfs_refcountbt_buf_ops,
.work = &xfs_btroot_init,
- .type = XFS_BTNUM_REFC,
+ .bc_ops = &xfs_refcountbt_ops,
.need_init = xfs_has_reflink(mp)
},
{ /* NULL terminating block */
@@ -905,7 +899,7 @@ xfs_ag_init_headers(
id->daddr = dp->daddr;
id->numblks = dp->numblks;
- id->type = dp->type;
+ id->bc_ops = dp->bc_ops;
error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops);
if (error)
break;
@@ -937,7 +931,7 @@ xfs_ag_shrink_space(
int error, err2;
ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1);
- error = xfs_ialloc_read_agi(pag, *tpp, &agibp);
+ error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp);
if (error)
return error;
@@ -950,8 +944,10 @@ xfs_ag_shrink_space(
agf = agfbp->b_addr;
aglen = be32_to_cpu(agi->agi_length);
/* some extra paranoid checks before we shrink the ag */
- if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length))
+ if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
return -EFSCORRUPTED;
+ }
if (delta >= aglen)
return -EINVAL;
@@ -967,9 +963,7 @@ xfs_ag_shrink_space(
* Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return.
*/
- error = xfs_ag_resv_free(pag);
- if (error)
- return error;
+ xfs_ag_resv_free(pag);
/* internal log shouldn't also show up in the free space btrees */
error = xfs_alloc_vextent_exact_bno(&args,
@@ -979,14 +973,23 @@ xfs_ag_shrink_space(
if (error) {
/*
- * if extent allocation fails, need to roll the transaction to
+ * If extent allocation fails, need to roll the transaction to
* ensure that the AGFL fixup has been committed anyway.
+ *
+ * We need to hold the AGF across the roll to ensure nothing can
+ * access the AG for allocation until the shrink is fully
+ * cleaned up. And due to the resetting of the AG block
+ * reservation space needing to lock the AGI, we also have to
+ * hold that so we don't get AGI/AGF lock order inversions in
+ * the error handling path.
*/
xfs_trans_bhold(*tpp, agfbp);
+ xfs_trans_bhold(*tpp, agibp);
err2 = xfs_trans_roll(tpp);
if (err2)
return err2;
xfs_trans_bjoin(*tpp, agfbp);
+ xfs_trans_bjoin(*tpp, agibp);
goto resv_init_out;
}
@@ -1057,7 +1060,7 @@ xfs_ag_extend_space(
ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1);
- error = xfs_ialloc_read_agi(pag, tp, &bp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &bp);
if (error)
return error;
@@ -1114,7 +1117,7 @@ xfs_ag_get_geometry(
int error;
/* Lock the AG headers. */
- error = xfs_ialloc_read_agi(pag, NULL, &agi_bp);
+ error = xfs_ialloc_read_agi(pag, NULL, 0, &agi_bp);
if (error)
return error;
error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp);
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 4b343c4fac28..35de09a2516c 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -36,8 +36,9 @@ struct xfs_perag {
atomic_t pag_active_ref; /* active reference count */
wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */
unsigned long pag_opstate;
- uint8_t pagf_levels[XFS_BTNUM_AGF];
- /* # of levels in bno & cnt btree */
+ uint8_t pagf_bno_level; /* # of levels in bno btree */
+ uint8_t pagf_cnt_level; /* # of levels in cnt btree */
+ uint8_t pagf_rmap_level;/* # of levels in rmap btree */
uint32_t pagf_flcount; /* count of blocks in freelist */
xfs_extlen_t pagf_freeblks; /* total free blocks */
xfs_extlen_t pagf_longest; /* longest free space */
@@ -86,8 +87,10 @@ struct xfs_perag {
* Alternate btree heights so that online repair won't trip the write
* verifiers while rebuilding the AG btrees.
*/
- uint8_t pagf_repair_levels[XFS_BTNUM_AGF];
+ uint8_t pagf_repair_bno_level;
+ uint8_t pagf_repair_cnt_level;
uint8_t pagf_repair_refcount_level;
+ uint8_t pagf_repair_rmap_level;
#endif
spinlock_t pag_state_lock;
@@ -104,9 +107,7 @@ struct xfs_perag {
int pag_ici_reclaimable; /* reclaimable inodes */
unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
- /* buffer cache index */
- spinlock_t pag_buf_lock; /* lock for pag_buf_hash */
- struct rhashtable pag_buf_hash;
+ struct xfs_buf_cache pag_bcache;
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
@@ -119,6 +120,9 @@ struct xfs_perag {
* inconsistencies.
*/
struct xfs_defer_drain pag_intents_drain;
+
+ /* Hook to feed rmapbt updates to an active online repair. */
+ struct xfs_hooks pag_rmap_update_hooks;
#endif /* __KERNEL__ */
};
@@ -331,7 +335,7 @@ struct aghdr_init_data {
/* per header data */
xfs_daddr_t daddr; /* header location */
size_t numblks; /* size of header */
- xfs_btnum_t type; /* type of btree root block */
+ const struct xfs_btree_ops *bc_ops; /* btree ops */
};
int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index da1057bd0e60..216423df939e 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -126,14 +126,13 @@ xfs_ag_resv_needed(
}
/* Clean out a reservation */
-static int
+static void
__xfs_ag_resv_free(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
struct xfs_ag_resv *resv;
xfs_extlen_t oldresv;
- int error;
trace_xfs_ag_resv_free(pag, type, 0);
@@ -149,30 +148,19 @@ __xfs_ag_resv_free(
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
- error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
+ xfs_add_fdblocks(pag->pag_mount, oldresv);
resv->ar_reserved = 0;
resv->ar_asked = 0;
resv->ar_orig_reserved = 0;
-
- if (error)
- trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
- error, _RET_IP_);
- return error;
}
/* Free a per-AG reservation. */
-int
+void
xfs_ag_resv_free(
struct xfs_perag *pag)
{
- int error;
- int err2;
-
- error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
- err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
- if (err2 && !error)
- error = err2;
- return error;
+ __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
+ __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
}
static int
@@ -216,7 +204,7 @@ __xfs_ag_resv_init(
if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
error = -ENOSPC;
else
- error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
+ error = xfs_dec_fdblocks(mp, hidden_space, true);
if (error) {
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
index b74b210008ea..ff20ed93de77 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.h
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -6,7 +6,7 @@
#ifndef __XFS_AG_RESV_H__
#define __XFS_AG_RESV_H__
-int xfs_ag_resv_free(struct xfs_perag *pag);
+void xfs_ag_resv_free(struct xfs_perag *pag);
int xfs_ag_resv_init(struct xfs_perag *pag, struct xfs_trans *tp);
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3bd0a33fee0a..6cb8b2ddc541 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -26,6 +26,7 @@
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_bmap.h"
+#include "xfs_health.h"
struct kmem_cache *xfs_extfree_item_cache;
@@ -78,7 +79,7 @@ xfs_prealloc_blocks(
}
/*
- * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
+ * The number of blocks per AG that we withhold from xfs_dec_fdblocks to
* guarantee that we can refill the AGFL prior to allocating space in a nearly
* full AG. Although the space described by the free space btrees, the
* blocks used by the freesp btrees themselves, and the blocks owned by the
@@ -88,7 +89,7 @@ xfs_prealloc_blocks(
* until the fs goes down, we subtract this many AG blocks from the incore
* fdblocks to ensure user allocation does not overcommit the space the
* filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
- * withhold space from xfs_mod_fdblocks, so we do not account for that here.
+ * withhold space from xfs_dec_fdblocks, so we do not account for that here.
*/
#define XFS_ALLOCBT_AGFL_RESERVE 4
@@ -150,23 +151,38 @@ xfs_alloc_ag_max_usable(
return mp->m_sb.sb_agblocks - blocks;
}
+
+static int
+xfs_alloc_lookup(
+ struct xfs_btree_cur *cur,
+ xfs_lookup_t dir,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ int *stat)
+{
+ int error;
+
+ cur->bc_rec.a.ar_startblock = bno;
+ cur->bc_rec.a.ar_blockcount = len;
+ error = xfs_btree_lookup(cur, dir, stat);
+ if (*stat == 1)
+ cur->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE;
+ else
+ cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE;
+ return error;
+}
+
/*
* Lookup the record equal to [bno, len] in the btree given by cur.
*/
-STATIC int /* error */
+static inline int /* error */
xfs_alloc_lookup_eq(
struct xfs_btree_cur *cur, /* btree cursor */
xfs_agblock_t bno, /* starting block of extent */
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
- int error;
-
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
- cur->bc_ag.abt.active = (*stat == 1);
- return error;
+ return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, bno, len, stat);
}
/*
@@ -180,13 +196,7 @@ xfs_alloc_lookup_ge(
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
- int error;
-
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
- cur->bc_ag.abt.active = (*stat == 1);
- return error;
+ return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, bno, len, stat);
}
/*
@@ -200,19 +210,14 @@ xfs_alloc_lookup_le(
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
- int error;
- cur->bc_rec.a.ar_startblock = bno;
- cur->bc_rec.a.ar_blockcount = len;
- error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
- cur->bc_ag.abt.active = (*stat == 1);
- return error;
+ return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, bno, len, stat);
}
static inline bool
xfs_alloc_cur_active(
struct xfs_btree_cur *cur)
{
- return cur && cur->bc_ag.abt.active;
+ return cur && (cur->bc_flags & XFS_BTREE_ALLOCBT_ACTIVE);
}
/*
@@ -268,12 +273,12 @@ xfs_alloc_complain_bad_rec(
struct xfs_mount *mp = cur->bc_mp;
xfs_warn(mp,
- "%s Freespace BTree record corruption in AG %d detected at %pS!",
- cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
- cur->bc_ag.pag->pag_agno, fa);
+ "%sbt record corruption in AG %d detected at %pS!",
+ cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"start block 0x%x block count 0x%x", irec->ar_startblock,
irec->ar_blockcount);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -497,14 +502,18 @@ xfs_alloc_fixup_trees(
if (XFS_IS_CORRUPT(mp,
i != 1 ||
nfbno1 != fbno ||
- nflen1 != flen))
+ nflen1 != flen)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
#endif
} else {
if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
}
/*
* Look up the record in the by-block tree if necessary.
@@ -516,14 +525,18 @@ xfs_alloc_fixup_trees(
if (XFS_IS_CORRUPT(mp,
i != 1 ||
nfbno1 != fbno ||
- nflen1 != flen))
+ nflen1 != flen)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
#endif
} else {
if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
}
#ifdef DEBUG
@@ -536,8 +549,10 @@ xfs_alloc_fixup_trees(
if (XFS_IS_CORRUPT(mp,
bnoblock->bb_numrecs !=
- cntblock->bb_numrecs))
+ cntblock->bb_numrecs)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
}
#endif
@@ -567,30 +582,40 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
/*
* Add new by-size btree entry(s).
*/
if (nfbno1 != NULLAGBLOCK) {
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 0))
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
}
if (nfbno2 != NULLAGBLOCK) {
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 0))
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
return -EFSCORRUPTED;
+ }
}
/*
* Fix up the by-block btree entry(s).
@@ -601,8 +626,10 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
} else {
/*
* Update the by-block entry to start later|be shorter.
@@ -616,12 +643,16 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 0))
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
return -EFSCORRUPTED;
+ }
}
return 0;
}
@@ -755,6 +786,8 @@ xfs_alloc_read_agfl(
mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
if (error)
return error;
xfs_buf_set_ref(bp, XFS_AGFL_REF);
@@ -776,6 +809,7 @@ xfs_alloc_update_counters(
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
be32_to_cpu(agf->agf_length))) {
xfs_buf_mark_corrupt(agbp);
+ xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
return -EFSCORRUPTED;
}
@@ -828,8 +862,8 @@ xfs_alloc_cur_setup(
* attempt a small allocation.
*/
if (!acur->cnt)
- acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->pag, XFS_BTNUM_CNT);
+ acur->cnt = xfs_cntbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->pag);
error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
if (error)
return error;
@@ -838,11 +872,11 @@ xfs_alloc_cur_setup(
* Allocate the bnobt left and right search cursors.
*/
if (!acur->bnolt)
- acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->pag, XFS_BTNUM_BNO);
+ acur->bnolt = xfs_bnobt_init_cursor(args->mp, args->tp,
+ args->agbp, args->pag);
if (!acur->bnogt)
- acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->pag, XFS_BTNUM_BNO);
+ acur->bnogt = xfs_bnobt_init_cursor(args->mp, args->tp,
+ args->agbp, args->pag);
return i == 1 ? 0 : -ENOSPC;
}
@@ -884,15 +918,17 @@ xfs_alloc_cur_check(
bool busy;
unsigned busy_gen = 0;
bool deactivate = false;
- bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
+ bool isbnobt = xfs_btree_is_bno(cur->bc_ops);
*new = 0;
error = xfs_alloc_get_rec(cur, &bno, &len, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(args->mp, i != 1))
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
/*
* Check minlen and deactivate a cntbt cursor if out of acceptable size
@@ -958,9 +994,8 @@ xfs_alloc_cur_check(
deactivate = true;
out:
if (deactivate)
- cur->bc_ag.abt.active = false;
- trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
- *new);
+ cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE;
+ trace_xfs_alloc_cur_check(cur, bno, len, diff, *new);
return 0;
}
@@ -1098,6 +1133,7 @@ xfs_alloc_ag_vextent_small(
if (error)
goto error;
if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(ccur);
error = -EFSCORRUPTED;
goto error;
}
@@ -1132,6 +1168,7 @@ xfs_alloc_ag_vextent_small(
*fbnop = args->agbno = fbno;
*flenp = args->len = 1;
if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
+ xfs_btree_mark_sick(ccur);
error = -EFSCORRUPTED;
goto error;
}
@@ -1197,8 +1234,8 @@ xfs_alloc_ag_vextent_exact(
/*
* Allocate/initialize a cursor for the by-number freespace btree.
*/
- bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->pag, XFS_BTNUM_BNO);
+ bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp,
+ args->pag);
/*
* Lookup bno and minlen in the btree (minlen is irrelevant, really).
@@ -1218,6 +1255,7 @@ xfs_alloc_ag_vextent_exact(
if (error)
goto error0;
if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1257,8 +1295,8 @@ xfs_alloc_ag_vextent_exact(
* We are allocating agbno for args->len
* Allocate/initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->pag, XFS_BTNUM_CNT);
+ cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->pag);
ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
args->len, XFSA_FIXUP_BNO_OK);
@@ -1330,7 +1368,7 @@ xfs_alloc_walk_iter(
if (error)
return error;
if (i == 0)
- cur->bc_ag.abt.active = false;
+ cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE;
if (count > 0)
count--;
@@ -1444,7 +1482,7 @@ xfs_alloc_ag_vextent_locality(
if (error)
return error;
if (i) {
- acur->cnt->bc_ag.abt.active = true;
+ acur->cnt->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE;
fbcur = acur->cnt;
fbinc = false;
}
@@ -1497,8 +1535,10 @@ xfs_alloc_ag_vextent_lastblock(
error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(args->mp, i != 1))
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(acur->cnt);
return -EFSCORRUPTED;
+ }
if (*len >= args->minlen)
break;
error = xfs_btree_increment(acur->cnt, 0, &i);
@@ -1670,8 +1710,8 @@ restart:
/*
* Allocate and initialize a cursor for the by-size btree.
*/
- cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->pag, XFS_BTNUM_CNT);
+ cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp,
+ args->pag);
bno_cur = NULL;
/*
@@ -1710,6 +1750,7 @@ restart:
if (error)
goto error0;
if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1756,6 +1797,7 @@ restart:
rlen != 0 &&
(rlen > flen ||
rbno + rlen > fbno + flen))) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1778,6 +1820,7 @@ restart:
&i)))
goto error0;
if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1790,6 +1833,7 @@ restart:
rlen != 0 &&
(rlen > flen ||
rbno + rlen > fbno + flen))) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1806,6 +1850,7 @@ restart:
&i)))
goto error0;
if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1844,14 +1889,15 @@ restart:
rlen = args->len;
if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
/*
* Allocate and initialize a cursor for the by-block tree.
*/
- bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->pag, XFS_BTNUM_BNO);
+ bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp,
+ args->pag);
if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
rbno, rlen, XFSA_FIXUP_CNT_OK)))
goto error0;
@@ -1863,6 +1909,7 @@ restart:
if (XFS_IS_CORRUPT(args->mp,
args->agbno + args->len >
be32_to_cpu(agf->agf_length))) {
+ xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1924,7 +1971,7 @@ xfs_free_ag_extent(
/*
* Allocate and initialize a cursor for the by-block btree.
*/
- bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
+ bno_cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
/*
* Look for a neighboring block on the left (lower block numbers)
* that is contiguous with this space.
@@ -1938,6 +1985,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1953,6 +2001,7 @@ xfs_free_ag_extent(
* Very bad.
*/
if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1971,6 +2020,7 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1986,6 +2036,7 @@ xfs_free_ag_extent(
* Very bad.
*/
if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1994,7 +2045,7 @@ xfs_free_ag_extent(
/*
* Now allocate and initialize a cursor for the by-size tree.
*/
- cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
+ cnt_cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag);
/*
* Have both left and right contiguous neighbors.
* Merge all three into a single free block.
@@ -2006,12 +2057,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2021,12 +2074,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2036,6 +2091,7 @@ xfs_free_ag_extent(
if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2045,6 +2101,7 @@ xfs_free_ag_extent(
if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2064,6 +2121,7 @@ xfs_free_ag_extent(
i != 1 ||
xxbno != ltbno ||
xxlen != ltlen)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2088,12 +2146,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2104,6 +2164,7 @@ xfs_free_ag_extent(
if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2123,12 +2184,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2151,6 +2214,7 @@ xfs_free_ag_extent(
if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bno_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2163,12 +2227,14 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2267,8 +2333,9 @@ xfs_alloc_min_freelist(
struct xfs_perag *pag)
{
/* AG btrees have at least 1 level. */
- static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
- const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
+ const unsigned int bno_level = pag ? pag->pagf_bno_level : 1;
+ const unsigned int cnt_level = pag ? pag->pagf_cnt_level : 1;
+ const unsigned int rmap_level = pag ? pag->pagf_rmap_level : 1;
unsigned int min_free;
ASSERT(mp->m_alloc_maxlevels > 0);
@@ -2295,16 +2362,12 @@ xfs_alloc_min_freelist(
*/
/* space needed by-bno freespace btree */
- min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
- mp->m_alloc_maxlevels) * 2 - 2;
+ min_free = min(bno_level + 1, mp->m_alloc_maxlevels) * 2 - 2;
/* space needed by-size freespace btree */
- min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
- mp->m_alloc_maxlevels) * 2 - 2;
+ min_free += min(cnt_level + 1, mp->m_alloc_maxlevels) * 2 - 2;
/* space needed reverse mapping used space btree */
if (xfs_has_rmapbt(mp))
- min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels) * 2 - 2;
-
+ min_free += min(rmap_level + 1, mp->m_rmap_maxlevels) * 2 - 2;
return min_free;
}
@@ -2691,13 +2754,14 @@ xfs_exact_minlen_extent_available(
xfs_extlen_t flen;
int error = 0;
- cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
- args->pag, XFS_BTNUM_CNT);
+ cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, agbp,
+ args->pag);
error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
if (error)
goto out;
if (*stat == 0) {
+ xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto out;
}
@@ -2987,8 +3051,8 @@ xfs_alloc_log_agf(
offsetof(xfs_agf_t, agf_versionnum),
offsetof(xfs_agf_t, agf_seqno),
offsetof(xfs_agf_t, agf_length),
- offsetof(xfs_agf_t, agf_roots[0]),
- offsetof(xfs_agf_t, agf_levels[0]),
+ offsetof(xfs_agf_t, agf_bno_root), /* also cnt/rmap root */
+ offsetof(xfs_agf_t, agf_bno_level), /* also cnt/rmap levels */
offsetof(xfs_agf_t, agf_flfirst),
offsetof(xfs_agf_t, agf_fllast),
offsetof(xfs_agf_t, agf_flcount),
@@ -3167,12 +3231,10 @@ xfs_agf_verify(
be32_to_cpu(agf->agf_freeblks) > agf_length)
return __this_address;
- if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
- mp->m_alloc_maxlevels ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
- mp->m_alloc_maxlevels)
+ if (be32_to_cpu(agf->agf_bno_level) < 1 ||
+ be32_to_cpu(agf->agf_cnt_level) < 1 ||
+ be32_to_cpu(agf->agf_bno_level) > mp->m_alloc_maxlevels ||
+ be32_to_cpu(agf->agf_cnt_level) > mp->m_alloc_maxlevels)
return __this_address;
if (xfs_has_lazysbcount(mp) &&
@@ -3183,9 +3245,8 @@ xfs_agf_verify(
if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
return __this_address;
- if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
- mp->m_rmap_maxlevels)
+ if (be32_to_cpu(agf->agf_rmap_level) < 1 ||
+ be32_to_cpu(agf->agf_rmap_level) > mp->m_rmap_maxlevels)
return __this_address;
}
@@ -3268,6 +3329,8 @@ xfs_read_agf(
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
if (error)
return error;
@@ -3309,12 +3372,9 @@ xfs_alloc_read_agf(
pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
pag->pagf_longest = be32_to_cpu(agf->agf_longest);
- pag->pagf_levels[XFS_BTNUM_BNOi] =
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
- pag->pagf_levels[XFS_BTNUM_CNTi] =
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
- pag->pagf_levels[XFS_BTNUM_RMAPi] =
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
+ pag->pagf_bno_level = be32_to_cpu(agf->agf_bno_level);
+ pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level);
+ pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
if (xfs_agfl_needs_reset(pag->pag_mount, agf))
set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
@@ -3343,10 +3403,8 @@ xfs_alloc_read_agf(
ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
- ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
- ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
+ ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
+ ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
}
#endif
if (agfbpp)
@@ -3895,17 +3953,23 @@ __xfs_free_extent(
return -EIO;
error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
- if (error)
+ if (error) {
+ if (xfs_metadata_is_sick(error))
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
return error;
+ }
+
agf = agbp->b_addr;
if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
error = -EFSCORRUPTED;
goto err_release;
}
/* validate the extent size is legal now we have the agf locked */
if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
error = -EFSCORRUPTED;
goto err_release;
}
@@ -3962,7 +4026,7 @@ xfs_alloc_query_range(
union xfs_btree_irec high_brec = { .a = *high_rec };
struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
- ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ ASSERT(xfs_btree_is_bno(cur->bc_ops));
return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_alloc_query_range_helper, &query);
}
@@ -3976,7 +4040,7 @@ xfs_alloc_query_all(
{
struct xfs_alloc_query_range_info query;
- ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
+ ASSERT(xfs_btree_is_bno(cur->bc_ops));
query.priv = priv;
query.fn = fn;
return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index a7032bf0cd37..6ef5ddd89600 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -16,6 +16,7 @@
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
+#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_ag.h"
@@ -23,13 +24,22 @@
static struct kmem_cache *xfs_allocbt_cur_cache;
STATIC struct xfs_btree_cur *
-xfs_allocbt_dup_cursor(
+xfs_bnobt_dup_cursor(
struct xfs_btree_cur *cur)
{
- return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum);
+ return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
+ cur->bc_ag.pag);
}
+STATIC struct xfs_btree_cur *
+xfs_cntbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
+ cur->bc_ag.pag);
+}
+
+
STATIC void
xfs_allocbt_set_root(
struct xfs_btree_cur *cur,
@@ -38,13 +48,18 @@ xfs_allocbt_set_root(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- int btnum = cur->bc_btnum;
ASSERT(ptr->s != 0);
- agf->agf_roots[btnum] = ptr->s;
- be32_add_cpu(&agf->agf_levels[btnum], inc);
- cur->bc_ag.pag->pagf_levels[btnum] += inc;
+ if (xfs_btree_is_bno(cur->bc_ops)) {
+ agf->agf_bno_root = ptr->s;
+ be32_add_cpu(&agf->agf_bno_level, inc);
+ cur->bc_ag.pag->pagf_bno_level += inc;
+ } else {
+ agf->agf_cnt_root = ptr->s;
+ be32_add_cpu(&agf->agf_cnt_level, inc);
+ cur->bc_ag.pag->pagf_cnt_level += inc;
+ }
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -116,7 +131,7 @@ xfs_allocbt_update_lastrec(
__be32 len;
int numrecs;
- ASSERT(cur->bc_btnum == XFS_BTNUM_CNT);
+ ASSERT(!xfs_btree_is_bno(cur->bc_ops));
switch (reason) {
case LASTREC_UPDATE:
@@ -226,7 +241,10 @@ xfs_allocbt_init_ptr_from_cur(
ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
- ptr->s = agf->agf_roots[cur->bc_btnum];
+ if (xfs_btree_is_bno(cur->bc_ops))
+ ptr->s = agf->agf_bno_root;
+ else
+ ptr->s = agf->agf_cnt_root;
}
STATIC int64_t
@@ -299,13 +317,12 @@ xfs_allocbt_verify(
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
unsigned int level;
- xfs_btnum_t btnum = XFS_BTNUM_BNOi;
if (!xfs_verify_magic(bp, block->bb_magic))
return __this_address;
if (xfs_has_crc(mp)) {
- fa = xfs_btree_sblock_v5hdr_verify(bp);
+ fa = xfs_btree_agblock_v5hdr_verify(bp);
if (fa)
return fa;
}
@@ -320,26 +337,32 @@ xfs_allocbt_verify(
* against.
*/
level = be16_to_cpu(block->bb_level);
- if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
- btnum = XFS_BTNUM_CNTi;
if (pag && xfs_perag_initialised_agf(pag)) {
- unsigned int maxlevel = pag->pagf_levels[btnum];
+ unsigned int maxlevel, repair_maxlevel = 0;
-#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Online repair could be rewriting the free space btrees, so
* we'll validate against the larger of either tree while this
* is going on.
*/
- maxlevel = max_t(unsigned int, maxlevel,
- pag->pagf_repair_levels[btnum]);
+ if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) {
+ maxlevel = pag->pagf_cnt_level;
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ repair_maxlevel = pag->pagf_repair_cnt_level;
+#endif
+ } else {
+ maxlevel = pag->pagf_bno_level;
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ repair_maxlevel = pag->pagf_repair_bno_level;
#endif
- if (level >= maxlevel)
+ }
+
+ if (level >= max(maxlevel, repair_maxlevel))
return __this_address;
} else if (level >= mp->m_alloc_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
+ return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
@@ -348,7 +371,7 @@ xfs_allocbt_read_verify(
{
xfs_failaddr_t fa;
- if (!xfs_btree_sblock_verify_crc(bp))
+ if (!xfs_btree_agblock_verify_crc(bp))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_allocbt_verify(bp);
@@ -372,7 +395,7 @@ xfs_allocbt_write_verify(
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
- xfs_btree_sblock_calc_crc(bp);
+ xfs_btree_agblock_calc_crc(bp);
}
@@ -454,11 +477,19 @@ xfs_allocbt_keys_contiguous(
be32_to_cpu(key2->alloc.ar_startblock));
}
-static const struct xfs_btree_ops xfs_bnobt_ops = {
+const struct xfs_btree_ops xfs_bnobt_ops = {
+ .name = "bno",
+ .type = XFS_BTREE_TYPE_AG,
+
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
- .dup_cursor = xfs_allocbt_dup_cursor,
+ .lru_refs = XFS_ALLOC_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_abtb_2),
+ .sick_mask = XFS_SICK_AG_BNOBT,
+
+ .dup_cursor = xfs_bnobt_dup_cursor,
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
@@ -477,11 +508,20 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
.keys_contiguous = xfs_allocbt_keys_contiguous,
};
-static const struct xfs_btree_ops xfs_cntbt_ops = {
+const struct xfs_btree_ops xfs_cntbt_ops = {
+ .name = "cnt",
+ .type = XFS_BTREE_TYPE_AG,
+ .geom_flags = XFS_BTGEO_LASTREC_UPDATE,
+
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
+
+ .lru_refs = XFS_ALLOC_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_abtc_2),
+ .sick_mask = XFS_SICK_AG_CNTBT,
- .dup_cursor = xfs_allocbt_dup_cursor,
+ .dup_cursor = xfs_cntbt_dup_cursor,
.set_root = xfs_allocbt_set_root,
.alloc_block = xfs_allocbt_alloc_block,
.free_block = xfs_allocbt_free_block,
@@ -500,76 +540,55 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
.keys_contiguous = NULL, /* not needed right now */
};
-/* Allocate most of a new allocation btree cursor. */
-STATIC struct xfs_btree_cur *
-xfs_allocbt_init_common(
+/*
+ * Allocate a new bnobt cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
+ */
+struct xfs_btree_cur *
+xfs_bnobt_init_cursor(
struct xfs_mount *mp,
struct xfs_trans *tp,
- struct xfs_perag *pag,
- xfs_btnum_t btnum)
+ struct xfs_buf *agbp,
+ struct xfs_perag *pag)
{
struct xfs_btree_cur *cur;
- ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT);
-
- cur = xfs_btree_alloc_cursor(mp, tp, btnum, mp->m_alloc_maxlevels,
- xfs_allocbt_cur_cache);
- cur->bc_ag.abt.active = false;
-
- if (btnum == XFS_BTNUM_CNT) {
- cur->bc_ops = &xfs_cntbt_ops;
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2);
- cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
- } else {
- cur->bc_ops = &xfs_bnobt_ops;
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2);
- }
-
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
+ mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_ag.agbp = agbp;
+ if (agbp) {
+ struct xfs_agf *agf = agbp->b_addr;
- if (xfs_has_crc(mp))
- cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
+ cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level);
+ }
return cur;
}
/*
- * Allocate a new allocation btree cursor.
+ * Allocate a new cntbt cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
*/
-struct xfs_btree_cur * /* new alloc btree cursor */
-xfs_allocbt_init_cursor(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_buf *agbp, /* buffer for agf structure */
- struct xfs_perag *pag,
- xfs_btnum_t btnum) /* btree identifier */
-{
- struct xfs_agf *agf = agbp->b_addr;
- struct xfs_btree_cur *cur;
-
- cur = xfs_allocbt_init_common(mp, tp, pag, btnum);
- if (btnum == XFS_BTNUM_CNT)
- cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
- else
- cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
-
- cur->bc_ag.agbp = agbp;
-
- return cur;
-}
-
-/* Create a free space btree cursor with a fake root for staging. */
struct xfs_btree_cur *
-xfs_allocbt_stage_cursor(
+xfs_cntbt_init_cursor(
struct xfs_mount *mp,
- struct xbtree_afakeroot *afake,
- struct xfs_perag *pag,
- xfs_btnum_t btnum)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_perag *pag)
{
struct xfs_btree_cur *cur;
- cur = xfs_allocbt_init_common(mp, NULL, pag, btnum);
- xfs_btree_stage_afakeroot(cur, afake);
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
+ mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
+ cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_ag.agbp = agbp;
+ if (agbp) {
+ struct xfs_agf *agf = agbp->b_addr;
+
+ cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level);
+ }
return cur;
}
@@ -588,16 +607,16 @@ xfs_allocbt_commit_staged_btree(
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
- agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
- agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
- xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
-
- if (cur->bc_btnum == XFS_BTNUM_BNO) {
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_bnobt_ops);
+ if (xfs_btree_is_bno(cur->bc_ops)) {
+ agf->agf_bno_root = cpu_to_be32(afake->af_root);
+ agf->agf_bno_level = cpu_to_be32(afake->af_levels);
} else {
- cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE;
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_cntbt_ops);
+ agf->agf_cnt_root = cpu_to_be32(afake->af_root);
+ agf->agf_cnt_level = cpu_to_be32(afake->af_levels);
}
+ xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
+
+ xfs_btree_commit_afakeroot(cur, tp, agbp);
}
/* Calculate number of records in an alloc btree block. */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h
index 45df893ef6bb..155b47f231ab 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.h
+++ b/fs/xfs/libxfs/xfs_alloc_btree.h
@@ -47,12 +47,12 @@ struct xbtree_afakeroot;
(maxrecs) * sizeof(xfs_alloc_key_t) + \
((index) - 1) * sizeof(xfs_alloc_ptr_t)))
-extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *mp,
+struct xfs_btree_cur *xfs_bnobt_init_cursor(struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_buf *bp,
- struct xfs_perag *pag, xfs_btnum_t btnum);
-struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp,
- struct xbtree_afakeroot *afake, struct xfs_perag *pag,
- xfs_btnum_t btnum);
+ struct xfs_perag *pag);
+struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp,
+ struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_perag *pag);
extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int);
extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index e965a48e7db9..430cd3244c14 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -26,6 +26,7 @@
#include "xfs_trace.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
+#include "xfs_parent.h"
struct kmem_cache *xfs_attr_intent_cache;
@@ -87,6 +88,8 @@ xfs_attr_is_leaf(
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec imap;
+ ASSERT(!xfs_need_iread_extents(ifp));
+
if (ifp->if_nextents != 1 || ifp->if_format != XFS_DINODE_FMT_EXTENTS)
return false;
@@ -224,11 +227,21 @@ int
xfs_attr_get_ilocked(
struct xfs_da_args *args)
{
- ASSERT(xfs_isilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+ int error;
+
+ xfs_assert_ilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
if (!xfs_inode_hasattr(args->dp))
return -ENOATTR;
+ /*
+ * The incore attr fork iext tree must be loaded for xfs_attr_is_leaf
+ * to work correctly.
+ */
+ error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
if (args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL)
return xfs_attr_shortform_getvalue(args);
if (xfs_attr_is_leaf(args->dp))
@@ -264,9 +277,11 @@ xfs_attr_get(
if (xfs_is_shutdown(args->dp->i_mount))
return -EIO;
+ if (!args->owner)
+ args->owner = args->dp->i_ino;
args->geo = args->dp->i_mount->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
- args->hashval = xfs_da_hashname(args->name, args->namelen);
+ xfs_attr_sethash(args);
/* Entirely possible to look up a name which doesn't exist */
args->op_flags = XFS_DA_OP_OKNOENT;
@@ -363,7 +378,7 @@ xfs_attr_try_sf_addname(
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
*/
- if (!error && !(args->op_flags & XFS_DA_OP_NOTIME))
+ if (!error)
xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
if (xfs_has_wsync(dp->i_mount))
@@ -401,6 +416,50 @@ out:
return error;
}
+/* Compute the hash value for a user/root/secure extended attribute */
+xfs_dahash_t
+xfs_attr_hashname(
+ const uint8_t *name,
+ int namelen)
+{
+ return xfs_da_hashname(name, namelen);
+}
+
+/* Compute the hash value for any extended attribute from any namespace. */
+xfs_dahash_t
+xfs_attr_hashval(
+ struct xfs_mount *mp,
+ unsigned int attr_flags,
+ const uint8_t *name,
+ int namelen,
+ const void *value,
+ int valuelen)
+{
+ ASSERT(xfs_attr_check_namespace(attr_flags));
+
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_parent_hashattr(mp, name, namelen, value, valuelen);
+
+ return xfs_attr_hashname(name, namelen);
+}
+
+/*
+ * PPTR_REPLACE operations require the caller to set the old and new names and
+ * values explicitly. Update the canonical fields to the new name and value
+ * here now that the removal phase has finished.
+ */
+static void
+xfs_attr_update_pptr_replace_args(
+ struct xfs_da_args *args)
+{
+ ASSERT(args->new_namelen > 0);
+ args->name = args->new_name;
+ args->namelen = args->new_namelen;
+ args->value = args->new_value;
+ args->valuelen = args->new_valuelen;
+ xfs_attr_sethash(args);
+}
+
/*
* Handle the state change on completion of a multi-state attr operation.
*
@@ -418,14 +477,15 @@ xfs_attr_complete_op(
enum xfs_delattr_state replace_state)
{
struct xfs_da_args *args = attr->xattri_da_args;
- bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
+
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
+ replace_state = XFS_DAS_DONE;
+ else if (xfs_attr_intent_op(attr) == XFS_ATTRI_OP_FLAGS_PPTR_REPLACE)
+ xfs_attr_update_pptr_replace_args(args);
args->op_flags &= ~XFS_DA_OP_REPLACE;
args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
- if (do_replace)
- return replace_state;
-
- return XFS_DAS_DONE;
+ return replace_state;
}
static int
@@ -647,8 +707,8 @@ xfs_attr_leaf_remove_attr(
int forkoff;
int error;
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
- &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -679,7 +739,7 @@ xfs_attr_leaf_shrink(
if (!xfs_attr_is_leaf(dp))
return 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
if (error)
return error;
@@ -868,6 +928,11 @@ xfs_attr_lookup(
return -ENOATTR;
}
+ /* Prerequisite for xfs_attr_is_leaf */
+ error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
if (xfs_attr_is_leaf(dp)) {
error = xfs_attr_leaf_hasname(args, &bp);
@@ -883,73 +948,72 @@ xfs_attr_lookup(
return error;
}
-static void
-xfs_attr_defer_add(
- struct xfs_da_args *args,
- unsigned int op_flags)
+int
+xfs_attr_add_fork(
+ struct xfs_inode *ip, /* incore inode pointer */
+ int size, /* space new attribute needs */
+ int rsvd) /* xact may use reserved blks */
{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp; /* transaction pointer */
+ unsigned int blks; /* space reservation */
+ int error; /* error return value */
- struct xfs_attr_intent *new;
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
- new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL);
- new->xattri_op_flags = op_flags;
- new->xattri_da_args = args;
+ blks = XFS_ADDAFORK_SPACE_RES(mp);
- switch (op_flags) {
- case XFS_ATTRI_OP_FLAGS_SET:
- new->xattri_dela_state = xfs_attr_init_add_state(args);
- break;
- case XFS_ATTRI_OP_FLAGS_REPLACE:
- new->xattri_dela_state = xfs_attr_init_replace_state(args);
- break;
- case XFS_ATTRI_OP_FLAGS_REMOVE:
- new->xattri_dela_state = xfs_attr_init_remove_state(args);
- break;
- default:
- ASSERT(0);
- }
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
+ rsvd, &tp);
+ if (error)
+ return error;
+
+ if (xfs_inode_has_attr_fork(ip))
+ goto trans_cancel;
+
+ error = xfs_bmap_add_attrfork(tp, ip, size, rsvd);
+ if (error)
+ goto trans_cancel;
+
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
- xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type);
- trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
+trans_cancel:
+ xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
}
/*
- * Note: If args->value is NULL the attribute will be removed, just like the
- * Linux ->setattr API.
+ * Make a change to the xattr structure.
+ *
+ * The caller must have initialized @args, attached dquots, and must not hold
+ * any ILOCKs. Reserved data blocks may be used if @rsvd is set.
+ *
+ * Returns -EEXIST for XFS_ATTRUPDATE_CREATE if the name already exists.
+ * Returns -ENOATTR for XFS_ATTRUPDATE_REMOVE if the name does not exist.
+ * Returns 0 on success, or a negative errno if something else went wrong.
*/
int
xfs_attr_set(
- struct xfs_da_args *args)
+ struct xfs_da_args *args,
+ enum xfs_attr_update op,
+ bool rsvd)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans_res tres;
- bool rsvd = (args->attr_filter & XFS_ATTR_ROOT);
int error, local;
int rmt_blks = 0;
unsigned int total;
- if (xfs_is_shutdown(dp->i_mount))
- return -EIO;
-
- error = xfs_qm_dqattach(dp);
- if (error)
- return error;
-
- args->geo = mp->m_attr_geo;
- args->whichfork = XFS_ATTR_FORK;
- args->hashval = xfs_da_hashname(args->name, args->namelen);
+ ASSERT(!args->trans);
- /*
- * We have no control over the attribute names that userspace passes us
- * to remove, so we have to allow the name lookup prior to attribute
- * removal to fail as well. Preserve the logged flag, since we need
- * to pass that through to the logging code.
- */
- args->op_flags = XFS_DA_OP_OKNOENT |
- (args->op_flags & XFS_DA_OP_LOGGED);
-
- if (args->value) {
+ switch (op) {
+ case XFS_ATTRUPDATE_UPSERT:
+ case XFS_ATTRUPDATE_CREATE:
+ case XFS_ATTRUPDATE_REPLACE:
XFS_STATS_INC(mp, xs_attr_set);
args->total = xfs_attr_calc_size(args, &local);
@@ -962,16 +1026,18 @@ xfs_attr_set(
xfs_attr_sf_entsize_byname(args->namelen,
args->valuelen);
- error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
+ error = xfs_attr_add_fork(dp, sf_size, rsvd);
if (error)
return error;
}
if (!local)
rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
- } else {
+ break;
+ case XFS_ATTRUPDATE_REMOVE:
XFS_STATS_INC(mp, xs_attr_remove);
- rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ rmt_blks = xfs_attr3_max_rmt_blocks(mp);
+ break;
}
/*
@@ -983,12 +1049,9 @@ xfs_attr_set(
if (error)
return error;
- if (args->value || xfs_inode_hasattr(dp)) {
- error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK,
+ if (op != XFS_ATTRUPDATE_REMOVE || xfs_inode_hasattr(dp)) {
+ error = xfs_iext_count_extend(args->trans, dp, XFS_ATTR_FORK,
XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(args->trans, dp,
- XFS_IEXT_ATTR_MANIP_CNT(rmt_blks));
if (error)
goto out_trans_cancel;
}
@@ -996,26 +1059,26 @@ xfs_attr_set(
error = xfs_attr_lookup(args);
switch (error) {
case -EEXIST:
- if (!args->value) {
+ if (op == XFS_ATTRUPDATE_REMOVE) {
/* if no value, we are performing a remove operation */
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
break;
}
/* Pure create fails if the attr already exists */
- if (args->attr_flags & XATTR_CREATE)
+ if (op == XFS_ATTRUPDATE_CREATE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
break;
case -ENOATTR:
/* Can't remove what isn't there. */
- if (!args->value)
+ if (op == XFS_ATTRUPDATE_REMOVE)
goto out_trans_cancel;
/* Pure replace fails if no existing attr to replace. */
- if (args->attr_flags & XATTR_REPLACE)
+ if (op == XFS_ATTRUPDATE_REPLACE)
goto out_trans_cancel;
- xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET);
+ xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
break;
default:
goto out_trans_cancel;
@@ -1028,8 +1091,7 @@ xfs_attr_set(
if (xfs_has_wsync(mp))
xfs_trans_set_sync(args->trans);
- if (!(args->op_flags & XFS_DA_OP_NOTIME))
- xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
+ xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
/*
* Commit the last in the sequence of transactions.
@@ -1038,6 +1100,7 @@ xfs_attr_set(
error = xfs_trans_commit(args->trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ args->trans = NULL;
return error;
out_trans_cancel:
@@ -1050,7 +1113,7 @@ out_trans_cancel:
* External routines when attribute list is inside the inode
*========================================================================*/
-static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
+int xfs_attr_sf_totsize(struct xfs_inode *dp)
{
struct xfs_attr_sf_hdr *sf = dp->i_af.if_data;
@@ -1153,7 +1216,7 @@ xfs_attr_leaf_try_add(
struct xfs_buf *bp;
int error;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
if (error)
return error;
@@ -1201,7 +1264,7 @@ xfs_attr_leaf_hasname(
{
int error = 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp);
if (error)
return error;
@@ -1510,12 +1573,23 @@ out_release:
return error;
}
+/* Enforce that there is at most one namespace bit per attr. */
+inline bool xfs_attr_check_namespace(unsigned int attr_flags)
+{
+ return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) < 2;
+}
+
/* Returns true if the attribute entry name is valid. */
bool
xfs_attr_namecheck(
+ unsigned int attr_flags,
const void *name,
size_t length)
{
+ /* Only one namespace bit allowed. */
+ if (!xfs_attr_check_namespace(attr_flags))
+ return false;
+
/*
* MAXNAMELEN includes the trailing null, but (name/length) leave it
* out, so use >= for the length check.
@@ -1523,6 +1597,10 @@ xfs_attr_namecheck(
if (length >= MAXNAMELEN)
return false;
+ /* Parent pointers have their own validation. */
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_parent_namecheck(attr_flags, name, length);
+
/* There shouldn't be any nulls here */
return !memchr(name, 0, length);
}
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 81be9b3e4004..088cb7b30168 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -47,8 +47,9 @@ struct xfs_attrlist_cursor_kern {
/* void; state communicated via *context */
-typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
- unsigned char *, int, int);
+typedef void (*put_listent_func_t)(struct xfs_attr_list_context *context,
+ int flags, unsigned char *name, int namelen, void *value,
+ int valuelen);
struct xfs_attr_list_context {
struct xfs_trans *tp;
@@ -510,8 +511,8 @@ struct xfs_attr_intent {
struct xfs_da_args *xattri_da_args;
/*
- * Shared buffer containing the attr name and value so that the logging
- * code can share large memory buffers between log items.
+ * Shared buffer containing the attr name, new name, and value so that
+ * the logging code can share large memory buffers between log items.
*/
struct xfs_attri_log_nameval *xattri_nameval;
@@ -529,6 +530,11 @@ struct xfs_attr_intent {
struct xfs_bmbt_irec xattri_map;
};
+static inline unsigned int
+xfs_attr_intent_op(const struct xfs_attr_intent *attr)
+{
+ return attr->xattri_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+}
/*========================================================================
* Function prototypes for the kernel.
@@ -544,10 +550,20 @@ int xfs_inode_hasattr(struct xfs_inode *ip);
bool xfs_attr_is_leaf(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_da_args *args);
int xfs_attr_get(struct xfs_da_args *args);
-int xfs_attr_set(struct xfs_da_args *args);
+
+enum xfs_attr_update {
+ XFS_ATTRUPDATE_REMOVE, /* remove attr */
+ XFS_ATTRUPDATE_UPSERT, /* set value, replace any existing attr */
+ XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */
+ XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */
+};
+
+int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op, bool rsvd);
int xfs_attr_set_iter(struct xfs_attr_intent *attr);
int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
-bool xfs_attr_namecheck(const void *name, size_t length);
+bool xfs_attr_check_namespace(unsigned int attr_flags);
+bool xfs_attr_namecheck(unsigned int attr_flags, const void *name,
+ size_t length);
int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
unsigned int *total);
@@ -590,7 +606,6 @@ xfs_attr_init_add_state(struct xfs_da_args *args)
static inline enum xfs_delattr_state
xfs_attr_init_remove_state(struct xfs_da_args *args)
{
- args->op_flags |= XFS_DA_OP_REMOVE;
if (xfs_attr_is_shortform(args->dp))
return XFS_DAS_SF_REMOVE;
if (xfs_attr_is_leaf(args->dp))
@@ -614,8 +629,25 @@ xfs_attr_init_replace_state(struct xfs_da_args *args)
return xfs_attr_init_add_state(args);
}
+xfs_dahash_t xfs_attr_hashname(const uint8_t *name, int namelen);
+
+xfs_dahash_t xfs_attr_hashval(struct xfs_mount *mp, unsigned int attr_flags,
+ const uint8_t *name, int namelen, const void *value,
+ int valuelen);
+
+/* Set the hash value for any extended attribute from any namespace. */
+static inline void xfs_attr_sethash(struct xfs_da_args *args)
+{
+ args->hashval = xfs_attr_hashval(args->dp->i_mount, args->attr_filter,
+ args->name, args->namelen,
+ args->value, args->valuelen);
+}
+
extern struct kmem_cache *xfs_attr_intent_cache;
int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void);
+int xfs_attr_sf_totsize(struct xfs_inode *dp);
+int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd);
+
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 6374bf107242..b9e98950eb3d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -29,6 +29,7 @@
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_errortag.h"
+#include "xfs_health.h"
/*
@@ -387,6 +388,27 @@ xfs_attr3_leaf_verify(
return NULL;
}
+xfs_failaddr_t
+xfs_attr3_leaf_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ if (xfs_has_crc(mp)) {
+ struct xfs_attr3_leafblock *hdr3 = bp->b_addr;
+
+ if (hdr3->hdr.info.hdr.magic !=
+ cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.info.owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
static void
xfs_attr3_leaf_write_verify(
struct xfs_buf *bp)
@@ -447,16 +469,30 @@ int
xfs_attr3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t bno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK,
&xfs_attr3_leaf_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_attr3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
- return err;
+ return 0;
}
/*========================================================================
@@ -471,28 +507,57 @@ xfs_attr3_leaf_read(
* INCOMPLETE flag will not be set in attr->attr_filter, but rather
* XFS_DA_OP_RECOVERY will be set in args->op_flags.
*/
+static inline unsigned int xfs_attr_match_mask(const struct xfs_da_args *args)
+{
+ if (args->op_flags & XFS_DA_OP_RECOVERY)
+ return XFS_ATTR_NSP_ONDISK_MASK;
+ return XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE;
+}
+
+static inline bool
+xfs_attr_parent_match(
+ const struct xfs_da_args *args,
+ const void *value,
+ unsigned int valuelen)
+{
+ ASSERT(args->value != NULL);
+
+ /* Parent pointers do not use remote values */
+ if (!value)
+ return false;
+
+ /*
+ * The only value we support is a parent rec. However, we'll accept
+ * any valuelen so that offline repair can delete ATTR_PARENT values
+ * that are not parent pointers.
+ */
+ if (valuelen != args->valuelen)
+ return false;
+
+ return memcmp(args->value, value, valuelen) == 0;
+}
+
static bool
xfs_attr_match(
struct xfs_da_args *args,
- uint8_t namelen,
- unsigned char *name,
- int flags)
+ unsigned int attr_flags,
+ const unsigned char *name,
+ unsigned int namelen,
+ const void *value,
+ unsigned int valuelen)
{
+ unsigned int mask = xfs_attr_match_mask(args);
if (args->namelen != namelen)
return false;
+ if ((args->attr_filter & mask) != (attr_flags & mask))
+ return false;
if (memcmp(args->name, name, namelen) != 0)
return false;
- /* Recovery ignores the INCOMPLETE flag. */
- if ((args->op_flags & XFS_DA_OP_RECOVERY) &&
- args->attr_filter == (flags & XFS_ATTR_NSP_ONDISK_MASK))
- return true;
+ if (attr_flags & XFS_ATTR_PARENT)
+ return xfs_attr_parent_match(args, value, valuelen);
- /* All remaining matches need to be filtered by INCOMPLETE state. */
- if (args->attr_filter !=
- (flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE)))
- return false;
return true;
}
@@ -503,6 +568,13 @@ xfs_attr_copy_value(
int valuelen)
{
/*
+ * Parent pointer lookups require the caller to specify the name and
+ * value, so don't copy anything.
+ */
+ if (args->attr_filter & XFS_ATTR_PARENT)
+ return 0;
+
+ /*
* No copy if all we have to do is get the length
*/
if (!args->valuelen) {
@@ -710,8 +782,9 @@ xfs_attr_sf_findname(
for (sfe = xfs_attr_sf_firstentry(sf);
sfe < xfs_attr_sf_endptr(sf);
sfe = xfs_attr_sf_nextentry(sfe)) {
- if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
- sfe->flags))
+ if (xfs_attr_match(args, sfe->flags, sfe->nameval,
+ sfe->namelen, &sfe->nameval[sfe->namelen],
+ sfe->valuelen))
return sfe;
}
@@ -818,7 +891,8 @@ xfs_attr_sf_removename(
*/
if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
- !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) {
+ !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) &&
+ !xfs_has_parent(mp)) {
xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
@@ -827,7 +901,8 @@ xfs_attr_sf_removename(
ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!xfs_has_attr2(mp) ||
- dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
+ dp->i_df.if_format == XFS_DINODE_FMT_BTREE ||
+ xfs_has_parent(mp));
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
@@ -879,8 +954,7 @@ xfs_attr_shortform_to_leaf(
trace_xfs_attr_sf_to_leaf(args);
- tmpbuffer = kmem_alloc(size, 0);
- ASSERT(tmpbuffer != NULL);
+ tmpbuffer = kmalloc(size, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, ifp->if_data, size);
sf = (struct xfs_attr_sf_hdr *)tmpbuffer;
@@ -904,6 +978,7 @@ xfs_attr_shortform_to_leaf(
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
nargs.op_flags = XFS_DA_OP_OKNOENT;
+ nargs.owner = args->owner;
sfe = xfs_attr_sf_firstentry(sf);
for (i = 0; i < sf->count; i++) {
@@ -911,9 +986,13 @@ xfs_attr_shortform_to_leaf(
nargs.namelen = sfe->namelen;
nargs.value = &sfe->nameval[nargs.namelen];
nargs.valuelen = sfe->valuelen;
- nargs.hashval = xfs_da_hashname(sfe->nameval,
- sfe->namelen);
nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK;
+ if (!xfs_attr_check_namespace(sfe->flags)) {
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+ xfs_attr_sethash(&nargs);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == -ENOATTR);
error = xfs_attr3_leaf_add(bp, &nargs);
@@ -924,7 +1003,7 @@ xfs_attr_shortform_to_leaf(
}
error = 0;
out:
- kmem_free(tmpbuffer);
+ kfree(tmpbuffer);
return error;
}
@@ -1027,7 +1106,7 @@ xfs_attr_shortform_verify(
* one namespace flag per xattr, so we can just count the
* bits (i.e. hweight) here.
*/
- if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
+ if (!xfs_attr_check_namespace(sfep->flags))
return __this_address;
sfep = next_sfep;
@@ -1059,7 +1138,7 @@ xfs_attr3_leaf_to_shortform(
trace_xfs_attr_leaf_to_sf(args);
- tmpbuffer = kmem_alloc(args->geo->blksize, 0);
+ tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
if (!tmpbuffer)
return -ENOMEM;
@@ -1106,6 +1185,7 @@ xfs_attr3_leaf_to_shortform(
nargs.whichfork = XFS_ATTR_FORK;
nargs.trans = args->trans;
nargs.op_flags = XFS_DA_OP_OKNOENT;
+ nargs.owner = args->owner;
for (i = 0; i < ichdr.count; entry++, i++) {
if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -1125,7 +1205,7 @@ xfs_attr3_leaf_to_shortform(
error = 0;
out:
- kmem_free(tmpbuffer);
+ kfree(tmpbuffer);
return error;
}
@@ -1158,7 +1238,7 @@ xfs_attr3_leaf_to_node(
error = xfs_da_grow_inode(args, &blkno);
if (error)
goto out;
- error = xfs_attr3_leaf_read(args->trans, dp, 0, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, dp, args->owner, 0, &bp1);
if (error)
goto out;
@@ -1237,7 +1317,7 @@ xfs_attr3_leaf_create(
ichdr.magic = XFS_ATTR3_LEAF_MAGIC;
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
@@ -1533,7 +1613,7 @@ xfs_attr3_leaf_compact(
trace_xfs_attr_leaf_compact(args);
- tmpbuffer = kmem_alloc(args->geo->blksize, 0);
+ tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
memset(bp->b_addr, 0, args->geo->blksize);
leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1571,7 +1651,7 @@ xfs_attr3_leaf_compact(
*/
xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
- kmem_free(tmpbuffer);
+ kfree(tmpbuffer);
}
/*
@@ -1993,7 +2073,7 @@ xfs_attr3_leaf_toosmall(
if (blkno == 0)
continue;
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
- blkno, &bp);
+ state->args->owner, blkno, &bp);
if (error)
return error;
@@ -2250,7 +2330,8 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kmem_zalloc(state->args->geo->blksize, 0);
+ tmp_leaf = kzalloc(state->args->geo->blksize,
+ GFP_KERNEL | __GFP_NOFAIL);
/*
* Copy the header into the temp leaf so that all the stuff
@@ -2290,7 +2371,7 @@ xfs_attr3_leaf_unbalance(
}
memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
savehdr = tmphdr; /* struct copy */
- kmem_free(tmp_leaf);
+ kfree(tmp_leaf);
}
xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
@@ -2343,6 +2424,7 @@ xfs_attr3_leaf_lookup_int(
entries = xfs_attr3_leaf_entryp(leaf);
if (ichdr.count >= args->geo->blksize / 8) {
xfs_buf_mark_corrupt(bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -2362,10 +2444,12 @@ xfs_attr3_leaf_lookup_int(
}
if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) {
xfs_buf_mark_corrupt(bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) {
xfs_buf_mark_corrupt(bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -2397,18 +2481,23 @@ xfs_attr3_leaf_lookup_int(
*/
if (entry->flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr3_leaf_name_local(leaf, probe);
- if (!xfs_attr_match(args, name_loc->namelen,
- name_loc->nameval, entry->flags))
+ if (!xfs_attr_match(args, entry->flags,
+ name_loc->nameval, name_loc->namelen,
+ &name_loc->nameval[name_loc->namelen],
+ be16_to_cpu(name_loc->valuelen)))
continue;
args->index = probe;
return -EEXIST;
} else {
+ unsigned int valuelen;
+
name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
- if (!xfs_attr_match(args, name_rmt->namelen,
- name_rmt->name, entry->flags))
+ valuelen = be32_to_cpu(name_rmt->valuelen);
+ if (!xfs_attr_match(args, entry->flags, name_rmt->name,
+ name_rmt->namelen, NULL, valuelen))
continue;
args->index = probe;
- args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
+ args->rmtvaluelen = valuelen;
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
@@ -2711,7 +2800,8 @@ xfs_attr3_leaf_clearflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -2775,7 +2865,8 @@ xfs_attr3_leaf_setflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp);
if (error)
return error;
@@ -2834,7 +2925,8 @@ xfs_attr3_leaf_flipflags(
/*
* Read the block containing the "old" attr
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno, &bp1);
if (error)
return error;
@@ -2842,8 +2934,8 @@ xfs_attr3_leaf_flipflags(
* Read the block containing the "new" attr, if it is different
*/
if (args->blkno2 != args->blkno) {
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
- &bp2);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner,
+ args->blkno2, &bp2);
if (error)
return error;
} else {
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 9b9948639c0f..bac219589896 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -98,12 +98,14 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t bno, struct xfs_buf **bpp);
void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
struct xfs_attr3_icleaf_hdr *to,
struct xfs_attr_leafblock *from);
void xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
struct xfs_attr_leafblock *to,
struct xfs_attr3_icleaf_hdr *from);
+xfs_failaddr_t xfs_attr3_leaf_header_check(struct xfs_buf *bp,
+ xfs_ino_t owner);
#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d440393b40eb..4c44ce1c8a64 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -22,6 +22,7 @@
#include "xfs_attr_remote.h"
#include "xfs_trace.h"
#include "xfs_error.h"
+#include "xfs_health.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
@@ -42,19 +43,32 @@
* the logging system and therefore never have a log item.
*/
-/*
- * Each contiguous block has a header, so it is not just a simple attribute
- * length to FSB conversion.
- */
-int
+/* How many bytes can be stored in a remote value buffer? */
+inline unsigned int
+xfs_attr3_rmt_buf_space(
+ struct xfs_mount *mp)
+{
+ unsigned int blocksize = mp->m_attr_geo->blksize;
+
+ if (xfs_has_crc(mp))
+ return blocksize - sizeof(struct xfs_attr3_rmt_hdr);
+
+ return blocksize;
+}
+
+/* Compute number of fsblocks needed to store a remote attr value */
+unsigned int
xfs_attr3_rmt_blocks(
- struct xfs_mount *mp,
- int attrlen)
+ struct xfs_mount *mp,
+ unsigned int attrlen)
{
- if (xfs_has_crc(mp)) {
- int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
- return (attrlen + buflen - 1) / buflen;
- }
+ /*
+ * Each contiguous block has a header, so it is not just a simple
+ * attribute length to FSB conversion.
+ */
+ if (xfs_has_crc(mp))
+ return howmany(attrlen, xfs_attr3_rmt_buf_space(mp));
+
return XFS_B_TO_FSB(mp, attrlen);
}
@@ -91,7 +105,6 @@ xfs_attr3_rmt_verify(
struct xfs_mount *mp,
struct xfs_buf *bp,
void *ptr,
- int fsbsize,
xfs_daddr_t bno)
{
struct xfs_attr3_rmt_hdr *rmt = ptr;
@@ -102,7 +115,7 @@ xfs_attr3_rmt_verify(
return __this_address;
if (be64_to_cpu(rmt->rm_blkno) != bno)
return __this_address;
- if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
+ if (be32_to_cpu(rmt->rm_bytes) > mp->m_attr_geo->blksize - sizeof(*rmt))
return __this_address;
if (be32_to_cpu(rmt->rm_offset) +
be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
@@ -121,9 +134,9 @@ __xfs_attr3_rmt_read_verify(
{
struct xfs_mount *mp = bp->b_mount;
char *ptr;
- int len;
+ unsigned int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_has_crc(mp))
@@ -140,7 +153,7 @@ __xfs_attr3_rmt_read_verify(
*failaddr = __this_address;
return -EFSBADCRC;
}
- *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
+ *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, bno);
if (*failaddr)
return -EFSCORRUPTED;
len -= blksize;
@@ -185,7 +198,7 @@ xfs_attr3_rmt_write_verify(
{
struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
@@ -202,7 +215,7 @@ xfs_attr3_rmt_write_verify(
while (len > 0) {
struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;
- fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno);
+ fa = xfs_attr3_rmt_verify(mp, bp, ptr, bno);
if (fa) {
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
@@ -276,32 +289,34 @@ xfs_attr3_rmt_hdr_set(
*/
STATIC int
xfs_attr_rmtval_copyout(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- xfs_ino_t ino,
- int *offset,
- int *valuelen,
- uint8_t **dst)
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ struct xfs_inode *dp,
+ xfs_ino_t owner,
+ unsigned int *offset,
+ unsigned int *valuelen,
+ uint8_t **dst)
{
- char *src = bp->b_addr;
- xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ char *src = bp->b_addr;
+ xfs_daddr_t bno = xfs_buf_daddr(bp);
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size = 0;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size = 0;
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
if (xfs_has_crc(mp)) {
- if (xfs_attr3_rmt_hdr_ok(src, ino, *offset,
+ if (xfs_attr3_rmt_hdr_ok(src, owner, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
- bno, *offset, byte_cnt, ino);
+ bno, *offset, byte_cnt, owner);
+ xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
return -EFSCORRUPTED;
}
hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
@@ -327,20 +342,20 @@ xfs_attr_rmtval_copyin(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
- int *offset,
- int *valuelen,
+ unsigned int *offset,
+ unsigned int *valuelen,
uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size;
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
@@ -386,12 +401,12 @@ xfs_attr_rmtval_get(
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
uint8_t *dst = args->value;
- int valuelen;
+ unsigned int valuelen;
int nmap;
int error;
- int blkcnt = args->rmtblkcnt;
+ unsigned int blkcnt = args->rmtblkcnt;
int i;
- int offset = 0;
+ unsigned int offset = 0;
trace_xfs_attr_rmtval_get(args);
@@ -418,12 +433,13 @@ xfs_attr_rmtval_get(
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt,
0, &bp, &xfs_attr3_rmt_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK);
if (error)
return error;
- error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
- &offset, &valuelen,
- &dst);
+ error = xfs_attr_rmtval_copyout(mp, bp, args->dp,
+ args->owner, &offset, &valuelen, &dst);
xfs_buf_relse(bp);
if (error)
return error;
@@ -448,7 +464,7 @@ xfs_attr_rmt_find_hole(
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
int error;
- int blkcnt;
+ unsigned int blkcnt;
xfs_fileoff_t lfileoff = 0;
/*
@@ -477,11 +493,11 @@ xfs_attr_rmtval_set_value(
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
uint8_t *src = args->value;
- int blkcnt;
- int valuelen;
+ unsigned int blkcnt;
+ unsigned int valuelen;
int nmap;
int error;
- int offset = 0;
+ unsigned int offset = 0;
/*
* Roll through the "value", copying the attribute value to the
@@ -517,8 +533,8 @@ xfs_attr_rmtval_set_value(
return error;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
- xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
- &valuelen, &src);
+ xfs_attr_rmtval_copyin(mp, bp, args->owner, &offset, &valuelen,
+ &src);
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
@@ -545,11 +561,13 @@ xfs_attr_rmtval_stale(
struct xfs_buf *bp;
int error;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) ||
- XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK))
+ XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) {
+ xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
return -EFSCORRUPTED;
+ }
error = xfs_buf_incore(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map->br_startblock),
@@ -619,7 +637,6 @@ xfs_attr_rmtval_set_blk(
if (error)
return error;
- ASSERT(nmap == 1);
ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
(map->br_startblock != HOLESTARTBLOCK));
@@ -639,7 +656,7 @@ xfs_attr_rmtval_invalidate(
struct xfs_da_args *args)
{
xfs_dablk_t lblkno;
- int blkcnt;
+ unsigned int blkcnt;
int error;
/*
@@ -659,8 +676,10 @@ xfs_attr_rmtval_invalidate(
blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
return error;
- if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1))
+ if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) {
+ xfs_bmap_mark_sick(args->dp, XFS_ATTR_FORK);
return -EFSCORRUPTED;
+ }
error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index d097ec6c4dc3..e3c6c7d774bf 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -6,7 +6,13 @@
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
-int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
+unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen);
+
+/* Number of rmt blocks needed to store the maximally sized attr value */
+static inline unsigned int xfs_attr3_max_rmt_blocks(struct xfs_mount *mp)
+{
+ return xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+}
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index bc4422223024..73bdc0e55682 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -16,6 +16,7 @@ typedef struct xfs_attr_sf_sort {
uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
+ void *value;
} xfs_attr_sf_sort_t;
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f362345467fa..3b3206d312d6 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -36,6 +36,9 @@
#include "xfs_refcount.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
+#include "xfs_health.h"
+#include "xfs_bmap_item.h"
+#include "xfs_symlink_remote.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -225,6 +228,28 @@ xfs_bmap_forkoff_reset(
}
}
+static int
+xfs_bmap_read_buf(
+ struct xfs_mount *mp, /* file system mount point */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_fsblock_t fsbno, /* file system block number */
+ struct xfs_buf **bpp) /* buffer for fsbno */
+{
+ struct xfs_buf *bp; /* return value */
+ int error;
+
+ if (!xfs_verify_fsbno(mp, fsbno))
+ return -EFSCORRUPTED;
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp,
+ &xfs_bmbt_buf_ops);
+ if (!error) {
+ xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
+ *bpp = bp;
+ }
+ return error;
+}
+
#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
@@ -364,9 +389,9 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
- XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
+ error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(cur);
if (error)
goto error_norelse;
}
@@ -383,6 +408,7 @@ xfs_bmap_check_leaf_extents(
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -450,9 +476,9 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
- XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
+ error = xfs_bmap_read_buf(mp, NULL, bno, &bp);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(cur);
if (error)
goto error_norelse;
}
@@ -562,11 +588,14 @@ xfs_bmap_btree_to_extents(
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
#ifdef DEBUG
- if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
+ if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
#endif
- error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
+ error = xfs_bmap_read_buf(mp, tp, cbno, &cbp);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(cur);
if (error)
return error;
cblock = XFS_BUF_TO_BLOCK(cbp);
@@ -634,14 +663,13 @@ xfs_bmap_extents_to_btree(
* Fill in the root.
*/
block = ifp->if_broot;
- xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
- XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
- XFS_BTREE_LONG_PTRS);
+ xfs_bmbt_init_block(ip, block, NULL, 1, 1);
/*
* Need a cursor. Can't allocate until bb_level is filled in.
*/
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
+ if (wasdel)
+ cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
/*
* Convert to a btree with two levels, one record in root.
*/
@@ -667,7 +695,7 @@ xfs_bmap_extents_to_btree(
goto out_root_realloc;
}
- cur->bc_ino.allocated++;
+ cur->bc_bmap.allocated++;
ip->i_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
@@ -679,11 +707,8 @@ xfs_bmap_extents_to_btree(
/*
* Fill in the child block.
*/
- abp->b_ops = &xfs_bmbt_buf_ops;
ablock = XFS_BUF_TO_BLOCK(abp);
- xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp),
- XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS);
+ xfs_bmbt_init_block(ip, ablock, abp, 0, 0);
for_each_xfs_iext(ifp, &icur, &rec) {
if (isnullstartblock(rec.br_startblock))
@@ -754,7 +779,7 @@ xfs_bmap_local_to_extents_empty(
}
-STATIC int /* error */
+int /* error */
xfs_bmap_local_to_extents(
xfs_trans_t *tp, /* transaction pointer */
xfs_inode_t *ip, /* incore inode pointer */
@@ -764,7 +789,8 @@ xfs_bmap_local_to_extents(
void (*init_fn)(struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
- struct xfs_ifork *ifp))
+ struct xfs_ifork *ifp, void *priv),
+ void *priv)
{
int error = 0;
int flags; /* logging flags returned */
@@ -825,7 +851,7 @@ xfs_bmap_local_to_extents(
* log here. Note that init_fn must also set the buffer log item type
* correctly.
*/
- init_fn(tp, bp, ip, ifp);
+ init_fn(tp, bp, ip, ifp, priv);
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -878,6 +904,7 @@ xfs_bmap_add_attrfork_btree(
goto error0;
/* must be at least one entry */
if (XFS_IS_CORRUPT(mp, stat != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -887,7 +914,7 @@ xfs_bmap_add_attrfork_btree(
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
return -ENOSPC;
}
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
}
return 0;
@@ -915,7 +942,7 @@ xfs_bmap_add_attrfork_extents(
error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags,
XFS_DATA_FORK);
if (cur) {
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
return error;
@@ -950,16 +977,18 @@ xfs_bmap_add_attrfork_local(
dargs.total = dargs.geo->fsbcount;
dargs.whichfork = XFS_DATA_FORK;
dargs.trans = tp;
+ dargs.owner = ip->i_ino;
return xfs_dir2_sf_to_block(&dargs);
}
if (S_ISLNK(VFS_I(ip)->i_mode))
return xfs_bmap_local_to_extents(tp, ip, 1, flags,
- XFS_DATA_FORK,
- xfs_symlink_local_to_remote);
+ XFS_DATA_FORK, xfs_symlink_local_to_remote,
+ NULL);
/* should only be called for types that support local format data */
ASSERT(0);
+ xfs_bmap_mark_sick(ip, XFS_ATTR_FORK);
return -EFSCORRUPTED;
}
@@ -996,40 +1025,29 @@ xfs_bmap_set_attrforkoff(
}
/*
- * Convert inode from non-attributed to attributed.
- * Must not be in a transaction, ip must not be locked.
+ * Convert inode from non-attributed to attributed. Caller must hold the
+ * ILOCK_EXCL and the file cannot have an attr fork.
*/
int /* error code */
xfs_bmap_add_attrfork(
- xfs_inode_t *ip, /* incore inode pointer */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip, /* incore inode pointer */
int size, /* space new attribute needs */
int rsvd) /* xact may use reserved blks */
{
- xfs_mount_t *mp; /* mount structure */
- xfs_trans_t *tp; /* transaction pointer */
- int blks; /* space reservation */
+ struct xfs_mount *mp = tp->t_mountp;
int version = 1; /* superblock attr version */
int logflags; /* logging flags */
int error; /* error return value */
- ASSERT(xfs_inode_has_attr_fork(ip) == 0);
-
- mp = ip->i_mount;
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-
- blks = XFS_ADDAFORK_SPACE_RES(mp);
-
- error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
- rsvd, &tp);
- if (error)
- return error;
- if (xfs_inode_has_attr_fork(ip))
- goto trans_cancel;
+ ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_bmap_set_attrforkoff(ip, size, &version);
if (error)
- goto trans_cancel;
+ return error;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
logflags = 0;
@@ -1050,7 +1068,7 @@ xfs_bmap_add_attrfork(
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
- goto trans_cancel;
+ return error;
if (!xfs_has_attr(mp) ||
(!xfs_has_attr2(mp) && version == 2)) {
bool log_sb = false;
@@ -1069,14 +1087,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
- error = xfs_trans_commit(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
-
-trans_cancel:
- xfs_trans_cancel(tp);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
+ return 0;
}
/*
@@ -1143,6 +1154,7 @@ xfs_iread_bmbt_block(
(unsigned long long)ip->i_ino);
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
sizeof(*block), __this_address);
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -1158,6 +1170,7 @@ xfs_iread_bmbt_block(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iread_extents(2)", frp,
sizeof(*frp), fa);
+ xfs_bmap_mark_sick(ip, whichfork);
return xfs_bmap_complain_bad_rec(ip, whichfork, fa,
&new);
}
@@ -1189,7 +1202,7 @@ xfs_iread_extents(
if (!xfs_need_iread_extents(ifp))
return 0;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ir.loaded = 0;
xfs_iext_first(ifp, &ir.icur);
@@ -1201,6 +1214,7 @@ xfs_iread_extents(
goto out;
if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) {
+ xfs_bmap_mark_sick(ip, whichfork);
error = -EFSCORRUPTED;
goto out;
}
@@ -1213,6 +1227,8 @@ xfs_iread_extents(
smp_store_release(&ifp->if_needextents, 0);
return 0;
out:
+ if (xfs_metadata_is_sick(error))
+ xfs_bmap_mark_sick(ip, whichfork);
xfs_iext_destroy(ifp);
return error;
}
@@ -1292,6 +1308,7 @@ xfs_bmap_last_before(
break;
default:
ASSERT(0);
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -1388,8 +1405,10 @@ xfs_bmap_last_offset(
if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
return 0;
- if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp)))
+ if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
+ }
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
if (error || is_empty)
@@ -1429,8 +1448,7 @@ xfs_bmap_add_extent_delay_real(
ASSERT(whichfork != XFS_ATTR_FORK);
ASSERT(!isnullstartblock(new->br_startblock));
- ASSERT(!bma->cur ||
- (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
+ ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
XFS_STATS_INC(mp, xs_add_exlist);
@@ -1528,6 +1546,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1535,6 +1554,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1542,6 +1562,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1549,6 +1570,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1571,6 +1593,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1578,6 +1601,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1604,6 +1628,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1611,6 +1636,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1632,6 +1658,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1639,10 +1666,12 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1673,6 +1702,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1680,6 +1710,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
}
+ ASSERT(da_new <= da_old);
break;
case BMAP_LEFT_FILLING:
@@ -1698,6 +1729,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1705,6 +1737,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1721,7 +1754,7 @@ xfs_bmap_add_extent_delay_real(
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
- (bma->cur ? bma->cur->bc_ino.allocated : 0));
+ (bma->cur ? bma->cur->bc_bmap.allocated : 0));
PREV.br_startoff = new_endoff;
PREV.br_blockcount = temp;
@@ -1749,6 +1782,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1767,6 +1801,7 @@ xfs_bmap_add_extent_delay_real(
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
xfs_iext_next(ifp, &bma->icur);
xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
+ ASSERT(da_new <= da_old);
break;
case BMAP_RIGHT_FILLING:
@@ -1785,6 +1820,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1792,6 +1828,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1808,12 +1845,13 @@ xfs_bmap_add_extent_delay_real(
temp = PREV.br_blockcount - new->br_blockcount;
da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
- (bma->cur ? bma->cur->bc_ino.allocated : 0));
+ (bma->cur ? bma->cur->bc_bmap.allocated : 0));
PREV.br_startblock = nullstartblock(da_new);
PREV.br_blockcount = temp;
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
xfs_iext_next(ifp, &bma->icur);
+ ASSERT(da_new <= da_old);
break;
case 0:
@@ -1871,6 +1909,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1878,6 +1917,7 @@ xfs_bmap_add_extent_delay_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(bma->cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1926,19 +1966,18 @@ xfs_bmap_add_extent_delay_real(
}
if (da_new != da_old)
- xfs_mod_delalloc(mp, (int64_t)da_new - da_old);
+ xfs_mod_delalloc(bma->ip, 0, (int64_t)da_new - da_old);
if (bma->cur) {
- da_new += bma->cur->bc_ino.allocated;
- bma->cur->bc_ino.allocated = 0;
+ da_new += bma->cur->bc_bmap.allocated;
+ bma->cur->bc_bmap.allocated = 0;
}
/* adjust for changes in reserved delayed indirect blocks */
- if (da_new != da_old) {
- ASSERT(state == 0 || da_new < da_old);
- error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
- false);
- }
+ if (da_new < da_old)
+ xfs_add_fdblocks(mp, da_old - da_new);
+ else if (da_new > da_old)
+ error = xfs_dec_fdblocks(mp, da_new - da_old, true);
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
@@ -2074,30 +2113,35 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2126,18 +2170,21 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2169,18 +2216,21 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2207,6 +2257,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2240,6 +2291,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2277,6 +2329,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2287,6 +2340,7 @@ xfs_bmap_add_extent_unwritten_real(
if ((error = xfs_btree_insert(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2317,6 +2371,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2353,6 +2408,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2363,12 +2419,14 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if ((error = xfs_btree_insert(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2405,6 +2463,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2417,6 +2476,7 @@ xfs_bmap_add_extent_unwritten_real(
if ((error = xfs_btree_insert(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2429,6 +2489,7 @@ xfs_bmap_add_extent_unwritten_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2436,6 +2497,7 @@ xfs_bmap_add_extent_unwritten_real(
if ((error = xfs_btree_insert(cur, &i)))
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2472,7 +2534,7 @@ xfs_bmap_add_extent_unwritten_real(
/* clear out the allocated field, done with it now in any case. */
if (cur) {
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
*curp = cur;
}
@@ -2616,12 +2678,12 @@ xfs_bmap_add_extent_hole_delay(
}
if (oldlen != newlen) {
ASSERT(oldlen > newlen);
- xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
- false);
+ xfs_add_fdblocks(ip->i_mount, oldlen - newlen);
+
/*
* Nothing to do for disk quota accounting here.
*/
- xfs_mod_delalloc(ip->i_mount, (int64_t)newlen - oldlen);
+ xfs_mod_delalloc(ip, 0, (int64_t)newlen - oldlen);
}
}
@@ -2651,7 +2713,7 @@ xfs_bmap_add_extent_hole_real(
struct xfs_bmbt_irec old;
ASSERT(!isnullstartblock(new->br_startblock));
- ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL));
+ ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL));
XFS_STATS_INC(mp, xs_add_exlist);
@@ -2721,6 +2783,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2728,6 +2791,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2735,6 +2799,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2764,6 +2829,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2794,6 +2860,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2820,6 +2887,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2827,6 +2895,7 @@ xfs_bmap_add_extent_hole_real(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2853,7 +2922,7 @@ xfs_bmap_add_extent_hole_real(
/* clear out the allocated field, done with it now in any case. */
if (cur)
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
xfs_bmap_check_leaf_extents(cur, ip, whichfork);
done:
@@ -3291,7 +3360,7 @@ xfs_bmap_alloc_account(
* yet.
*/
if (ap->wasdel) {
- xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
+ xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
return;
}
@@ -3315,7 +3384,7 @@ xfs_bmap_alloc_account(
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel) {
ap->ip->i_delayed_blks -= ap->length;
- xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length);
+ xfs_mod_delalloc(ap->ip, -(int64_t)ap->length, 0);
fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT;
} else {
fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
@@ -3898,14 +3967,18 @@ xfs_bmapi_read(
ASSERT(*nmap >= 1);
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE)));
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
- if (WARN_ON_ONCE(!ifp))
+ if (WARN_ON_ONCE(!ifp)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
+ }
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT))
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
+ }
if (xfs_is_shutdown(mp))
return -EIO;
@@ -3983,6 +4056,7 @@ xfs_bmapi_reserve_delalloc(
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
xfs_extlen_t alen;
xfs_extlen_t indlen;
+ uint64_t fdblocks;
int error;
xfs_fileoff_t aoff = off;
@@ -4025,17 +4099,21 @@ xfs_bmapi_reserve_delalloc(
indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
ASSERT(indlen > 0);
- error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
- if (error)
- goto out_unreserve_quota;
+ fdblocks = indlen;
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ error = xfs_dec_frextents(mp, xfs_rtb_to_rtx(mp, alen));
+ if (error)
+ goto out_unreserve_quota;
+ } else {
+ fdblocks += alen;
+ }
- error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
+ error = xfs_dec_fdblocks(mp, fdblocks, false);
if (error)
- goto out_unreserve_blocks;
-
+ goto out_unreserve_frextents;
ip->i_delayed_blks += alen;
- xfs_mod_delalloc(ip->i_mount, alen + indlen);
+ xfs_mod_delalloc(ip, alen, indlen);
got->br_startoff = aoff;
got->br_startblock = nullstartblock(indlen);
@@ -4056,8 +4134,9 @@ xfs_bmapi_reserve_delalloc(
return 0;
-out_unreserve_blocks:
- xfs_mod_fdblocks(mp, alen, false);
+out_unreserve_frextents:
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, alen));
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
@@ -4108,26 +4187,10 @@ xfs_bmapi_allocate(
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = xfs_bmapi_whichfork(bma->flags);
struct xfs_ifork *ifp = xfs_ifork_ptr(bma->ip, whichfork);
- int tmp_logflags = 0;
int error;
ASSERT(bma->length > 0);
-
- /*
- * For the wasdelay case, we could also just allocate the stuff asked
- * for in this bmap call but that wouldn't be as good.
- */
- if (bma->wasdel) {
- bma->length = (xfs_extlen_t)bma->got.br_blockcount;
- bma->offset = bma->got.br_startoff;
- if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
- bma->prev.br_startoff = NULLFILEOFF;
- } else {
- bma->length = XFS_FILBLKS_MIN(bma->length, XFS_MAX_BMBT_EXTLEN);
- if (!bma->eof)
- bma->length = XFS_FILBLKS_MIN(bma->length,
- bma->got.br_startoff - bma->offset);
- }
+ ASSERT(bma->length <= XFS_MAX_BMBT_EXTLEN);
if (bma->flags & XFS_BMAPI_CONTIG)
bma->minlen = bma->length;
@@ -4143,8 +4206,15 @@ xfs_bmapi_allocate(
} else {
error = xfs_bmap_alloc_userdata(bma);
}
- if (error || bma->blkno == NULLFSBLOCK)
+ if (error)
return error;
+ if (bma->blkno == NULLFSBLOCK)
+ return -ENOSPC;
+
+ if (WARN_ON_ONCE(!xfs_valid_startblock(bma->ip, bma->blkno))) {
+ xfs_bmap_mark_sick(bma->ip, whichfork);
+ return -EFSCORRUPTED;
+ }
if (bma->flags & XFS_BMAPI_ZERO) {
error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
@@ -4160,9 +4230,8 @@ xfs_bmapi_allocate(
*/
bma->nallocs++;
- if (bma->cur)
- bma->cur->bc_ino.flags =
- bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0;
+ if (bma->cur && bma->wasdel)
+ bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL;
bma->got.br_startoff = bma->offset;
bma->got.br_startblock = bma->blkno;
@@ -4178,8 +4247,6 @@ xfs_bmapi_allocate(
error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip,
whichfork, &bma->icur, &bma->cur, &bma->got,
&bma->logflags, bma->flags);
-
- bma->logflags |= tmp_logflags;
if (error)
return error;
@@ -4324,6 +4391,15 @@ xfs_bmapi_finish(
* extent state if necessary. Details behaviour is controlled by the flags
* parameter. Only allocates blocks from a single allocation group, to avoid
* locking problems.
+ *
+ * Returns 0 on success and places the extent mappings in mval. nmaps is used
+ * as an input/output parameter where the caller specifies the maximum number
+ * of mappings that may be returned and xfs_bmapi_write passes back the number
+ * of mappings (including existing mappings) it found.
+ *
+ * Returns a negative error code on failure, including -ENOSPC when it could not
+ * allocate any blocks and -ENOSR when it did allocate blocks to convert a
+ * delalloc range, but those blocks were before the passed in range.
*/
int
xfs_bmapi_write(
@@ -4369,7 +4445,7 @@ xfs_bmapi_write(
ASSERT(tp != NULL);
ASSERT(len > 0);
ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL);
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(!(flags & XFS_BMAPI_REMAP));
/* zeroing is for currently only for data extents, not metadata */
@@ -4386,6 +4462,7 @@ xfs_bmapi_write(
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -4441,20 +4518,33 @@ xfs_bmapi_write(
* allocation length request (which can be 64 bits in
* length) and the bma length request, which is
* xfs_extlen_t and therefore 32 bits. Hence we have to
- * check for 32-bit overflows and handle them here.
+ * be careful and do the min() using the larger type to
+ * avoid overflows.
*/
- if (len > (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN)
- bma.length = XFS_MAX_BMBT_EXTLEN;
- else
- bma.length = len;
+ bma.length = XFS_FILBLKS_MIN(len, XFS_MAX_BMBT_EXTLEN);
+
+ if (wasdelay) {
+ bma.length = XFS_FILBLKS_MIN(bma.length,
+ bma.got.br_blockcount -
+ (bno - bma.got.br_startoff));
+ } else {
+ if (!eof)
+ bma.length = XFS_FILBLKS_MIN(bma.length,
+ bma.got.br_startoff - bno);
+ }
- ASSERT(len > 0);
ASSERT(bma.length > 0);
error = xfs_bmapi_allocate(&bma);
- if (error)
+ if (error) {
+ /*
+ * If we already allocated space in a previous
+ * iteration return what we go so far when
+ * running out of space.
+ */
+ if (error == -ENOSPC && bma.nallocs)
+ break;
goto error0;
- if (bma.blkno == NULLFSBLOCK)
- break;
+ }
/*
* If this is a CoW allocation, record the data in
@@ -4492,7 +4582,6 @@ xfs_bmapi_write(
if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
eof = true;
}
- *nmap = n;
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
@@ -4503,7 +4592,22 @@ xfs_bmapi_write(
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
xfs_bmapi_finish(&bma, whichfork, 0);
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
- orig_nmap, *nmap);
+ orig_nmap, n);
+
+ /*
+ * When converting delayed allocations, xfs_bmapi_allocate ignores
+ * the passed in bno and always converts from the start of the found
+ * delalloc extent.
+ *
+ * To avoid a successful return with *nmap set to 0, return the magic
+ * -ENOSR error code for this particular case so that the caller can
+ * handle it.
+ */
+ if (!n) {
+ ASSERT(bma.nallocs >= *nmap);
+ return -ENOSR;
+ }
+ *nmap = n;
return 0;
error0:
xfs_bmapi_finish(&bma, whichfork, error);
@@ -4516,8 +4620,8 @@ error0:
* invocations to allocate the target offset if a large enough physical extent
* is not available.
*/
-int
-xfs_bmapi_convert_delalloc(
+static int
+xfs_bmapi_convert_one_delalloc(
struct xfs_inode *ip,
int whichfork,
xfs_off_t offset,
@@ -4547,11 +4651,8 @@ xfs_bmapi_convert_delalloc(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- error = xfs_iext_count_may_overflow(ip, whichfork,
+ error = xfs_iext_count_extend(tp, ip, whichfork,
XFS_IEXT_ADD_NOSPLIT_CNT);
- if (error == -EFBIG)
- error = xfs_iext_count_upgrade(tp, ip,
- XFS_IEXT_ADD_NOSPLIT_CNT);
if (error)
goto out_trans_cancel;
@@ -4574,19 +4675,25 @@ xfs_bmapi_convert_delalloc(
if (!isnullstartblock(bma.got.br_startblock)) {
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
bma.tp = tp;
bma.ip = ip;
bma.wasdel = true;
- bma.offset = bma.got.br_startoff;
- bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount,
- XFS_MAX_BMBT_EXTLEN);
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
/*
+ * Always allocate convert from the start of the delalloc extent even if
+ * that is outside the passed in range to create large contiguous
+ * extents on disk.
+ */
+ bma.offset = bma.got.br_startoff;
+ bma.length = bma.got.br_blockcount;
+
+ /*
* When we're converting the delalloc reservations backing dirty pages
* in the page cache, we must be careful about how we create the new
* extents:
@@ -4610,20 +4717,14 @@ xfs_bmapi_convert_delalloc(
if (error)
goto out_finish;
- error = -ENOSPC;
- if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
- goto out_finish;
- error = -EFSCORRUPTED;
- if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
- goto out_finish;
-
XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
xfs_iomap_inode_sequence(ip, flags));
- *seq = READ_ONCE(ifp->if_seq);
+ if (seq)
+ *seq = READ_ONCE(ifp->if_seq);
if (whichfork == XFS_COW_FORK)
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
@@ -4646,6 +4747,36 @@ out_trans_cancel:
return error;
}
+/*
+ * Pass in a dellalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in iomap.
+ */
+int
+xfs_bmapi_convert_delalloc(
+ struct xfs_inode *ip,
+ int whichfork,
+ loff_t offset,
+ struct iomap *iomap,
+ unsigned int *seq)
+{
+ int error;
+
+ /*
+ * Attempt to allocate whatever delalloc extent currently backs offset
+ * and put the result into iomap. Allocate in a loop because it may
+ * take several attempts to allocate real blocks for a contiguous
+ * delalloc extent if free space is sufficiently fragmented.
+ */
+ do {
+ error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
+ iomap, seq);
+ if (error)
+ return error;
+ } while (iomap->offset + iomap->length <= offset);
+
+ return 0;
+}
+
int
xfs_bmapi_remap(
struct xfs_trans *tp,
@@ -4666,7 +4797,7 @@ xfs_bmapi_remap(
ifp = xfs_ifork_ptr(ip, whichfork);
ASSERT(len > 0);
ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN);
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC |
XFS_BMAPI_NORMAP)));
ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
@@ -4674,6 +4805,7 @@ xfs_bmapi_remap(
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -4693,10 +4825,8 @@ xfs_bmapi_remap(
ip->i_nblocks += len;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE)
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = 0;
- }
got.br_startoff = bno;
got.br_startblock = startblock;
@@ -4738,32 +4868,18 @@ error0:
* ores == 1). The number of stolen blocks is returned. The availability and
* subsequent accounting of stolen blocks is the responsibility of the caller.
*/
-static xfs_filblks_t
+static void
xfs_bmap_split_indlen(
xfs_filblks_t ores, /* original res. */
xfs_filblks_t *indlen1, /* ext1 worst indlen */
- xfs_filblks_t *indlen2, /* ext2 worst indlen */
- xfs_filblks_t avail) /* stealable blocks */
+ xfs_filblks_t *indlen2) /* ext2 worst indlen */
{
xfs_filblks_t len1 = *indlen1;
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
- xfs_filblks_t stolen = 0;
xfs_filblks_t resfactor;
/*
- * Steal as many blocks as we can to try and satisfy the worst case
- * indlen for both new extents.
- */
- if (ores < nres && avail)
- stolen = XFS_FILBLKS_MIN(nres - ores, avail);
- ores += stolen;
-
- /* nothing else to do if we've satisfied the new reservation */
- if (ores >= nres)
- return stolen;
-
- /*
* We can't meet the total required reservation for the two extents.
* Calculate the percent of the overall shortage between both extents
* and apply this percentage to each of the requested indlen values.
@@ -4807,11 +4923,9 @@ xfs_bmap_split_indlen(
*indlen1 = len1;
*indlen2 = len2;
-
- return stolen;
}
-int
+void
xfs_bmap_del_extent_delay(
struct xfs_inode *ip,
int whichfork,
@@ -4824,14 +4938,14 @@ xfs_bmap_del_extent_delay(
struct xfs_bmbt_irec new;
int64_t da_old, da_new, da_diff = 0;
xfs_fileoff_t del_endoff, got_endoff;
- xfs_filblks_t got_indlen, new_indlen, stolen;
+ xfs_filblks_t got_indlen, new_indlen, stolen = 0;
uint32_t state = xfs_bmap_fork_to_state(whichfork);
- int error = 0;
+ uint64_t fdblocks;
bool isrt;
XFS_STATS_INC(mp, xs_del_exlist);
- isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ isrt = xfs_ifork_is_realtime(ip, whichfork);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got->br_startoff + got->br_blockcount;
da_old = startblockval(got->br_startblock);
@@ -4841,18 +4955,12 @@ xfs_bmap_del_extent_delay(
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
- if (isrt)
- xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
-
/*
* Update the inode delalloc counter now and wait to update the
* sb counters as we might have to borrow some blocks for the
* indirect block accounting.
*/
- ASSERT(!isrt);
- error = xfs_quota_unreserve_blkres(ip, del->br_blockcount);
- if (error)
- return error;
+ xfs_quota_unreserve_blkres(ip, del->br_blockcount);
ip->i_delayed_blks -= del->br_blockcount;
if (got->br_startoff == del->br_startoff)
@@ -4906,8 +5014,24 @@ xfs_bmap_del_extent_delay(
new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
WARN_ON_ONCE(!got_indlen || !new_indlen);
- stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
- del->br_blockcount);
+ /*
+ * Steal as many blocks as we can to try and satisfy the worst
+ * case indlen for both new extents.
+ *
+ * However, we can't just steal reservations from the data
+ * blocks if this is an RT inodes as the data and metadata
+ * blocks come from different pools. We'll have to live with
+ * under-filled indirect reservation in this case.
+ */
+ da_new = got_indlen + new_indlen;
+ if (da_new > da_old && !isrt) {
+ stolen = XFS_FILBLKS_MIN(da_new - da_old,
+ del->br_blockcount);
+ da_old += stolen;
+ }
+ if (da_new > da_old)
+ xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen);
+ da_new = got_indlen + new_indlen;
got->br_startblock = nullstartblock((int)got_indlen);
@@ -4919,20 +5043,21 @@ xfs_bmap_del_extent_delay(
xfs_iext_next(ifp, icur);
xfs_iext_insert(ip, icur, &new, state);
- da_new = got_indlen + new_indlen - stolen;
del->br_blockcount -= stolen;
break;
}
ASSERT(da_old >= da_new);
da_diff = da_old - da_new;
- if (!isrt)
- da_diff += del->br_blockcount;
- if (da_diff) {
- xfs_mod_fdblocks(mp, da_diff, false);
- xfs_mod_delalloc(mp, -da_diff);
- }
- return error;
+ fdblocks = da_diff;
+
+ if (isrt)
+ xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
+ else
+ fdblocks += del->br_blockcount;
+
+ xfs_add_fdblocks(mp, fdblocks);
+ xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff);
}
void
@@ -5023,8 +5148,7 @@ xfs_bmap_del_extent_real(
{
xfs_fsblock_t del_endblock=0; /* first block past del */
xfs_fileoff_t del_endoff; /* first offset past del */
- int do_fx; /* free extent at end of routine */
- int error; /* error return value */
+ int error = 0; /* error return value */
struct xfs_bmbt_irec got; /* current extent entry */
xfs_fileoff_t got_endoff; /* first offset past got */
int i; /* temp state */
@@ -5067,20 +5191,10 @@ xfs_bmap_del_extent_real(
return -ENOSPC;
*logflagsp = XFS_ILOG_CORE;
- if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- if (!(bflags & XFS_BMAPI_REMAP)) {
- error = xfs_rtfree_blocks(tp, del->br_startblock,
- del->br_blockcount);
- if (error)
- return error;
- }
-
- do_fx = 0;
+ if (xfs_ifork_is_realtime(ip, whichfork))
qfield = XFS_TRANS_DQ_RTBCOUNT;
- } else {
- do_fx = 1;
+ else
qfield = XFS_TRANS_DQ_BCOUNT;
- }
nblks = del->br_blockcount;
del_endblock = del->br_startblock + del->br_blockcount;
@@ -5088,8 +5202,10 @@ xfs_bmap_del_extent_real(
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
}
if (got.br_startoff == del->br_startoff)
@@ -5113,8 +5229,10 @@ xfs_bmap_del_extent_real(
}
if ((error = xfs_btree_delete(cur, &i)))
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
break;
case BMAP_LEFT_FILLING:
/*
@@ -5186,8 +5304,10 @@ xfs_bmap_del_extent_real(
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
/*
* Update the btree record back
* to the original value.
@@ -5203,8 +5323,10 @@ xfs_bmap_del_extent_real(
*logflagsp = 0;
return -ENOSPC;
}
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
} else
*logflagsp |= xfs_ilog_fext(whichfork);
@@ -5220,18 +5342,29 @@ xfs_bmap_del_extent_real(
/*
* If we need to, add to list of extents to delete.
*/
- if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
+ if (!(bflags & XFS_BMAPI_REMAP)) {
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
+ } else if (xfs_ifork_is_realtime(ip, whichfork)) {
+ /*
+ * Ensure the bitmap and summary inodes are locked
+ * and joined to the transaction before modifying them.
+ */
+ if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) {
+ tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED;
+ xfs_rtbitmap_lock(tp, mp);
+ }
+ error = xfs_rtfree_blocks(tp, del->br_startblock,
+ del->br_blockcount);
} else {
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
XFS_AG_RESV_NONE,
((bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN));
- if (error)
- return error;
}
+ if (error)
+ return error;
}
/*
@@ -5286,12 +5419,14 @@ __xfs_bunmapi(
whichfork = xfs_bmapi_whichfork(flags);
ASSERT(whichfork != XFS_COW_FORK);
ifp = xfs_ifork_ptr(ip, whichfork);
- if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)))
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
+ }
if (xfs_is_shutdown(mp))
return -EIO;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(len > 0);
ASSERT(nexts >= 0);
@@ -5304,7 +5439,7 @@ __xfs_bunmapi(
return 0;
}
XFS_STATS_INC(mp, xs_blk_unmap);
- isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+ isrt = xfs_ifork_is_realtime(ip, whichfork);
end = start + len;
if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
@@ -5317,20 +5452,9 @@ __xfs_bunmapi(
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = 0;
} else
cur = NULL;
- if (isrt) {
- /*
- * Synchronize by locking the bitmap inode.
- */
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
- xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
- xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
- }
-
extno = 0;
while (end != (xfs_fileoff_t)-1 && end >= start &&
(nexts == 0 || extno < nexts)) {
@@ -5367,7 +5491,7 @@ __xfs_bunmapi(
if (del.br_startoff + del.br_blockcount > end + 1)
del.br_blockcount = end + 1 - del.br_startoff;
- if (!isrt)
+ if (!isrt || (flags & XFS_BMAPI_REMAP))
goto delete;
mod = xfs_rtb_to_rtxoff(mp,
@@ -5385,7 +5509,7 @@ __xfs_bunmapi(
* This piece is unwritten, or we're not
* using unwritten extents. Skip over it.
*/
- ASSERT(end >= mod);
+ ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod);
end -= mod > del.br_blockcount ?
del.br_blockcount : mod;
if (end < got.br_startoff &&
@@ -5491,18 +5615,16 @@ __xfs_bunmapi(
delete:
if (wasdel) {
- error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
- &got, &del);
+ xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
} else {
error = xfs_bmap_del_extent_real(ip, tp, &icur, cur,
&del, &tmp_logflags, whichfork,
flags);
logflags |= tmp_logflags;
+ if (error)
+ goto error0;
}
- if (error)
- goto error0;
-
end = del.br_startoff - 1;
nodelete:
/*
@@ -5555,7 +5677,7 @@ error0:
xfs_trans_log_inode(tp, ip, logflags);
if (cur) {
if (!error)
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
return error;
@@ -5635,8 +5757,7 @@ xfs_bmse_merge(
blockcount = left->br_blockcount + got->br_blockcount;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
ASSERT(xfs_bmse_can_merge(left, got, shift));
new = *left;
@@ -5657,21 +5778,27 @@ xfs_bmse_merge(
error = xfs_bmbt_lookup_eq(cur, got, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_btree_delete(cur, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
/* lookup and update size of the previous extent */
error = xfs_bmbt_lookup_eq(cur, left, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_bmbt_update(cur, &new);
if (error)
@@ -5719,8 +5846,10 @@ xfs_bmap_shift_update_extent(
error = xfs_bmbt_lookup_eq(cur, &prev, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, i != 1))
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_bmbt_update(cur, got);
if (error)
@@ -5758,28 +5887,28 @@ xfs_bmap_collapse_extents(
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
- if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE)
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = 0;
- }
if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
*done = true;
goto del_cursor;
}
if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
+ xfs_bmap_mark_sick(ip, whichfork);
error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -5837,7 +5966,7 @@ xfs_bmap_can_insert_extents(
int is_empty;
int error = 0;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
if (xfs_is_shutdown(ip->i_mount))
return -EIO;
@@ -5873,22 +6002,21 @@ xfs_bmap_insert_extents(
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
if (xfs_is_shutdown(mp))
return -EIO;
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
- if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE)
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = 0;
- }
if (*next_fsb == NULLFSBLOCK) {
xfs_iext_last(ifp, &icur);
@@ -5904,11 +6032,13 @@ xfs_bmap_insert_extents(
}
}
if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
+ xfs_bmap_mark_sick(ip, whichfork);
error = -EFSCORRUPTED;
goto del_cursor;
}
if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
+ xfs_bmap_mark_sick(ip, whichfork);
error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -5976,6 +6106,7 @@ xfs_bmap_split_extent(
if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) ||
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ xfs_bmap_mark_sick(ip, whichfork);
return -EFSCORRUPTED;
}
@@ -6002,11 +6133,11 @@ xfs_bmap_split_extent(
if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
- cur->bc_ino.flags = 0;
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto del_cursor;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -6034,6 +6165,7 @@ xfs_bmap_split_extent(
if (error)
goto del_cursor;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -6041,6 +6173,7 @@ xfs_bmap_split_extent(
if (error)
goto del_cursor;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -6060,7 +6193,7 @@ xfs_bmap_split_extent(
del_cursor:
if (cur) {
- cur->bc_ino.allocated = 0;
+ cur->bc_bmap.allocated = 0;
xfs_btree_del_cursor(cur, error);
}
@@ -6069,17 +6202,8 @@ del_cursor:
return error;
}
-/* Deferred mapping is only for real extents in the data fork. */
-static bool
-xfs_bmap_is_update_needed(
- struct xfs_bmbt_irec *bmap)
-{
- return bmap->br_startblock != HOLESTARTBLOCK &&
- bmap->br_startblock != DELAYSTARTBLOCK;
-}
-
/* Record a bmap intent. */
-static int
+static inline void
__xfs_bmap_add(
struct xfs_trans *tp,
enum xfs_bmap_intent_type type,
@@ -6089,25 +6213,19 @@ __xfs_bmap_add(
{
struct xfs_bmap_intent *bi;
- trace_xfs_bmap_defer(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
- type,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
- ip->i_ino, whichfork,
- bmap->br_startoff,
- bmap->br_blockcount,
- bmap->br_state);
+ if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) ||
+ bmap->br_startblock == HOLESTARTBLOCK ||
+ bmap->br_startblock == DELAYSTARTBLOCK)
+ return;
- bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
+ bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&bi->bi_list);
bi->bi_type = type;
bi->bi_owner = ip;
bi->bi_whichfork = whichfork;
bi->bi_bmap = *bmap;
- xfs_bmap_update_get_group(tp->t_mountp, bi);
- xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
- return 0;
+ xfs_bmap_defer_add(tp, bi);
}
/* Map an extent into a file. */
@@ -6115,12 +6233,10 @@ void
xfs_bmap_map_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
+ int whichfork,
struct xfs_bmbt_irec *PREV)
{
- if (!xfs_bmap_is_update_needed(PREV))
- return;
-
- __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
+ __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV);
}
/* Unmap an extent out of a file. */
@@ -6128,12 +6244,10 @@ void
xfs_bmap_unmap_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
+ int whichfork,
struct xfs_bmbt_irec *PREV)
{
- if (!xfs_bmap_is_update_needed(PREV))
- return;
-
- __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
+ __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV);
}
/*
@@ -6147,36 +6261,35 @@ xfs_bmap_finish_one(
{
struct xfs_bmbt_irec *bmap = &bi->bi_bmap;
int error = 0;
+ int flags = 0;
- ASSERT(tp->t_highest_agno == NULLAGNUMBER);
+ if (bi->bi_whichfork == XFS_ATTR_FORK)
+ flags |= XFS_BMAPI_ATTRFORK;
- trace_xfs_bmap_deferred(tp->t_mountp,
- XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock),
- bi->bi_type,
- XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock),
- bi->bi_owner->i_ino, bi->bi_whichfork,
- bmap->br_startoff, bmap->br_blockcount,
- bmap->br_state);
+ ASSERT(tp->t_highest_agno == NULLAGNUMBER);
- if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK))
- return -EFSCORRUPTED;
+ trace_xfs_bmap_deferred(bi);
- if (XFS_TEST_ERROR(false, tp->t_mountp,
- XFS_ERRTAG_BMAP_FINISH_ONE))
+ if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE))
return -EIO;
switch (bi->bi_type) {
case XFS_BMAP_MAP:
+ if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN)
+ flags |= XFS_BMAPI_PREALLOC;
error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff,
- bmap->br_blockcount, bmap->br_startblock, 0);
+ bmap->br_blockcount, bmap->br_startblock,
+ flags);
bmap->br_blockcount = 0;
break;
case XFS_BMAP_UNMAP:
error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff,
- &bmap->br_blockcount, XFS_BMAPI_REMAP, 1);
+ &bmap->br_blockcount, flags | XFS_BMAPI_REMAP,
+ 1);
break;
default:
ASSERT(0);
+ xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork);
error = -EFSCORRUPTED;
}
@@ -6257,7 +6370,7 @@ xfs_bunmapi_range(
xfs_filblks_t unmap_len = endoff - startoff + 1;
int error = 0;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
while (unmap_len > 0) {
ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
@@ -6274,3 +6387,46 @@ xfs_bunmapi_range(
out:
return error;
}
+
+struct xfs_bmap_query_range {
+ xfs_bmap_query_range_fn fn;
+ void *priv;
+};
+
+/* Format btree record and pass to our callback. */
+STATIC int
+xfs_bmap_query_range_helper(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_rec *rec,
+ void *priv)
+{
+ struct xfs_bmap_query_range *query = priv;
+ struct xfs_bmbt_irec irec;
+ xfs_failaddr_t fa;
+
+ xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
+ fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork,
+ &irec);
+ if (fa) {
+ xfs_btree_mark_sick(cur);
+ return xfs_bmap_complain_bad_rec(cur->bc_ino.ip,
+ cur->bc_ino.whichfork, fa, &irec);
+ }
+
+ return query->fn(cur, &irec, query->priv);
+}
+
+/* Find all bmaps. */
+int
+xfs_bmap_query_all(
+ struct xfs_btree_cur *cur,
+ xfs_bmap_query_range_fn fn,
+ void *priv)
+{
+ struct xfs_bmap_query_range query = {
+ .priv = priv,
+ .fn = fn,
+ };
+
+ return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query);
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f6b73f1bad5f..667b0c2b33d1 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -158,7 +158,7 @@ static inline bool xfs_bmap_is_real_extent(const struct xfs_bmbt_irec *irec)
* Return true if the extent is a real, allocated extent, or false if it is a
* delayed allocation, and unwritten extent or a hole.
*/
-static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec)
+static inline bool xfs_bmap_is_written_extent(const struct xfs_bmbt_irec *irec)
{
return xfs_bmap_is_real_extent(irec) &&
irec->br_state != XFS_EXT_UNWRITTEN;
@@ -176,9 +176,16 @@ int xfs_bmap_longest_free_extent(struct xfs_perag *pag,
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp);
-int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+int xfs_bmap_add_attrfork(struct xfs_trans *tp, struct xfs_inode *ip,
+ int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork);
+int xfs_bmap_local_to_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_extlen_t total, int *logflagsp, int whichfork,
+ void (*init_fn)(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_inode *ip, struct xfs_ifork *ifp,
+ void *priv),
+ void *priv);
void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
@@ -195,7 +202,7 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extnum_t nexts, int *done);
-int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+void xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *del);
void xfs_bmap_del_extent_cow(struct xfs_inode *ip,
@@ -232,6 +239,10 @@ enum xfs_bmap_intent_type {
XFS_BMAP_UNMAP,
};
+#define XFS_BMAP_INTENT_STRINGS \
+ { XFS_BMAP_MAP, "map" }, \
+ { XFS_BMAP_UNMAP, "unmap" }
+
struct xfs_bmap_intent {
struct list_head bi_list;
enum xfs_bmap_intent_type bi_type;
@@ -241,14 +252,11 @@ struct xfs_bmap_intent {
struct xfs_bmbt_irec bi_bmap;
};
-void xfs_bmap_update_get_group(struct xfs_mount *mp,
- struct xfs_bmap_intent *bi);
-
int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi);
void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
- struct xfs_bmbt_irec *imap);
+ int whichfork, struct xfs_bmbt_irec *imap);
void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
- struct xfs_bmbt_irec *imap);
+ int whichfork, struct xfs_bmbt_irec *imap);
static inline uint32_t xfs_bmap_fork_to_state(int whichfork)
{
@@ -280,4 +288,12 @@ extern struct kmem_cache *xfs_bmap_intent_cache;
int __init xfs_bmap_intent_init_cache(void);
void xfs_bmap_intent_destroy_cache(void);
+typedef int (*xfs_bmap_query_range_fn)(
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *rec,
+ void *priv);
+
+int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn,
+ void *priv);
+
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 71f2d50f7823..f5d84dcb58da 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -26,6 +26,22 @@
static struct kmem_cache *xfs_bmbt_cur_cache;
+void
+xfs_bmbt_init_block(
+ struct xfs_inode *ip,
+ struct xfs_btree_block *buf,
+ struct xfs_buf *bp,
+ __u16 level,
+ __u16 numrecs)
+{
+ if (bp)
+ xfs_btree_init_buf(ip->i_mount, bp, &xfs_bmbt_ops, level,
+ numrecs, ip->i_ino);
+ else
+ xfs_btree_init_block(ip->i_mount, buf, &xfs_bmbt_ops, level,
+ numrecs, ip->i_ino);
+}
+
/*
* Convert on-disk form of btree root to in-memory form.
*/
@@ -44,9 +60,7 @@ xfs_bmdr_to_bmbt(
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL,
- XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
- XFS_BTREE_LONG_PTRS);
+ xfs_bmbt_init_block(ip, rblock, NULL, 0, 0);
rblock->bb_level = dblock->bb_level;
ASSERT(be16_to_cpu(rblock->bb_level) > 0);
rblock->bb_numrecs = dblock->bb_numrecs;
@@ -171,13 +185,8 @@ xfs_bmbt_dup_cursor(
new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp,
cur->bc_ino.ip, cur->bc_ino.whichfork);
-
- /*
- * Copy the firstblock, dfops, and flags values,
- * since init cursor doesn't get them.
- */
- new->bc_ino.flags = cur->bc_ino.flags;
-
+ new->bc_flags |= (cur->bc_flags &
+ (XFS_BTREE_BMBT_INVALID_OWNER | XFS_BTREE_BMBT_WASDEL));
return new;
}
@@ -189,10 +198,10 @@ xfs_bmbt_update_cursor(
ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) ||
(dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME));
- dst->bc_ino.allocated += src->bc_ino.allocated;
+ dst->bc_bmap.allocated += src->bc_bmap.allocated;
dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno;
- src->bc_ino.allocated = 0;
+ src->bc_bmap.allocated = 0;
}
STATIC int
@@ -211,7 +220,7 @@ xfs_bmbt_alloc_block(
xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino,
cur->bc_ino.whichfork);
args.minlen = args.maxlen = args.prod = 1;
- args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL;
+ args.wasdel = cur->bc_flags & XFS_BTREE_BMBT_WASDEL;
if (!args.wasdel && args.tp->t_blk_res == 0)
return -ENOSPC;
@@ -247,7 +256,7 @@ xfs_bmbt_alloc_block(
}
ASSERT(args.len == 1);
- cur->bc_ino.allocated++;
+ cur->bc_bmap.allocated++;
cur->bc_ino.ip->i_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip,
@@ -360,14 +369,6 @@ xfs_bmbt_init_rec_from_cur(
xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b);
}
-STATIC void
-xfs_bmbt_init_ptr_from_cur(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr)
-{
- ptr->l = 0;
-}
-
STATIC int64_t
xfs_bmbt_key_diff(
struct xfs_btree_cur *cur,
@@ -419,7 +420,7 @@ xfs_bmbt_verify(
* XXX: need a better way of verifying the owner here. Right now
* just make sure there has been one set.
*/
- fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+ fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
if (fa)
return fa;
}
@@ -435,7 +436,7 @@ xfs_bmbt_verify(
if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1]))
return __this_address;
- return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
+ return xfs_btree_fsblock_verify(bp, mp->m_bmap_dmxr[level != 0]);
}
static void
@@ -444,7 +445,7 @@ xfs_bmbt_read_verify(
{
xfs_failaddr_t fa;
- if (!xfs_btree_lblock_verify_crc(bp))
+ if (!xfs_btree_fsblock_verify_crc(bp))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_bmbt_verify(bp);
@@ -468,7 +469,7 @@ xfs_bmbt_write_verify(
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
- xfs_btree_lblock_calc_crc(bp);
+ xfs_btree_fsblock_calc_crc(bp);
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
@@ -515,9 +516,16 @@ xfs_bmbt_keys_contiguous(
be64_to_cpu(key2->bmbt.br_startoff));
}
-static const struct xfs_btree_ops xfs_bmbt_ops = {
+const struct xfs_btree_ops xfs_bmbt_ops = {
+ .name = "bmap",
+ .type = XFS_BTREE_TYPE_INODE,
+
.rec_len = sizeof(xfs_bmbt_rec_t),
.key_len = sizeof(xfs_bmbt_key_t),
+ .ptr_len = XFS_BTREE_LONG_PTR_LEN,
+
+ .lru_refs = XFS_BMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2),
.dup_cursor = xfs_bmbt_dup_cursor,
.update_cursor = xfs_bmbt_update_cursor,
@@ -529,7 +537,6 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.init_key_from_rec = xfs_bmbt_init_key_from_rec,
.init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
- .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
.diff_two_keys = xfs_bmbt_diff_two_keys,
.buf_ops = &xfs_bmbt_buf_ops,
@@ -538,35 +545,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.keys_contiguous = xfs_bmbt_keys_contiguous,
};
-static struct xfs_btree_cur *
-xfs_bmbt_init_common(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- int whichfork)
-{
- struct xfs_btree_cur *cur;
-
- ASSERT(whichfork != XFS_COW_FORK);
-
- cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP,
- mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache);
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);
-
- cur->bc_ops = &xfs_bmbt_ops;
- cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE;
- if (xfs_has_crc(mp))
- cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
- cur->bc_ino.ip = ip;
- cur->bc_ino.allocated = 0;
- cur->bc_ino.flags = 0;
-
- return cur;
-}
-
/*
- * Allocate a new bmap btree cursor.
+ * Create a new bmap btree cursor.
+ *
+ * For staging cursors -1 in passed in whichfork.
*/
struct xfs_btree_cur *
xfs_bmbt_init_cursor(
@@ -575,15 +557,34 @@ xfs_bmbt_init_cursor(
struct xfs_inode *ip,
int whichfork)
{
- struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
struct xfs_btree_cur *cur;
+ unsigned int maxlevels;
- cur = xfs_bmbt_init_common(mp, tp, ip, whichfork);
+ ASSERT(whichfork != XFS_COW_FORK);
- cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
- cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
+ /*
+ * The Data fork always has larger maxlevel, so use that for staging
+ * cursors.
+ */
+ switch (whichfork) {
+ case XFS_STAGING_FORK:
+ maxlevels = mp->m_bm_maxlevels[XFS_DATA_FORK];
+ break;
+ default:
+ maxlevels = mp->m_bm_maxlevels[whichfork];
+ break;
+ }
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bmbt_ops, maxlevels,
+ xfs_bmbt_cur_cache);
+ cur->bc_ino.ip = ip;
cur->bc_ino.whichfork = whichfork;
+ cur->bc_bmap.allocated = 0;
+ if (whichfork != XFS_STAGING_FORK) {
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+ cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
+ }
return cur;
}
@@ -599,33 +600,6 @@ xfs_bmbt_block_maxrecs(
}
/*
- * Allocate a new bmap btree cursor for reloading an inode block mapping data
- * structure. Note that callers can use the staged cursor to reload extents
- * format inode forks if they rebuild the iext tree and commit the staged
- * cursor immediately.
- */
-struct xfs_btree_cur *
-xfs_bmbt_stage_cursor(
- struct xfs_mount *mp,
- struct xfs_inode *ip,
- struct xbtree_ifakeroot *ifake)
-{
- struct xfs_btree_cur *cur;
- struct xfs_btree_ops *ops;
-
- /* data fork always has larger maxheight */
- cur = xfs_bmbt_init_common(mp, NULL, ip, XFS_DATA_FORK);
- cur->bc_nlevels = ifake->if_levels;
- cur->bc_ino.forksize = ifake->if_fork_size;
-
- /* Don't let anyone think we're attached to the real fork yet. */
- cur->bc_ino.whichfork = -1;
- xfs_btree_stage_ifakeroot(cur, ifake, &ops);
- ops->update_cursor = NULL;
- return cur;
-}
-
-/*
* Swap in the new inode fork root. Once we pass this point the newly rebuilt
* mappings are in place and we have to kill off any old btree blocks.
*/
@@ -665,7 +639,7 @@ xfs_bmbt_commit_staged_btree(
break;
}
xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
- xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops);
+ xfs_btree_commit_ifakeroot(cur, tp, whichfork);
}
/*
@@ -751,7 +725,7 @@ xfs_bmbt_change_owner(
ASSERT(xfs_ifork_ptr(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);
cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
- cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER;
+ cur->bc_flags |= XFS_BTREE_BMBT_INVALID_OWNER;
error = xfs_btree_change_owner(cur, new_owner, buffer_list);
xfs_btree_del_cursor(cur, error);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h
index 151b8491f60e..de1b73f1225c 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.h
+++ b/fs/xfs/libxfs/xfs_bmap_btree.h
@@ -107,8 +107,6 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
-struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp,
- struct xfs_inode *ip, struct xbtree_ifakeroot *ifake);
void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, int whichfork);
@@ -120,4 +118,7 @@ unsigned int xfs_bmbt_maxlevels_ondisk(void);
int __init xfs_bmbt_init_cur_cache(void);
void xfs_bmbt_destroy_cur_cache(void);
+void xfs_bmbt_init_block(struct xfs_inode *ip, struct xfs_btree_block *buf,
+ struct xfs_buf *bp, __u16 level, __u16 numrecs);
+
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index ea8d3659df20..d29547572a68 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -27,28 +27,24 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
+#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
/*
* Btree magic numbers.
*/
-static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
- { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
- XFS_FIBT_MAGIC, 0 },
- { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
- XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
- XFS_REFC_CRC_MAGIC }
-};
-
uint32_t
xfs_btree_magic(
- int crc,
- xfs_btnum_t btnum)
+ struct xfs_mount *mp,
+ const struct xfs_btree_ops *ops)
{
- uint32_t magic = xfs_magics[crc][btnum];
+ int idx = xfs_has_crc(mp) ? 1 : 0;
+ __be32 magic = ops->buf_ops->magic[idx];
/* Ensure we asked for crc for crc-only magics. */
ASSERT(magic != 0);
- return magic;
+ return be32_to_cpu(magic);
}
/*
@@ -63,10 +59,8 @@ xfs_btree_magic(
* bytes.
*/
static inline xfs_failaddr_t
-xfs_btree_check_lblock_siblings(
+xfs_btree_check_fsblock_siblings(
struct xfs_mount *mp,
- struct xfs_btree_cur *cur,
- int level,
xfs_fsblock_t fsb,
__be64 dsibling)
{
@@ -78,22 +72,33 @@ xfs_btree_check_lblock_siblings(
sibling = be64_to_cpu(dsibling);
if (sibling == fsb)
return __this_address;
- if (level >= 0) {
- if (!xfs_btree_check_lptr(cur, sibling, level + 1))
- return __this_address;
- } else {
- if (!xfs_verify_fsbno(mp, sibling))
- return __this_address;
- }
+ if (!xfs_verify_fsbno(mp, sibling))
+ return __this_address;
+ return NULL;
+}
+static inline xfs_failaddr_t
+xfs_btree_check_memblock_siblings(
+ struct xfs_buftarg *btp,
+ xfbno_t bno,
+ __be64 dsibling)
+{
+ xfbno_t sibling;
+
+ if (dsibling == cpu_to_be64(NULLFSBLOCK))
+ return NULL;
+
+ sibling = be64_to_cpu(dsibling);
+ if (sibling == bno)
+ return __this_address;
+ if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling)))
+ return __this_address;
return NULL;
}
static inline xfs_failaddr_t
-xfs_btree_check_sblock_siblings(
+xfs_btree_check_agblock_siblings(
struct xfs_perag *pag,
- struct xfs_btree_cur *cur,
- int level,
xfs_agblock_t agbno,
__be32 dsibling)
{
@@ -105,34 +110,21 @@ xfs_btree_check_sblock_siblings(
sibling = be32_to_cpu(dsibling);
if (sibling == agbno)
return __this_address;
- if (level >= 0) {
- if (!xfs_btree_check_sptr(cur, sibling, level + 1))
- return __this_address;
- } else {
- if (!xfs_verify_agbno(pag, sibling))
- return __this_address;
- }
+ if (!xfs_verify_agbno(pag, sibling))
+ return __this_address;
return NULL;
}
-/*
- * Check a long btree block header. Return the address of the failing check,
- * or NULL if everything is ok.
- */
-xfs_failaddr_t
-__xfs_btree_check_lblock(
+static xfs_failaddr_t
+__xfs_btree_check_lblock_hdr(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_btnum_t btnum = cur->bc_btnum;
- int crc = xfs_has_crc(mp);
- xfs_failaddr_t fa;
- xfs_fsblock_t fsb = NULLFSBLOCK;
- if (crc) {
+ if (xfs_has_crc(mp)) {
if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
if (block->bb_u.l.bb_blkno !=
@@ -142,7 +134,7 @@ __xfs_btree_check_lblock(
return __this_address;
}
- if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+ if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
return __this_address;
if (be16_to_cpu(block->bb_level) != level)
return __this_address;
@@ -150,44 +142,83 @@ __xfs_btree_check_lblock(
cur->bc_ops->get_maxrecs(cur, level))
return __this_address;
- if (bp)
- fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ return NULL;
+}
+
+/*
+ * Check a long btree block header. Return the address of the failing check,
+ * or NULL if everything is ok.
+ */
+static xfs_failaddr_t
+__xfs_btree_check_fsblock(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block,
+ int level,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_failaddr_t fa;
+ xfs_fsblock_t fsb;
+
+ fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
+ if (fa)
+ return fa;
- fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
+ /*
+ * For inode-rooted btrees, the root block sits in the inode fork. In
+ * that case bp is NULL, and the block must not have any siblings.
+ */
+ if (!bp) {
+ if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK))
+ return __this_address;
+ if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK))
+ return __this_address;
+ return NULL;
+ }
+
+ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
block->bb_u.l.bb_leftsib);
if (!fa)
- fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
block->bb_u.l.bb_rightsib);
return fa;
}
-/* Check a long btree block header. */
-static int
-xfs_btree_check_lblock(
+/*
+ * Check an in-memory btree block header. Return the address of the failing
+ * check, or NULL if everything is ok.
+ */
+static xfs_failaddr_t
+__xfs_btree_check_memblock(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
- struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
xfs_failaddr_t fa;
+ xfbno_t bno;
- fa = __xfs_btree_check_lblock(cur, block, level, bp);
- if (XFS_IS_CORRUPT(mp, fa != NULL) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
- if (bp)
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- return -EFSCORRUPTED;
- }
- return 0;
+ fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp);
+ if (fa)
+ return fa;
+
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (!fa)
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ return fa;
}
/*
* Check a short btree block header. Return the address of the failing check,
* or NULL if everything is ok.
*/
-xfs_failaddr_t
-__xfs_btree_check_sblock(
+static xfs_failaddr_t
+__xfs_btree_check_agblock(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
@@ -195,20 +226,17 @@ __xfs_btree_check_sblock(
{
struct xfs_mount *mp = cur->bc_mp;
struct xfs_perag *pag = cur->bc_ag.pag;
- xfs_btnum_t btnum = cur->bc_btnum;
- int crc = xfs_has_crc(mp);
xfs_failaddr_t fa;
- xfs_agblock_t agbno = NULLAGBLOCK;
+ xfs_agblock_t agbno;
- if (crc) {
+ if (xfs_has_crc(mp)) {
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
return __this_address;
- if (block->bb_u.s.bb_blkno !=
- cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
}
- if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
+ if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
return __this_address;
if (be16_to_cpu(block->bb_level) != level)
return __this_address;
@@ -216,36 +244,45 @@ __xfs_btree_check_sblock(
cur->bc_ops->get_maxrecs(cur, level))
return __this_address;
- if (bp)
- agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
-
- fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
+ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
+ fa = xfs_btree_check_agblock_siblings(pag, agbno,
block->bb_u.s.bb_leftsib);
if (!fa)
- fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
+ fa = xfs_btree_check_agblock_siblings(pag, agbno,
block->bb_u.s.bb_rightsib);
return fa;
}
-/* Check a short btree block header. */
-STATIC int
-xfs_btree_check_sblock(
+/*
+ * Internal btree block check.
+ *
+ * Return NULL if the block is ok or the address of the failed check otherwise.
+ */
+xfs_failaddr_t
+__xfs_btree_check_block(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
int level,
struct xfs_buf *bp)
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_failaddr_t fa;
-
- fa = __xfs_btree_check_sblock(cur, block, level, bp);
- if (XFS_IS_CORRUPT(mp, fa != NULL) ||
- XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
- if (bp)
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- return -EFSCORRUPTED;
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return __xfs_btree_check_memblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_AG:
+ return __xfs_btree_check_agblock(cur, block, level, bp);
+ case XFS_BTREE_TYPE_INODE:
+ return __xfs_btree_check_fsblock(cur, block, level, bp);
+ default:
+ ASSERT(0);
+ return __this_address;
}
- return 0;
+}
+
+static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN)
+ return XFS_ERRTAG_BTREE_CHECK_SBLOCK;
+ return XFS_ERRTAG_BTREE_CHECK_LBLOCK;
}
/*
@@ -258,34 +295,49 @@ xfs_btree_check_block(
int level, /* level of the btree block */
struct xfs_buf *bp) /* buffer containing block, if any */
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return xfs_btree_check_lblock(cur, block, level, bp);
- else
- return xfs_btree_check_sblock(cur, block, level, bp);
-}
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_failaddr_t fa;
-/* Check that this long pointer is valid and points within the fs. */
-bool
-xfs_btree_check_lptr(
- struct xfs_btree_cur *cur,
- xfs_fsblock_t fsbno,
- int level)
-{
- if (level <= 0)
- return false;
- return xfs_verify_fsbno(cur->bc_mp, fsbno);
+ fa = __xfs_btree_check_block(cur, block, level, bp);
+ if (XFS_IS_CORRUPT(mp, fa != NULL) ||
+ XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) {
+ if (bp)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
-/* Check that this short pointer is valid and points within the AG. */
-bool
-xfs_btree_check_sptr(
- struct xfs_btree_cur *cur,
- xfs_agblock_t agbno,
- int level)
+int
+__xfs_btree_check_ptr(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int index,
+ int level)
{
if (level <= 0)
- return false;
- return xfs_verify_agbno(cur->bc_ag.pag, agbno);
+ return -EFSCORRUPTED;
+
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ if (!xfbtree_verify_bno(cur->bc_mem.xfbtree,
+ be64_to_cpu((&ptr->l)[index])))
+ return -EFSCORRUPTED;
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ if (!xfs_verify_fsbno(cur->bc_mp,
+ be64_to_cpu((&ptr->l)[index])))
+ return -EFSCORRUPTED;
+ break;
+ case XFS_BTREE_TYPE_AG:
+ if (!xfs_verify_agbno(cur->bc_ag.pag,
+ be32_to_cpu((&ptr->s)[index])))
+ return -EFSCORRUPTED;
+ break;
+ }
+
+ return 0;
}
/*
@@ -299,26 +351,35 @@ xfs_btree_check_ptr(
int index,
int level)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
- level))
- return 0;
- xfs_err(cur->bc_mp,
-"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
+ int error;
+
+ error = __xfs_btree_check_ptr(cur, ptr, index, level);
+ if (error) {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ xfs_err(cur->bc_mp,
+"In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS.",
+ cur->bc_ops->name, cur->bc_flags, level, index,
+ __this_address);
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ xfs_err(cur->bc_mp,
+"Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d.",
cur->bc_ino.ip->i_ino,
- cur->bc_ino.whichfork, cur->bc_btnum,
+ cur->bc_ino.whichfork, cur->bc_ops->name,
level, index);
- } else {
- if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
- level))
- return 0;
- xfs_err(cur->bc_mp,
-"AG %u: Corrupt btree %d pointer at level %d index %d.",
- cur->bc_ag.pag->pag_agno, cur->bc_btnum,
+ break;
+ case XFS_BTREE_TYPE_AG:
+ xfs_err(cur->bc_mp,
+"AG %u: Corrupt %sbt pointer at level %d index %d.",
+ cur->bc_ag.pag->pag_agno, cur->bc_ops->name,
level, index);
+ break;
+ }
+ xfs_btree_mark_sick(cur);
}
- return -EFSCORRUPTED;
+ return error;
}
#ifdef DEBUG
@@ -336,7 +397,7 @@ xfs_btree_check_ptr(
* it to disk.
*/
void
-xfs_btree_lblock_calc_crc(
+xfs_btree_fsblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@@ -350,7 +411,7 @@ xfs_btree_lblock_calc_crc(
}
bool
-xfs_btree_lblock_verify_crc(
+xfs_btree_fsblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@@ -374,7 +435,7 @@ xfs_btree_lblock_verify_crc(
* it to disk.
*/
void
-xfs_btree_sblock_calc_crc(
+xfs_btree_agblock_calc_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@@ -388,7 +449,7 @@ xfs_btree_sblock_calc_crc(
}
bool
-xfs_btree_sblock_verify_crc(
+xfs_btree_agblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
@@ -410,6 +471,17 @@ xfs_btree_free_block(
{
int error;
+ trace_xfs_btree_free_block(cur, bp);
+
+ /*
+ * Don't allow block freeing for a staging cursor, because staging
+ * cursors do not support regular btree modifications.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
error = cur->bc_ops->free_block(cur, bp);
if (!error) {
xfs_trans_binval(cur->bc_tp, bp);
@@ -448,33 +520,70 @@ xfs_btree_del_cursor(
* zero, then we should be shut down or on our way to shutdown due to
* cancelling a dirty transaction on error.
*/
- ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
+ ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 ||
xfs_is_shutdown(cur->bc_mp) || error != 0);
- if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
- kmem_free(cur->bc_ops);
- if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
- xfs_perag_put(cur->bc_ag.pag);
+
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
+ if (cur->bc_ag.pag)
+ xfs_perag_put(cur->bc_ag.pag);
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ /* nothing to do */
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ if (cur->bc_mem.pag)
+ xfs_perag_put(cur->bc_mem.pag);
+ break;
+ }
+
kmem_cache_free(cur->bc_cache, cur);
}
+/* Return the buffer target for this btree's buffer. */
+static inline struct xfs_buftarg *
+xfs_btree_buftarg(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return cur->bc_mem.xfbtree->target;
+ return cur->bc_mp->m_ddev_targp;
+}
+
+/* Return the block size (in units of 512b sectors) for this btree. */
+static inline unsigned int
+xfs_btree_bbsize(
+ struct xfs_btree_cur *cur)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM)
+ return XFBNO_BBSIZE;
+ return cur->bc_mp->m_bsize;
+}
+
/*
* Duplicate the btree cursor.
* Allocate a new one, copy the record, re-get the buffers.
*/
-int /* error */
+int /* error */
xfs_btree_dup_cursor(
- struct xfs_btree_cur *cur, /* input cursor */
- struct xfs_btree_cur **ncur) /* output cursor */
+ struct xfs_btree_cur *cur, /* input cursor */
+ struct xfs_btree_cur **ncur) /* output cursor */
{
- struct xfs_buf *bp; /* btree block's buffer pointer */
- int error; /* error return value */
- int i; /* level number of btree block */
- xfs_mount_t *mp; /* mount structure for filesystem */
- struct xfs_btree_cur *new; /* new cursor value */
- xfs_trans_t *tp; /* transaction pointer, can be NULL */
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_trans *tp = cur->bc_tp;
+ struct xfs_buf *bp;
+ struct xfs_btree_cur *new;
+ int error;
+ int i;
- tp = cur->bc_tp;
- mp = cur->bc_mp;
+ /*
+ * Don't allow staging cursors to be duplicated because they're supposed
+ * to be kept private to a single thread.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
/*
* Allocate a new cursor like the old one.
@@ -494,10 +603,13 @@ xfs_btree_dup_cursor(
new->bc_levels[i].ra = cur->bc_levels[i].ra;
bp = cur->bc_levels[i].bp;
if (bp) {
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- xfs_buf_daddr(bp), mp->m_bsize,
- 0, &bp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, tp,
+ xfs_btree_buftarg(cur),
+ xfs_buf_daddr(bp),
+ xfs_btree_bbsize(cur), 0, &bp,
+ cur->bc_ops->buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(new);
if (error) {
xfs_btree_del_cursor(new, error);
*ncur = NULL;
@@ -539,7 +651,7 @@ xfs_btree_dup_cursor(
* record, key or pointer (xfs_btree_*_addr). Note that all addressing
* inside the btree block is done using indices starting at one, not zero!
*
- * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
+ * If XFS_BTGEO_OVERLAPPING is set, then this btree supports keys containing
* overlapping intervals. In such a tree, records are still sorted lowest to
* highest and indexed by the smallest key value that refers to the record.
* However, nodes are different: each pointer has two associated keys -- one
@@ -589,26 +701,17 @@ xfs_btree_dup_cursor(
*/
static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
+ if (xfs_has_crc(cur->bc_mp))
return XFS_BTREE_LBLOCK_CRC_LEN;
return XFS_BTREE_LBLOCK_LEN;
}
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+ if (xfs_has_crc(cur->bc_mp))
return XFS_BTREE_SBLOCK_CRC_LEN;
return XFS_BTREE_SBLOCK_LEN;
}
/*
- * Return size of btree block pointers for this btree instance.
- */
-static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
-{
- return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
- sizeof(__be64) : sizeof(__be32);
-}
-
-/*
* Calculate offset of the n-th record in a btree block.
*/
STATIC size_t
@@ -655,7 +758,7 @@ xfs_btree_ptr_offset(
{
return xfs_btree_block_len(cur) +
cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
- (n - 1) * xfs_btree_ptr_len(cur);
+ (n - 1) * cur->bc_ops->ptr_len;
}
/*
@@ -718,7 +821,7 @@ struct xfs_ifork *
xfs_btree_ifork_ptr(
struct xfs_btree_cur *cur)
{
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
if (cur->bc_flags & XFS_BTREE_STAGING)
return cur->bc_ino.ifake->if_fork;
@@ -750,8 +853,7 @@ xfs_btree_get_block(
int level, /* level in btree */
struct xfs_buf **bpp) /* buffer containing the block */
{
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1)) {
+ if (xfs_btree_at_iroot(cur, level)) {
*bpp = NULL;
return xfs_btree_get_iroot(cur);
}
@@ -856,95 +958,52 @@ xfs_btree_offsets(
}
}
-/*
- * Get a buffer for the block, return it read in.
- * Long-form addressing.
- */
-int
-xfs_btree_read_bufl(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- struct xfs_buf **bpp, /* buffer for fsbno */
- int refval, /* ref count value for buffer */
- const struct xfs_buf_ops *ops)
-{
- struct xfs_buf *bp; /* return value */
- xfs_daddr_t d; /* real disk block address */
- int error;
-
- if (!xfs_verify_fsbno(mp, fsbno))
- return -EFSCORRUPTED;
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
- mp->m_bsize, 0, &bp, ops);
- if (error)
- return error;
- if (bp)
- xfs_buf_set_ref(bp, refval);
- *bpp = bp;
- return 0;
-}
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Long-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufl(
- struct xfs_mount *mp, /* file system mount point */
- xfs_fsblock_t fsbno, /* file system block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops)
+STATIC int
+xfs_btree_readahead_fsblock(
+ struct xfs_btree_cur *cur,
+ int lr,
+ struct xfs_btree_block *block)
{
- xfs_daddr_t d;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
+ int rval = 0;
- ASSERT(fsbno != NULLFSBLOCK);
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
-}
+ if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
+ xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, left),
+ mp->m_bsize, cur->bc_ops->buf_ops);
+ rval++;
+ }
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Short-form addressing.
- */
-/* ARGSUSED */
-void
-xfs_btree_reada_bufs(
- struct xfs_mount *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops)
-{
- xfs_daddr_t d;
+ if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
+ xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, right),
+ mp->m_bsize, cur->bc_ops->buf_ops);
+ rval++;
+ }
- ASSERT(agno != NULLAGNUMBER);
- ASSERT(agbno != NULLAGBLOCK);
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
+ return rval;
}
STATIC int
-xfs_btree_readahead_lblock(
+xfs_btree_readahead_memblock(
struct xfs_btree_cur *cur,
int lr,
struct xfs_btree_block *block)
{
+ struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target;
+ xfbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
+ xfbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
int rval = 0;
- xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
- xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
- xfs_btree_reada_bufl(cur->bc_mp, left, 1,
- cur->bc_ops->buf_ops);
+ xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
- xfs_btree_reada_bufl(cur->bc_mp, right, 1,
- cur->bc_ops->buf_ops);
+ xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE,
+ cur->bc_ops->buf_ops);
rval++;
}
@@ -952,25 +1011,28 @@ xfs_btree_readahead_lblock(
}
STATIC int
-xfs_btree_readahead_sblock(
+xfs_btree_readahead_agblock(
struct xfs_btree_cur *cur,
int lr,
- struct xfs_btree_block *block)
+ struct xfs_btree_block *block)
{
- int rval = 0;
+ struct xfs_mount *mp = cur->bc_mp;
+ xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno;
xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
-
+ int rval = 0;
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- left, 1, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, left),
+ mp->m_bsize, cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
- xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- right, 1, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(mp, agno, right),
+ mp->m_bsize, cur->bc_ops->buf_ops);
rval++;
}
@@ -993,8 +1055,7 @@ xfs_btree_readahead(
* No readahead needed if we are at the root level and the
* btree root is stored in the inode.
*/
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (lev == cur->bc_nlevels - 1))
+ if (xfs_btree_at_iroot(cur, lev))
return 0;
if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
@@ -1003,9 +1064,17 @@ xfs_btree_readahead(
cur->bc_levels[lev].ra |= lr;
block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return xfs_btree_readahead_lblock(cur, lr, block);
- return xfs_btree_readahead_sblock(cur, lr, block);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
+ return xfs_btree_readahead_agblock(cur, lr, block);
+ case XFS_BTREE_TYPE_INODE:
+ return xfs_btree_readahead_fsblock(cur, lr, block);
+ case XFS_BTREE_TYPE_MEM:
+ return xfs_btree_readahead_memblock(cur, lr, block);
+ default:
+ ASSERT(0);
+ return 0;
+ }
}
STATIC int
@@ -1014,23 +1083,24 @@ xfs_btree_ptr_to_daddr(
const union xfs_btree_ptr *ptr,
xfs_daddr_t *daddr)
{
- xfs_fsblock_t fsbno;
- xfs_agblock_t agbno;
int error;
error = xfs_btree_check_ptr(cur, ptr, 0, 1);
if (error)
return error;
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- fsbno = be64_to_cpu(ptr->l);
- *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
- } else {
- agbno = be32_to_cpu(ptr->s);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- agbno);
+ be32_to_cpu(ptr->s));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ *daddr = xfbno_to_daddr(be64_to_cpu(ptr->l));
+ break;
}
-
return 0;
}
@@ -1050,8 +1120,9 @@ xfs_btree_readahead_ptr(
if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
return;
- xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
- cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+ xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
+ xfs_btree_bbsize(cur) * count,
+ cur->bc_ops->buf_ops);
}
/*
@@ -1072,7 +1143,7 @@ xfs_btree_setbuf(
cur->bc_levels[lev].ra = 0;
b = XFS_BUF_TO_BLOCK(bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
@@ -1090,7 +1161,7 @@ xfs_btree_ptr_is_null(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return ptr->l == cpu_to_be64(NULLFSBLOCK);
else
return ptr->s == cpu_to_be32(NULLAGBLOCK);
@@ -1101,12 +1172,23 @@ xfs_btree_set_ptr_null(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
ptr->l = cpu_to_be64(NULLFSBLOCK);
else
ptr->s = cpu_to_be32(NULLAGBLOCK);
}
+static inline bool
+xfs_btree_ptrs_equal(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr1,
+ union xfs_btree_ptr *ptr2)
+{
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
+ return ptr1->l == ptr2->l;
+ return ptr1->s == ptr2->s;
+}
+
/*
* Get/set/init sibling pointers
*/
@@ -1119,7 +1201,7 @@ xfs_btree_get_sibling(
{
ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (lr == XFS_BB_RIGHTSIB)
ptr->l = block->bb_u.l.bb_rightsib;
else
@@ -1141,7 +1223,7 @@ xfs_btree_set_sibling(
{
ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (lr == XFS_BB_RIGHTSIB)
block->bb_u.l.bb_rightsib = ptr->l;
else
@@ -1154,25 +1236,24 @@ xfs_btree_set_sibling(
}
}
-void
-xfs_btree_init_block_int(
+static void
+__xfs_btree_init_block(
struct xfs_mount *mp,
struct xfs_btree_block *buf,
+ const struct xfs_btree_ops *ops,
xfs_daddr_t blkno,
- xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags)
+ __u64 owner)
{
- int crc = xfs_has_crc(mp);
- __u32 magic = xfs_btree_magic(crc, btnum);
+ bool crc = xfs_has_crc(mp);
+ __u32 magic = xfs_btree_magic(mp, ops);
buf->bb_magic = cpu_to_be32(magic);
buf->bb_level = cpu_to_be16(level);
buf->bb_numrecs = cpu_to_be16(numrecs);
- if (flags & XFS_BTREE_LONG_PTRS) {
+ if (ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
if (crc) {
@@ -1183,14 +1264,12 @@ xfs_btree_init_block_int(
buf->bb_u.l.bb_lsn = 0;
}
} else {
- /* owner is a 32 bit value on short blocks */
- __u32 __owner = (__u32)owner;
-
buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
if (crc) {
buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
- buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+ /* owner is a 32 bit value on short blocks */
+ buf->bb_u.s.bb_owner = cpu_to_be32((__u32)owner);
uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
buf->bb_u.s.bb_lsn = 0;
}
@@ -1199,15 +1278,46 @@ xfs_btree_init_block_int(
void
xfs_btree_init_block(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- xfs_btnum_t btnum,
- __u16 level,
- __u16 numrecs,
- __u64 owner)
+ struct xfs_mount *mp,
+ struct xfs_btree_block *block,
+ const struct xfs_btree_ops *ops,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner)
+{
+ __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
+ numrecs, owner);
+}
+
+void
+xfs_btree_init_buf(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ const struct xfs_btree_ops *ops,
+ __u16 level,
+ __u16 numrecs,
+ __u64 owner)
+{
+ __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
+ xfs_buf_daddr(bp), level, numrecs, owner);
+ bp->b_ops = ops->buf_ops;
+}
+
+static inline __u64
+xfs_btree_owner(
+ struct xfs_btree_cur *cur)
{
- xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
- btnum, level, numrecs, owner, 0);
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_MEM:
+ return cur->bc_mem.xfbtree->owner;
+ case XFS_BTREE_TYPE_INODE:
+ return cur->bc_ino.ip->i_ino;
+ case XFS_BTREE_TYPE_AG:
+ return cur->bc_ag.pag->pag_agno;
+ default:
+ ASSERT(0);
+ return 0;
+ }
}
void
@@ -1217,22 +1327,8 @@ xfs_btree_init_block_cur(
int level,
int numrecs)
{
- __u64 owner;
-
- /*
- * we can pull the owner from the cursor right now as the different
- * owners align directly with the pointer size of the btree. This may
- * change in future, but is safe for current users of the generic btree
- * code.
- */
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- owner = cur->bc_ino.ip->i_ino;
- else
- owner = cur->bc_ag.pag->pag_agno;
-
- xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
- xfs_buf_daddr(bp), cur->bc_btnum, level,
- numrecs, owner, cur->bc_flags);
+ xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs,
+ xfs_btree_owner(cur));
}
/*
@@ -1250,7 +1346,7 @@ xfs_btree_is_lastrec(
if (level > 0)
return 0;
- if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
+ if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE))
return 0;
xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
@@ -1265,41 +1361,27 @@ xfs_btree_buf_to_ptr(
struct xfs_buf *bp,
union xfs_btree_ptr *ptr)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
- xfs_buf_daddr(bp)));
- else {
+ switch (cur->bc_ops->type) {
+ case XFS_BTREE_TYPE_AG:
ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_INODE:
+ ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
+ xfs_buf_daddr(bp)));
+ break;
+ case XFS_BTREE_TYPE_MEM:
+ ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp)));
+ break;
}
}
-STATIC void
+static inline void
xfs_btree_set_refs(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
- switch (cur->bc_btnum) {
- case XFS_BTNUM_BNO:
- case XFS_BTNUM_CNT:
- xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
- break;
- case XFS_BTNUM_INO:
- case XFS_BTNUM_FINO:
- xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
- break;
- case XFS_BTNUM_BMAP:
- xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
- break;
- case XFS_BTNUM_RMAP:
- xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
- break;
- case XFS_BTNUM_REFC:
- xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
- break;
- default:
- ASSERT(0);
- }
+ xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
}
int
@@ -1309,15 +1391,14 @@ xfs_btree_get_buf_block(
struct xfs_btree_block **block,
struct xfs_buf **bpp)
{
- struct xfs_mount *mp = cur->bc_mp;
- xfs_daddr_t d;
- int error;
+ xfs_daddr_t d;
+ int error;
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
- 0, bpp);
+ error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), 0, bpp);
if (error)
return error;
@@ -1348,9 +1429,11 @@ xfs_btree_read_buf_block(
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags, bpp,
- cur->bc_ops->buf_ops);
+ error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
+ xfs_btree_bbsize(cur), flags, bpp,
+ cur->bc_ops->buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_btree_mark_sick(cur);
if (error)
return error;
@@ -1398,7 +1481,7 @@ xfs_btree_copy_ptrs(
int numptrs)
{
ASSERT(numptrs >= 0);
- memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
+ memcpy(dst_ptr, src_ptr, numptrs * cur->bc_ops->ptr_len);
}
/*
@@ -1454,8 +1537,8 @@ xfs_btree_shift_ptrs(
ASSERT(numptrs >= 0);
ASSERT(dir == 1 || dir == -1);
- dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
- memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
+ dst_ptr = (char *)ptr + (dir * cur->bc_ops->ptr_len);
+ memmove(dst_ptr, ptr, numptrs * cur->bc_ops->ptr_len);
}
/*
@@ -1566,7 +1649,7 @@ xfs_btree_log_block(
if (bp) {
int nbits;
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ if (xfs_has_crc(cur->bc_mp)) {
/*
* We don't log the CRC when updating a btree
* block but instead recreate it during log
@@ -1581,7 +1664,7 @@ xfs_btree_log_block(
nbits = XFS_BB_NUM_BITS;
}
xfs_btree_offsets(fields,
- (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
+ (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) ?
loffsets : soffsets,
nbits, &first, &last);
xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
@@ -1658,9 +1741,10 @@ xfs_btree_increment(
* confused or have the tree root in an inode.
*/
if (lev == cur->bc_nlevels) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
goto out0;
ASSERT(0);
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1751,9 +1835,10 @@ xfs_btree_decrement(
* or the root of the tree is in an inode.
*/
if (lev == cur->bc_nlevels) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
goto out0;
ASSERT(0);
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1786,6 +1871,33 @@ error0:
return error;
}
+/*
+ * Check the btree block owner now that we have the context to know who the
+ * real owner is.
+ */
+static inline xfs_failaddr_t
+xfs_btree_check_block_owner(
+ struct xfs_btree_cur *cur,
+ struct xfs_btree_block *block)
+{
+ __u64 owner;
+
+ if (!xfs_has_crc(cur->bc_mp) ||
+ (cur->bc_flags & XFS_BTREE_BMBT_INVALID_OWNER))
+ return NULL;
+
+ owner = xfs_btree_owner(cur);
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
+ if (be64_to_cpu(block->bb_u.l.bb_owner) != owner)
+ return __this_address;
+ } else {
+ if (be32_to_cpu(block->bb_u.s.bb_owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
int
xfs_btree_lookup_get_block(
struct xfs_btree_cur *cur, /* btree cursor */
@@ -1798,8 +1910,7 @@ xfs_btree_lookup_get_block(
int error = 0;
/* special case the root block if in an inode */
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1)) {
+ if (xfs_btree_at_iroot(cur, level)) {
*blkp = xfs_btree_get_iroot(cur);
return 0;
}
@@ -1824,11 +1935,7 @@ xfs_btree_lookup_get_block(
return error;
/* Check the inode owner since the verifiers don't. */
- if (xfs_has_crc(cur->bc_mp) &&
- !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
- (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
- be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
- cur->bc_ino.ip->i_ino)
+ if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
goto out_bad;
/* Did we get the level we were looking for? */
@@ -1846,6 +1953,7 @@ out_bad:
*blkp = NULL;
xfs_buf_mark_corrupt(bp);
xfs_trans_brelse(cur->bc_tp, bp);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -1872,6 +1980,27 @@ xfs_lookup_get_search_key(
}
/*
+ * Initialize a pointer to the root block.
+ */
+void
+xfs_btree_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+ /*
+ * Inode-rooted btrees call xfs_btree_get_iroot to find the root
+ * in xfs_btree_lookup_get_block and don't need a pointer here.
+ */
+ ptr->l = 0;
+ } else if (cur->bc_flags & XFS_BTREE_STAGING) {
+ ptr->s = cpu_to_be32(cur->bc_ag.afake->af_root);
+ } else {
+ cur->bc_ops->init_ptr_from_cur(cur, ptr);
+ }
+}
+
+/*
* Lookup the record. The cursor is made to point to it, based on dir.
* stat is set to 0 if can't find any such record, 1 for success.
*/
@@ -1892,14 +2021,16 @@ xfs_btree_lookup(
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
- if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
+ if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
block = NULL;
keyno = 0;
/* initialise start pointer from cursor */
- cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ xfs_btree_init_ptr_from_cur(cur, &ptr);
pp = &ptr;
/*
@@ -1936,6 +2067,7 @@ xfs_btree_lookup(
XFS_ERRLEVEL_LOW,
cur->bc_mp, block,
sizeof(*block));
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -2012,8 +2144,10 @@ xfs_btree_lookup(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
*stat = 1;
return 0;
}
@@ -2040,7 +2174,7 @@ xfs_btree_high_key_from_key(
struct xfs_btree_cur *cur,
union xfs_btree_key *key)
{
- ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+ ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING);
return (union xfs_btree_key *)((char *)key +
(cur->bc_ops->key_len / 2));
}
@@ -2061,7 +2195,7 @@ xfs_btree_get_leaf_keys(
rec = xfs_btree_rec_addr(cur, 1, block);
cur->bc_ops->init_key_from_rec(key, rec);
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
@@ -2088,7 +2222,7 @@ xfs_btree_get_node_keys(
union xfs_btree_key *high;
int n;
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
memcpy(key, xfs_btree_key_addr(cur, 1, block),
cur->bc_ops->key_len / 2);
@@ -2132,7 +2266,7 @@ xfs_btree_needs_key_update(
struct xfs_btree_cur *cur,
int ptr)
{
- return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
+ return (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) || ptr == 1;
}
/*
@@ -2156,7 +2290,7 @@ __xfs_btree_updkeys(
struct xfs_buf *bp;
int ptr;
- ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
+ ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING);
/* Exit if there aren't any parent levels to update. */
if (level + 1 >= cur->bc_nlevels)
@@ -2225,7 +2359,7 @@ xfs_btree_update_keys(
ASSERT(level >= 0);
block = xfs_btree_get_block(cur, level, &bp);
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)
return __xfs_btree_updkeys(cur, level, block, bp, false);
/*
@@ -2332,8 +2466,7 @@ xfs_btree_lshift(
int error; /* error return value */
int i;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- level == cur->bc_nlevels - 1)
+ if (xfs_btree_at_iroot(cur, level))
goto out0;
/* Set up variables for this block as "right". */
@@ -2460,12 +2593,13 @@ xfs_btree_lshift(
* Using a temporary cursor, update the parent key values of the
* block on the left.
*/
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_dup_cursor(cur, &tcur);
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2527,8 +2661,7 @@ xfs_btree_rshift(
int error; /* error return value */
int i; /* loop counter */
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level == cur->bc_nlevels - 1))
+ if (xfs_btree_at_iroot(cur, level))
goto out0;
/* Set up variables for this block as "left". */
@@ -2636,6 +2769,7 @@ xfs_btree_rshift(
goto error0;
i = xfs_btree_lastrec(tcur, level);
if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2645,7 +2779,7 @@ xfs_btree_rshift(
goto error1;
/* Update the parent high keys of the left block, if needed. */
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_update_keys(cur, level);
if (error)
goto error1;
@@ -2673,6 +2807,32 @@ error1:
return error;
}
+static inline int
+xfs_btree_alloc_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *hint_block,
+ union xfs_btree_ptr *new_block,
+ int *stat)
+{
+ int error;
+
+ /*
+ * Don't allow block allocation for a staging cursor, because staging
+ * cursors do not support regular btree modifications.
+ *
+ * Bulk loading uses a separate callback to obtain new blocks from a
+ * preallocated list, which prevents ENOSPC failures during loading.
+ */
+ if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
+ trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
+ return error;
+}
+
/*
* Split cur/level block in half.
* Return new block number and the key to its first
@@ -2716,7 +2876,7 @@ __xfs_btree_split(
xfs_btree_buf_to_ptr(cur, lbp, &lptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
+ error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -2823,7 +2983,7 @@ __xfs_btree_split(
}
/* Update the parent high keys of the left block, if needed. */
- if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
+ if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) {
error = xfs_btree_update_keys(cur, level);
if (error)
goto error0;
@@ -2941,7 +3101,7 @@ xfs_btree_split(
struct xfs_btree_split_args args;
DECLARE_COMPLETION_ONSTACK(done);
- if (cur->bc_btnum != XFS_BTNUM_BMAP ||
+ if (!xfs_btree_is_bmap(cur->bc_ops) ||
cur->bc_tp->t_highest_agno == NULLAGNUMBER)
return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
@@ -2963,7 +3123,6 @@ xfs_btree_split(
#define xfs_btree_split __xfs_btree_split
#endif /* __KERNEL__ */
-
/*
* Copy the old inode root contents into a real block and make the
* broot point to it.
@@ -2988,7 +3147,7 @@ xfs_btree_new_iroot(
XFS_BTREE_STATS_INC(cur, newroot);
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
level = cur->bc_nlevels - 1;
@@ -2996,7 +3155,7 @@ xfs_btree_new_iroot(
pp = xfs_btree_ptr_addr(cur, 1, block);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
+ error = xfs_btree_alloc_block(cur, pp, &nptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -3014,9 +3173,9 @@ xfs_btree_new_iroot(
* In that case have to also ensure the blkno remains correct
*/
memcpy(cblock, block, xfs_btree_block_len(cur));
- if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+ if (xfs_has_crc(cur->bc_mp)) {
__be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
cblock->bb_u.l.bb_blkno = bno;
else
cblock->bb_u.s.bb_blkno = bno;
@@ -3069,6 +3228,21 @@ error0:
return error;
}
+static void
+xfs_btree_set_root(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int inc)
+{
+ if (cur->bc_flags & XFS_BTREE_STAGING) {
+ /* Update the btree root information for a per-AG fake root. */
+ cur->bc_ag.afake->af_root = be32_to_cpu(ptr->s);
+ cur->bc_ag.afake->af_levels += inc;
+ } else {
+ cur->bc_ops->set_root(cur, ptr, inc);
+ }
+}
+
/*
* Allocate a new root block, fill it in.
*/
@@ -3093,10 +3267,10 @@ xfs_btree_new_root(
XFS_BTREE_STATS_INC(cur, newroot);
/* initialise our start point from the cursor */
- cur->bc_ops->init_ptr_from_cur(cur, &rptr);
+ xfs_btree_init_ptr_from_cur(cur, &rptr);
/* Allocate the new block. If we can't do it, we're toast. Give up. */
- error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
+ error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
if (error)
goto error0;
if (*stat == 0)
@@ -3109,7 +3283,7 @@ xfs_btree_new_root(
goto error0;
/* Set the root in the holding structure increasing the level by 1. */
- cur->bc_ops->set_root(cur, &lptr, 1);
+ xfs_btree_set_root(cur, &lptr, 1);
/*
* At the previous root level there are now two blocks: the old root,
@@ -3213,8 +3387,7 @@ xfs_btree_make_block_unfull(
{
int error = 0;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- level == cur->bc_nlevels - 1) {
+ if (xfs_btree_at_iroot(cur, level)) {
struct xfs_inode *ip = cur->bc_ino.ip;
if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
@@ -3299,8 +3472,8 @@ xfs_btree_insrec(
* If we have an external root pointer, and we've made it to the
* root level, allocate a new root block and we're done.
*/
- if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- (level >= cur->bc_nlevels)) {
+ if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE &&
+ level >= cur->bc_nlevels) {
error = xfs_btree_new_root(cur, stat);
xfs_btree_set_ptr_null(cur, ptrp);
@@ -3524,6 +3697,7 @@ xfs_btree_insert(
}
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -3537,7 +3711,8 @@ xfs_btree_insert(
if (pcur != cur &&
(ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
/* Save the state from the cursor before we trash it */
- if (cur->bc_ops->update_cursor)
+ if (cur->bc_ops->update_cursor &&
+ !(cur->bc_flags & XFS_BTREE_STAGING))
cur->bc_ops->update_cursor(pcur, cur);
cur->bc_nlevels = pcur->bc_nlevels;
xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
@@ -3586,7 +3761,7 @@ xfs_btree_kill_iroot(
#endif
int i;
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
ASSERT(cur->bc_nlevels > 1);
/*
@@ -3680,7 +3855,7 @@ xfs_btree_kill_root(
* Update the root pointer, decreasing the level by 1 and then
* free the old root.
*/
- cur->bc_ops->set_root(cur, newroot, -1);
+ xfs_btree_set_root(cur, newroot, -1);
error = xfs_btree_free_block(cur, bp);
if (error)
@@ -3822,27 +3997,25 @@ xfs_btree_delrec(
* Try to get rid of the next level down. If we can't then there's
* nothing left to do.
*/
- if (level == cur->bc_nlevels - 1) {
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
- xfs_iroot_realloc(cur->bc_ino.ip, -1,
- cur->bc_ino.whichfork);
+ if (xfs_btree_at_iroot(cur, level)) {
+ xfs_iroot_realloc(cur->bc_ino.ip, -1, cur->bc_ino.whichfork);
- error = xfs_btree_kill_iroot(cur);
- if (error)
- goto error0;
+ error = xfs_btree_kill_iroot(cur);
+ if (error)
+ goto error0;
- error = xfs_btree_dec_cursor(cur, level, stat);
- if (error)
- goto error0;
- *stat = 1;
- return 0;
- }
+ error = xfs_btree_dec_cursor(cur, level, stat);
+ if (error)
+ goto error0;
+ *stat = 1;
+ return 0;
+ }
- /*
- * If this is the root level, and there's only one entry left,
- * and it's NOT the leaf level, then we can get rid of this
- * level.
- */
+ /*
+ * If this is the root level, and there's only one entry left, and it's
+ * NOT the leaf level, then we can get rid of this level.
+ */
+ if (level == cur->bc_nlevels - 1) {
if (numrecs == 1 && level > 0) {
union xfs_btree_ptr *pp;
/*
@@ -3891,7 +4064,7 @@ xfs_btree_delrec(
xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
/*
* One child of root, need to get a chance to copy its contents
* into the root and delete it. Can't go up to next level,
@@ -3931,6 +4104,7 @@ xfs_btree_delrec(
*/
i = xfs_btree_lastrec(tcur, level);
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -3939,12 +4113,14 @@ xfs_btree_delrec(
if (error)
goto error0;
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
i = xfs_btree_lastrec(tcur, level);
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -3992,6 +4168,7 @@ xfs_btree_delrec(
if (!xfs_btree_ptr_is_null(cur, &lptr)) {
i = xfs_btree_firstrec(tcur, level);
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -4000,6 +4177,7 @@ xfs_btree_delrec(
if (error)
goto error0;
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -4017,6 +4195,7 @@ xfs_btree_delrec(
*/
i = xfs_btree_firstrec(tcur, level);
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -4026,6 +4205,7 @@ xfs_btree_delrec(
goto error0;
i = xfs_btree_firstrec(tcur, level);
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -4201,8 +4381,8 @@ xfs_btree_delrec(
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
- else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
- (level + 1 < cur->bc_nlevels)) {
+ else if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE ||
+ level + 1 < cur->bc_nlevels) {
error = xfs_btree_increment(cur, level + 1, &i);
if (error)
goto error0;
@@ -4270,7 +4450,7 @@ xfs_btree_delete(
* If we combined blocks as part of deleting the record, delrec won't
* have updated the parent high keys so we have to do that here.
*/
- if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
+ if (joined && (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) {
error = xfs_btree_updkeys_force(cur, 0);
if (error)
goto error0;
@@ -4344,7 +4524,7 @@ xfs_btree_visit_block(
{
struct xfs_btree_block *block;
struct xfs_buf *bp;
- union xfs_btree_ptr rptr;
+ union xfs_btree_ptr rptr, bufptr;
int error;
/* do right sibling readahead */
@@ -4367,15 +4547,12 @@ xfs_btree_visit_block(
* return the same block without checking if the right sibling points
* back to us and creates a cyclic reference in the btree.
*/
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
- xfs_buf_daddr(bp)))
- return -EFSCORRUPTED;
- } else {
- if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
- xfs_buf_daddr(bp)))
- return -EFSCORRUPTED;
+ xfs_btree_buf_to_ptr(cur, bp, &bufptr);
+ if (xfs_btree_ptrs_equal(cur, &rptr, &bufptr)) {
+ xfs_btree_mark_sick(cur);
+ return -EFSCORRUPTED;
}
+
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@@ -4393,7 +4570,7 @@ xfs_btree_visit_blocks(
struct xfs_btree_block *block = NULL;
int error = 0;
- cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+ xfs_btree_init_ptr_from_cur(cur, &lptr);
/* for each level */
for (level = cur->bc_nlevels - 1; level >= 0; level--) {
@@ -4471,7 +4648,7 @@ xfs_btree_block_change_owner(
/* modify the owner */
block = xfs_btree_get_block(cur, level, &bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) {
if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
return 0;
block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
@@ -4489,7 +4666,7 @@ xfs_btree_block_change_owner(
* though, so everything is consistent in memory.
*/
if (!bp) {
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
ASSERT(level == cur->bc_nlevels - 1);
return 0;
}
@@ -4523,7 +4700,7 @@ xfs_btree_change_owner(
/* Verify the v5 fields of a long-format btree block. */
xfs_failaddr_t
-xfs_btree_lblock_v5hdr_verify(
+xfs_btree_fsblock_v5hdr_verify(
struct xfs_buf *bp,
uint64_t owner)
{
@@ -4544,7 +4721,7 @@ xfs_btree_lblock_v5hdr_verify(
/* Verify a long-format btree block. */
xfs_failaddr_t
-xfs_btree_lblock_verify(
+xfs_btree_fsblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
@@ -4553,28 +4730,60 @@ xfs_btree_lblock_verify(
xfs_fsblock_t fsb;
xfs_failaddr_t fa;
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
+
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
/* sibling pointer verification */
fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
- fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
block->bb_u.l.bb_leftsib);
if (!fa)
- fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
+ fa = xfs_btree_check_fsblock_siblings(mp, fsb,
block->bb_u.l.bb_rightsib);
return fa;
}
+/* Verify an in-memory btree block. */
+xfs_failaddr_t
+xfs_btree_memblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_buftarg *btp = bp->b_target;
+ xfs_failaddr_t fa;
+ xfbno_t bno;
+
+ ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return __this_address;
+
+ /* sibling pointer verification */
+ bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp));
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_leftsib);
+ if (fa)
+ return fa;
+ fa = xfs_btree_check_memblock_siblings(btp, bno,
+ block->bb_u.l.bb_rightsib);
+ if (fa)
+ return fa;
+
+ return NULL;
+}
/**
- * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format
* btree block
*
* @bp: buffer containing the btree block
*/
xfs_failaddr_t
-xfs_btree_sblock_v5hdr_verify(
+xfs_btree_agblock_v5hdr_verify(
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_mount;
@@ -4593,13 +4802,13 @@ xfs_btree_sblock_v5hdr_verify(
}
/**
- * xfs_btree_sblock_verify() -- verify a short-format btree block
+ * xfs_btree_agblock_verify() -- verify a short-format btree block
*
* @bp: buffer containing the btree block
* @max_recs: maximum records allowed in this btree node
*/
xfs_failaddr_t
-xfs_btree_sblock_verify(
+xfs_btree_agblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
@@ -4608,16 +4817,18 @@ xfs_btree_sblock_verify(
xfs_agblock_t agbno;
xfs_failaddr_t fa;
+ ASSERT(!xfs_buftarg_is_mem(bp->b_target));
+
/* numrecs verification */
if (be16_to_cpu(block->bb_numrecs) > max_recs)
return __this_address;
/* sibling pointer verification */
agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
- fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
+ fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno,
block->bb_u.s.bb_leftsib);
if (!fa)
- fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
+ fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno,
block->bb_u.s.bb_rightsib);
return fa;
}
@@ -4815,7 +5026,7 @@ xfs_btree_overlapped_query_range(
/* Load the root of the btree. */
level = cur->bc_nlevels - 1;
- cur->bc_ops->init_ptr_from_cur(cur, &ptr);
+ xfs_btree_init_ptr_from_cur(cur, &ptr);
error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
if (error)
return error;
@@ -4966,7 +5177,7 @@ xfs_btree_query_range(
if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
return -EINVAL;
- if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
return xfs_btree_simple_query_range(cur, &low_key,
&high_key, fn, priv);
return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
@@ -5020,7 +5231,7 @@ xfs_btree_diff_two_ptrs(
const union xfs_btree_ptr *a,
const union xfs_btree_ptr *b)
{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}
@@ -5074,7 +5285,7 @@ xfs_btree_has_records_helper(
key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
&rec_key, info->key_mask);
if (key_contig == XBTREE_KEY_OVERLAP &&
- !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
+ !(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING))
return -EFSCORRUPTED;
if (key_contig == XBTREE_KEY_GAP)
return -ECANCELED;
@@ -5168,7 +5379,7 @@ xfs_btree_has_more_records(
return true;
/* There are more record blocks. */
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
else
return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
@@ -5233,6 +5444,7 @@ xfs_btree_goto_left_edge(
return error;
if (stat != 0) {
ASSERT(0);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index d906324e25c8..f93374278aa1 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -55,15 +55,8 @@ union xfs_btree_rec {
#define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi)
#define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi)
-#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi)
-#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
-#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
-#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
-#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
-#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi)
-#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi)
-
-uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
+struct xfs_btree_ops;
+uint32_t xfs_btree_magic(struct xfs_mount *mp, const struct xfs_btree_ops *ops);
/*
* For logging record fields.
@@ -86,9 +79,11 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum);
* Generic stats interface
*/
#define XFS_BTREE_STATS_INC(cur, stat) \
- XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat)
+ XFS_STATS_INC_OFF((cur)->bc_mp, \
+ (cur)->bc_ops->statoff + __XBTS_ ## stat)
#define XFS_BTREE_STATS_ADD(cur, stat, val) \
- XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val)
+ XFS_STATS_ADD_OFF((cur)->bc_mp, \
+ (cur)->bc_ops->statoff + __XBTS_ ## stat, val)
enum xbtree_key_contig {
XBTREE_KEY_GAP = 0,
@@ -111,10 +106,37 @@ static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y)
return XBTREE_KEY_OVERLAP;
}
+#define XFS_BTREE_LONG_PTR_LEN (sizeof(__be64))
+#define XFS_BTREE_SHORT_PTR_LEN (sizeof(__be32))
+
+enum xfs_btree_type {
+ XFS_BTREE_TYPE_AG,
+ XFS_BTREE_TYPE_INODE,
+ XFS_BTREE_TYPE_MEM,
+};
+
struct xfs_btree_ops {
- /* size of the key and record structures */
- size_t key_len;
- size_t rec_len;
+ const char *name;
+
+ /* Type of btree - AG-rooted or inode-rooted */
+ enum xfs_btree_type type;
+
+ /* XFS_BTGEO_* flags that determine the geometry of the btree */
+ unsigned int geom_flags;
+
+ /* size of the key, pointer, and record structures */
+ size_t key_len;
+ size_t ptr_len;
+ size_t rec_len;
+
+ /* LRU refcount to set on each btree buffer created */
+ unsigned int lru_refs;
+
+ /* offset of btree stats array */
+ unsigned int statoff;
+
+ /* sick mask for health reporting (only for XFS_BTREE_TYPE_AG) */
+ unsigned int sick_mask;
/* cursor operations */
struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *);
@@ -199,6 +221,10 @@ struct xfs_btree_ops {
const union xfs_btree_key *mask);
};
+/* btree geometry flags */
+#define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */
+#define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */
+
/*
* Reasons for the update_lastrec method to be called.
*/
@@ -215,39 +241,6 @@ union xfs_btree_irec {
struct xfs_refcount_irec rc;
};
-/* Per-AG btree information. */
-struct xfs_btree_cur_ag {
- struct xfs_perag *pag;
- union {
- struct xfs_buf *agbp;
- struct xbtree_afakeroot *afake; /* for staging cursor */
- };
- union {
- struct {
- unsigned int nr_ops; /* # record updates */
- unsigned int shape_changes; /* # of extent splits */
- } refc;
- struct {
- bool active; /* allocation cursor state */
- } abt;
- };
-};
-
-/* Btree-in-inode cursor information */
-struct xfs_btree_cur_ino {
- struct xfs_inode *ip;
- struct xbtree_ifakeroot *ifake; /* for staging cursor */
- int allocated;
- short forksize;
- char whichfork;
- char flags;
-/* We are converting a delalloc reservation */
-#define XFS_BTCUR_BMBT_WASDEL (1 << 0)
-
-/* For extent swap, ignore owner check in verifier */
-#define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1)
-};
-
struct xfs_btree_level {
/* buffer pointer */
struct xfs_buf *bp;
@@ -272,21 +265,38 @@ struct xfs_btree_cur
const struct xfs_btree_ops *bc_ops;
struct kmem_cache *bc_cache; /* cursor cache */
unsigned int bc_flags; /* btree features - below */
- xfs_btnum_t bc_btnum; /* identifies which btree type */
union xfs_btree_irec bc_rec; /* current insert/search record value */
uint8_t bc_nlevels; /* number of levels in the tree */
uint8_t bc_maxlevels; /* maximum levels for this btree type */
- int bc_statoff; /* offset of btree stats array */
- /*
- * Short btree pointers need an agno to be able to turn the pointers
- * into physical addresses for IO, so the btree cursor switches between
- * bc_ino and bc_ag based on whether XFS_BTREE_LONG_PTRS is set for the
- * cursor.
- */
+ /* per-type information */
union {
- struct xfs_btree_cur_ag bc_ag;
- struct xfs_btree_cur_ino bc_ino;
+ struct {
+ struct xfs_inode *ip;
+ short forksize;
+ char whichfork;
+ struct xbtree_ifakeroot *ifake; /* for staging cursor */
+ } bc_ino;
+ struct {
+ struct xfs_perag *pag;
+ struct xfs_buf *agbp;
+ struct xbtree_afakeroot *afake; /* for staging cursor */
+ } bc_ag;
+ struct {
+ struct xfbtree *xfbtree;
+ struct xfs_perag *pag;
+ } bc_mem;
+ };
+
+ /* per-format private data */
+ union {
+ struct {
+ int allocated;
+ } bc_bmap; /* bmapbt */
+ struct {
+ unsigned int nr_ops; /* # record updates */
+ unsigned int shape_changes; /* # of extent splits */
+ } bc_refc; /* refcountbt */
};
/* Must be at the end of the struct! */
@@ -304,18 +314,22 @@ xfs_btree_cur_sizeof(unsigned int nlevels)
return struct_size_t(struct xfs_btree_cur, bc_levels, nlevels);
}
-/* cursor flags */
-#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */
-#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */
-#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */
-#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */
-#define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */
+/* cursor state flags */
/*
* The root of this btree is a fakeroot structure so that we can stage a btree
* rebuild without leaving it accessible via primary metadata. The ops struct
* is dynamically allocated and must be freed when the cursor is deleted.
*/
-#define XFS_BTREE_STAGING (1<<5)
+#define XFS_BTREE_STAGING (1U << 0)
+
+/* We are converting a delalloc reservation (only for bmbt btrees) */
+#define XFS_BTREE_BMBT_WASDEL (1U << 1)
+
+/* For extent swap, ignore owner check in verifier (only for bmbt btrees) */
+#define XFS_BTREE_BMBT_INVALID_OWNER (1U << 2)
+
+/* Cursor is active (only for allocbt btrees) */
+#define XFS_BTREE_ALLOCBT_ACTIVE (1U << 3)
#define XFS_BTREE_NOERROR 0
#define XFS_BTREE_ERROR 1
@@ -325,14 +339,10 @@ xfs_btree_cur_sizeof(unsigned int nlevels)
*/
#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr))
-/*
- * Internal long and short btree block checks. They return NULL if the
- * block is ok or the address of the failed check otherwise.
- */
-xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur,
- struct xfs_btree_block *block, int level, struct xfs_buf *bp);
-xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur,
+xfs_failaddr_t __xfs_btree_check_block(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, int level, struct xfs_buf *bp);
+int __xfs_btree_check_ptr(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr, int index, int level);
/*
* Check that block header is ok.
@@ -345,24 +355,6 @@ xfs_btree_check_block(
struct xfs_buf *bp); /* buffer containing block, if any */
/*
- * Check that (long) pointer is ok.
- */
-bool /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_lptr(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_fsblock_t fsbno, /* btree block disk address */
- int level); /* btree block level */
-
-/*
- * Check that (short) pointer is ok.
- */
-bool /* error (0 or EFSCORRUPTED) */
-xfs_btree_check_sptr(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_agblock_t agbno, /* btree block disk address */
- int level); /* btree block level */
-
-/*
* Delete the btree cursor.
*/
void
@@ -392,63 +384,14 @@ xfs_btree_offsets(
int *last); /* output: last byte offset */
/*
- * Get a buffer for the block, return it read in.
- * Long-form addressing.
- */
-int /* error */
-xfs_btree_read_bufl(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- struct xfs_buf **bpp, /* buffer for fsbno */
- int refval, /* ref count value for buffer */
- const struct xfs_buf_ops *ops);
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Long-form addressing.
- */
-void /* error */
-xfs_btree_reada_bufl(
- struct xfs_mount *mp, /* file system mount point */
- xfs_fsblock_t fsbno, /* file system block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops);
-
-/*
- * Read-ahead the block, don't wait for it, don't return a buffer.
- * Short-form addressing.
- */
-void /* error */
-xfs_btree_reada_bufs(
- struct xfs_mount *mp, /* file system mount point */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- xfs_extlen_t count, /* count of filesystem blocks */
- const struct xfs_buf_ops *ops);
-
-/*
* Initialise a new btree block header
*/
-void
-xfs_btree_init_block(
- struct xfs_mount *mp,
- struct xfs_buf *bp,
- xfs_btnum_t btnum,
- __u16 level,
- __u16 numrecs,
- __u64 owner);
-
-void
-xfs_btree_init_block_int(
- struct xfs_mount *mp,
- struct xfs_btree_block *buf,
- xfs_daddr_t blkno,
- xfs_btnum_t btnum,
- __u16 level,
- __u16 numrecs,
- __u64 owner,
- unsigned int flags);
+void xfs_btree_init_buf(struct xfs_mount *mp, struct xfs_buf *bp,
+ const struct xfs_btree_ops *ops, __u16 level, __u16 numrecs,
+ __u64 owner);
+void xfs_btree_init_block(struct xfs_mount *mp,
+ struct xfs_btree_block *buf, const struct xfs_btree_ops *ops,
+ __u16 level, __u16 numrecs, __u64 owner);
/*
* Common btree core entry points.
@@ -467,10 +410,10 @@ int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner,
/*
* btree block CRC helpers
*/
-void xfs_btree_lblock_calc_crc(struct xfs_buf *);
-bool xfs_btree_lblock_verify_crc(struct xfs_buf *);
-void xfs_btree_sblock_calc_crc(struct xfs_buf *);
-bool xfs_btree_sblock_verify_crc(struct xfs_buf *);
+void xfs_btree_fsblock_calc_crc(struct xfs_buf *);
+bool xfs_btree_fsblock_verify_crc(struct xfs_buf *);
+void xfs_btree_agblock_calc_crc(struct xfs_buf *);
+bool xfs_btree_agblock_verify_crc(struct xfs_buf *);
/*
* Internal btree helpers also used by xfs_bmap.c.
@@ -510,12 +453,14 @@ static inline int xfs_btree_get_level(const struct xfs_btree_block *block)
#define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b))
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))
-xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
-xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp,
+xfs_failaddr_t xfs_btree_agblock_v5hdr_verify(struct xfs_buf *bp);
+xfs_failaddr_t xfs_btree_agblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
-xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp,
+xfs_failaddr_t xfs_btree_fsblock_v5hdr_verify(struct xfs_buf *bp,
uint64_t owner);
-xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
+xfs_failaddr_t xfs_btree_fsblock_verify(struct xfs_buf *bp,
+ unsigned int max_recs);
+xfs_failaddr_t xfs_btree_memblock_verify(struct xfs_buf *bp,
unsigned int max_recs);
unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits,
@@ -690,7 +635,7 @@ xfs_btree_islastblock(
block = xfs_btree_get_block(cur, level, &bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
@@ -714,21 +659,28 @@ void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur,
void xfs_btree_copy_keys(struct xfs_btree_cur *cur,
union xfs_btree_key *dst_key,
const union xfs_btree_key *src_key, int numkeys);
+void xfs_btree_init_ptr_from_cur(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr);
static inline struct xfs_btree_cur *
xfs_btree_alloc_cursor(
struct xfs_mount *mp,
struct xfs_trans *tp,
- xfs_btnum_t btnum,
+ const struct xfs_btree_ops *ops,
uint8_t maxlevels,
struct kmem_cache *cache)
{
struct xfs_btree_cur *cur;
- cur = kmem_cache_zalloc(cache, GFP_NOFS | __GFP_NOFAIL);
+ ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN ||
+ ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN);
+
+ /* BMBT allocations can come through from non-transactional context. */
+ cur = kmem_cache_zalloc(cache,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
+ cur->bc_ops = ops;
cur->bc_tp = tp;
cur->bc_mp = mp;
- cur->bc_btnum = btnum;
cur->bc_maxlevels = maxlevels;
cur->bc_cache = cache;
@@ -740,4 +692,14 @@ void xfs_btree_destroy_cur_caches(void);
int xfs_btree_goto_left_edge(struct xfs_btree_cur *cur);
+/* Does this level of the cursor point to the inode root (and not a block)? */
+static inline bool
+xfs_btree_at_iroot(
+ const struct xfs_btree_cur *cur,
+ int level)
+{
+ return cur->bc_ops->type == XFS_BTREE_TYPE_INODE &&
+ level == cur->bc_nlevels - 1;
+}
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
new file mode 100644
index 000000000000..036061fe32cc
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trans.h"
+#include "xfs_btree.h"
+#include "xfs_error.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_ag.h"
+#include "xfs_buf_item.h"
+#include "xfs_trace.h"
+
+/* Set the root of an in-memory btree. */
+void
+xfbtree_set_root(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr,
+ int inc)
+{
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ cur->bc_mem.xfbtree->root = *ptr;
+ cur->bc_mem.xfbtree->nlevels += inc;
+}
+
+/* Initialize a pointer from the in-memory btree header. */
+void
+xfbtree_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ *ptr = cur->bc_mem.xfbtree->root;
+}
+
+/* Duplicate an in-memory btree cursor. */
+struct xfs_btree_cur *
+xfbtree_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ struct xfs_btree_cur *ncur;
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops,
+ cur->bc_maxlevels, cur->bc_cache);
+ ncur->bc_flags = cur->bc_flags;
+ ncur->bc_nlevels = cur->bc_nlevels;
+ ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
+
+ if (cur->bc_mem.pag)
+ ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
+
+ return ncur;
+}
+
+/* Close the btree xfile and release all resources. */
+void
+xfbtree_destroy(
+ struct xfbtree *xfbt)
+{
+ xfs_buftarg_drain(xfbt->target);
+}
+
+/* Compute the number of bytes available for records. */
+static inline unsigned int
+xfbtree_rec_bytes(
+ struct xfs_mount *mp,
+ const struct xfs_btree_ops *ops)
+{
+ return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
+}
+
+/* Initialize an empty leaf block as the btree root. */
+STATIC int
+xfbtree_init_leaf_block(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ const struct xfs_btree_ops *ops)
+{
+ struct xfs_buf *bp;
+ xfbno_t bno = xfbt->highest_bno++;
+ int error;
+
+ error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE,
+ &bp);
+ if (error)
+ return error;
+
+ trace_xfbtree_create_root_buf(xfbt, bp);
+
+ bp->b_ops = ops->buf_ops;
+ xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner);
+ xfs_buf_relse(bp);
+
+ xfbt->root.l = cpu_to_be64(bno);
+ return 0;
+}
+
+/*
+ * Create an in-memory btree root that can be used with the given xmbuf.
+ * Callers must set xfbt->owner.
+ */
+int
+xfbtree_init(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ struct xfs_buftarg *btp,
+ const struct xfs_btree_ops *ops)
+{
+ unsigned int blocklen = xfbtree_rec_bytes(mp, ops);
+ unsigned int keyptr_len;
+ int error;
+
+ /* Requires a long-format CRC-format btree */
+ if (!xfs_has_crc(mp)) {
+ ASSERT(xfs_has_crc(mp));
+ return -EINVAL;
+ }
+ if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
+ ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN);
+ return -EINVAL;
+ }
+
+ memset(xfbt, 0, sizeof(*xfbt));
+ xfbt->target = btp;
+
+ /* Set up min/maxrecs for this btree. */
+ keyptr_len = ops->key_len + sizeof(__be64);
+ xfbt->maxrecs[0] = blocklen / ops->rec_len;
+ xfbt->maxrecs[1] = blocklen / keyptr_len;
+ xfbt->minrecs[0] = xfbt->maxrecs[0] / 2;
+ xfbt->minrecs[1] = xfbt->maxrecs[1] / 2;
+ xfbt->highest_bno = 0;
+ xfbt->nlevels = 1;
+
+ /* Initialize the empty btree. */
+ error = xfbtree_init_leaf_block(mp, xfbt, ops);
+ if (error)
+ goto err_freesp;
+
+ trace_xfbtree_init(mp, xfbt, ops);
+
+ return 0;
+
+err_freesp:
+ xfs_buftarg_drain(xfbt->target);
+ return error;
+}
+
+/* Allocate a block to our in-memory btree. */
+int
+xfbtree_alloc_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+ xfbno_t bno = xfbt->highest_bno++;
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ trace_xfbtree_alloc_block(xfbt, cur, bno);
+
+ /* Fail if the block address exceeds the maximum for the buftarg. */
+ if (!xfbtree_verify_bno(xfbt, bno)) {
+ ASSERT(xfbtree_verify_bno(xfbt, bno));
+ *stat = 0;
+ return 0;
+ }
+
+ new->l = cpu_to_be64(bno);
+ *stat = 1;
+ return 0;
+}
+
+/* Free a block from our in-memory btree. */
+int
+xfbtree_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+ xfs_daddr_t daddr = xfs_buf_daddr(bp);
+ xfbno_t bno = xfs_daddr_to_xfbno(daddr);
+
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
+
+ trace_xfbtree_free_block(xfbt, cur, bno);
+
+ if (bno + 1 == xfbt->highest_bno)
+ xfbt->highest_bno--;
+
+ return 0;
+}
+
+/* Return the minimum number of records for a btree block. */
+int
+xfbtree_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+
+ return xfbt->minrecs[level != 0];
+}
+
+/* Return the maximum number of records for a btree block. */
+int
+xfbtree_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ struct xfbtree *xfbt = cur->bc_mem.xfbtree;
+
+ return xfbt->maxrecs[level != 0];
+}
+
+/* If this log item is a buffer item that came from the xfbtree, return it. */
+static inline struct xfs_buf *
+xfbtree_buf_match(
+ struct xfbtree *xfbt,
+ const struct xfs_log_item *lip)
+{
+ const struct xfs_buf_log_item *bli;
+ struct xfs_buf *bp;
+
+ if (lip->li_type != XFS_LI_BUF)
+ return NULL;
+
+ bli = container_of(lip, struct xfs_buf_log_item, bli_item);
+ bp = bli->bli_buf;
+ if (bp->b_target != xfbt->target)
+ return NULL;
+
+ return bp;
+}
+
+/*
+ * Commit changes to the incore btree immediately by writing all dirty xfbtree
+ * buffers to the backing xfile. This detaches all xfbtree buffers from the
+ * transaction, even on failure. The buffer locks are dropped between the
+ * delwri queue and submit, so the caller must synchronize btree access.
+ *
+ * Normally we'd let the buffers commit with the transaction and get written to
+ * the xfile via the log, but online repair stages ephemeral btrees in memory
+ * and uses the btree_staging functions to write new btrees to disk atomically.
+ * The in-memory btree (and its backing store) are discarded at the end of the
+ * repair phase, which means that xfbtree buffers cannot commit with the rest
+ * of a transaction.
+ *
+ * In other words, online repair only needs the transaction to collect buffer
+ * pointers and to avoid buffer deadlocks, not to guarantee consistency of
+ * updates.
+ */
+int
+xfbtree_trans_commit(
+ struct xfbtree *xfbt,
+ struct xfs_trans *tp)
+{
+ struct xfs_log_item *lip, *n;
+ bool tp_dirty = false;
+ int error = 0;
+
+ /*
+ * For each xfbtree buffer attached to the transaction, write the dirty
+ * buffers to the xfile and release them.
+ */
+ list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
+ struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
+
+ if (!bp) {
+ if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
+ tp_dirty |= true;
+ continue;
+ }
+
+ trace_xfbtree_trans_commit_buf(xfbt, bp);
+
+ xmbuf_trans_bdetach(tp, bp);
+
+ /*
+ * If the buffer fails verification, note the failure but
+ * continue walking the transaction items so that we remove all
+ * ephemeral btree buffers.
+ */
+ if (!error)
+ error = xmbuf_finalize(bp);
+
+ xfs_buf_relse(bp);
+ }
+
+ /*
+ * Reset the transaction's dirty flag to reflect the dirty state of the
+ * log items that are still attached.
+ */
+ tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
+ (tp_dirty ? XFS_TRANS_DIRTY : 0);
+
+ return error;
+}
+
+/*
+ * Cancel changes to the incore btree by detaching all the xfbtree buffers.
+ * Changes are not undone, so callers must not access the btree ever again.
+ */
+void
+xfbtree_trans_cancel(
+ struct xfbtree *xfbt,
+ struct xfs_trans *tp)
+{
+ struct xfs_log_item *lip, *n;
+ bool tp_dirty = false;
+
+ list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
+ struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
+
+ if (!bp) {
+ if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
+ tp_dirty |= true;
+ continue;
+ }
+
+ trace_xfbtree_trans_cancel_buf(xfbt, bp);
+
+ xmbuf_trans_bdetach(tp, bp);
+ xfs_buf_relse(bp);
+ }
+
+ /*
+ * Reset the transaction's dirty flag to reflect the dirty state of the
+ * log items that are still attached.
+ */
+ tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
+ (tp_dirty ? XFS_TRANS_DIRTY : 0);
+}
diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h
new file mode 100644
index 000000000000..1c3825786ec8
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_btree_mem.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_BTREE_MEM_H__
+#define __XFS_BTREE_MEM_H__
+
+typedef uint64_t xfbno_t;
+
+#define XFBNO_BLOCKSIZE (XMBUF_BLOCKSIZE)
+#define XFBNO_BBSHIFT (XMBUF_BLOCKSHIFT - BBSHIFT)
+#define XFBNO_BBSIZE (XFBNO_BLOCKSIZE >> BBSHIFT)
+
+static inline xfs_daddr_t xfbno_to_daddr(xfbno_t blkno)
+{
+ return blkno << XFBNO_BBSHIFT;
+}
+
+static inline xfbno_t xfs_daddr_to_xfbno(xfs_daddr_t daddr)
+{
+ return daddr >> XFBNO_BBSHIFT;
+}
+
+struct xfbtree {
+ /* buffer cache target for this in-memory btree */
+ struct xfs_buftarg *target;
+
+ /* Highest block number that has been written to. */
+ xfbno_t highest_bno;
+
+ /* Owner of this btree. */
+ unsigned long long owner;
+
+ /* Btree header */
+ union xfs_btree_ptr root;
+ unsigned int nlevels;
+
+ /* Minimum and maximum records per block. */
+ unsigned int maxrecs[2];
+ unsigned int minrecs[2];
+};
+
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+static inline bool xfbtree_verify_bno(struct xfbtree *xfbt, xfbno_t bno)
+{
+ return xmbuf_verify_daddr(xfbt->target, xfbno_to_daddr(bno));
+}
+
+void xfbtree_set_root(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *ptr, int inc);
+void xfbtree_init_ptr_from_cur(struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr);
+struct xfs_btree_cur *xfbtree_dup_cursor(struct xfs_btree_cur *cur);
+
+int xfbtree_get_minrecs(struct xfs_btree_cur *cur, int level);
+int xfbtree_get_maxrecs(struct xfs_btree_cur *cur, int level);
+
+int xfbtree_alloc_block(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start, union xfs_btree_ptr *ptr,
+ int *stat);
+int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp);
+
+/* Callers must set xfbt->target and xfbt->owner before calling this */
+int xfbtree_init(struct xfs_mount *mp, struct xfbtree *xfbt,
+ struct xfs_buftarg *btp, const struct xfs_btree_ops *ops);
+void xfbtree_destroy(struct xfbtree *xfbt);
+
+int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp);
+void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp);
+#else
+# define xfbtree_verify_bno(...) (false)
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
+
+#endif /* __XFS_BTREE_MEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index e276eba87cb1..694929703152 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -39,63 +39,6 @@
*/
/*
- * Don't allow staging cursors to be duplicated because they're supposed to be
- * kept private to a single thread.
- */
-STATIC struct xfs_btree_cur *
-xfs_btree_fakeroot_dup_cursor(
- struct xfs_btree_cur *cur)
-{
- ASSERT(0);
- return NULL;
-}
-
-/*
- * Don't allow block allocation for a staging cursor, because staging cursors
- * do not support regular btree modifications.
- *
- * Bulk loading uses a separate callback to obtain new blocks from a
- * preallocated list, which prevents ENOSPC failures during loading.
- */
-STATIC int
-xfs_btree_fakeroot_alloc_block(
- struct xfs_btree_cur *cur,
- const union xfs_btree_ptr *start_bno,
- union xfs_btree_ptr *new_bno,
- int *stat)
-{
- ASSERT(0);
- return -EFSCORRUPTED;
-}
-
-/*
- * Don't allow block freeing for a staging cursor, because staging cursors
- * do not support regular btree modifications.
- */
-STATIC int
-xfs_btree_fakeroot_free_block(
- struct xfs_btree_cur *cur,
- struct xfs_buf *bp)
-{
- ASSERT(0);
- return -EFSCORRUPTED;
-}
-
-/* Initialize a pointer to the root block from the fakeroot. */
-STATIC void
-xfs_btree_fakeroot_init_ptr_from_cur(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr *ptr)
-{
- struct xbtree_afakeroot *afake;
-
- ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
-
- afake = cur->bc_ag.afake;
- ptr->s = cpu_to_be32(afake->af_root);
-}
-
-/*
* Bulk Loading for AG Btrees
* ==========================
*
@@ -109,47 +52,20 @@ xfs_btree_fakeroot_init_ptr_from_cur(
* cursor into a regular btree cursor.
*/
-/* Update the btree root information for a per-AG fake root. */
-STATIC void
-xfs_btree_afakeroot_set_root(
- struct xfs_btree_cur *cur,
- const union xfs_btree_ptr *ptr,
- int inc)
-{
- struct xbtree_afakeroot *afake = cur->bc_ag.afake;
-
- ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
- afake->af_root = be32_to_cpu(ptr->s);
- afake->af_levels += inc;
-}
-
/*
* Initialize a AG-rooted btree cursor with the given AG btree fake root.
- * The btree cursor's bc_ops will be overridden as needed to make the staging
- * functionality work.
*/
void
xfs_btree_stage_afakeroot(
struct xfs_btree_cur *cur,
struct xbtree_afakeroot *afake)
{
- struct xfs_btree_ops *nops;
-
ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING));
- ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE));
+ ASSERT(cur->bc_ops->type != XFS_BTREE_TYPE_INODE);
ASSERT(cur->bc_tp == NULL);
- nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
- memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
- nops->alloc_block = xfs_btree_fakeroot_alloc_block;
- nops->free_block = xfs_btree_fakeroot_free_block;
- nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur;
- nops->set_root = xfs_btree_afakeroot_set_root;
- nops->dup_cursor = xfs_btree_fakeroot_dup_cursor;
-
cur->bc_ag.afake = afake;
cur->bc_nlevels = afake->af_levels;
- cur->bc_ops = nops;
cur->bc_flags |= XFS_BTREE_STAGING;
}
@@ -163,17 +79,15 @@ void
xfs_btree_commit_afakeroot(
struct xfs_btree_cur *cur,
struct xfs_trans *tp,
- struct xfs_buf *agbp,
- const struct xfs_btree_ops *ops)
+ struct xfs_buf *agbp)
{
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
ASSERT(cur->bc_tp == NULL);
trace_xfs_btree_commit_afakeroot(cur);
- kmem_free((void *)cur->bc_ops);
+ cur->bc_ag.afake = NULL;
cur->bc_ag.agbp = agbp;
- cur->bc_ops = ops;
cur->bc_flags &= ~XFS_BTREE_STAGING;
cur->bc_tp = tp;
}
@@ -211,29 +125,16 @@ xfs_btree_commit_afakeroot(
void
xfs_btree_stage_ifakeroot(
struct xfs_btree_cur *cur,
- struct xbtree_ifakeroot *ifake,
- struct xfs_btree_ops **new_ops)
+ struct xbtree_ifakeroot *ifake)
{
- struct xfs_btree_ops *nops;
-
ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING));
- ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+ ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE);
ASSERT(cur->bc_tp == NULL);
- nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
- memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
- nops->alloc_block = xfs_btree_fakeroot_alloc_block;
- nops->free_block = xfs_btree_fakeroot_free_block;
- nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur;
- nops->dup_cursor = xfs_btree_fakeroot_dup_cursor;
-
cur->bc_ino.ifake = ifake;
cur->bc_nlevels = ifake->if_levels;
- cur->bc_ops = nops;
+ cur->bc_ino.forksize = ifake->if_fork_size;
cur->bc_flags |= XFS_BTREE_STAGING;
-
- if (new_ops)
- *new_ops = nops;
}
/*
@@ -246,18 +147,15 @@ void
xfs_btree_commit_ifakeroot(
struct xfs_btree_cur *cur,
struct xfs_trans *tp,
- int whichfork,
- const struct xfs_btree_ops *ops)
+ int whichfork)
{
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
ASSERT(cur->bc_tp == NULL);
trace_xfs_btree_commit_ifakeroot(cur);
- kmem_free((void *)cur->bc_ops);
cur->bc_ino.ifake = NULL;
cur->bc_ino.whichfork = whichfork;
- cur->bc_ops = ops;
cur->bc_flags &= ~XFS_BTREE_STAGING;
cur->bc_tp = tp;
}
@@ -397,8 +295,7 @@ xfs_btree_bload_prep_block(
struct xfs_btree_block *new_block;
int ret;
- if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
- level == cur->bc_nlevels - 1) {
+ if (xfs_btree_at_iroot(cur, level)) {
struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
size_t new_size;
@@ -406,14 +303,12 @@ xfs_btree_bload_prep_block(
/* Allocate a new incore btree root block. */
new_size = bbl->iroot_size(cur, level, nr_this_block, priv);
- ifp->if_broot = kmem_zalloc(new_size, 0);
+ ifp->if_broot = kzalloc(new_size, GFP_KERNEL | __GFP_NOFAIL);
ifp->if_broot_bytes = (int)new_size;
/* Initialize it and send it out. */
- xfs_btree_init_block_int(cur->bc_mp, ifp->if_broot,
- XFS_BUF_DADDR_NULL, cur->bc_btnum, level,
- nr_this_block, cur->bc_ino.ip->i_ino,
- cur->bc_flags);
+ xfs_btree_init_block(cur->bc_mp, ifp->if_broot, cur->bc_ops,
+ level, nr_this_block, cur->bc_ino.ip->i_ino);
*bpp = NULL;
*blockp = ifp->if_broot;
@@ -704,7 +599,7 @@ xfs_btree_bload_compute_geometry(
xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
&avg_per_block, &level_blocks, &dontcare64);
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
/*
* If all the items we want to store at this level
* would fit in the inode root block, then we have our
@@ -763,7 +658,7 @@ xfs_btree_bload_compute_geometry(
return -EOVERFLOW;
bbl->btree_height = cur->bc_nlevels;
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE)
bbl->nr_blocks = nr_blocks - 1;
else
bbl->nr_blocks = nr_blocks;
@@ -890,7 +785,7 @@ xfs_btree_bload(
}
/* Initialize the new root. */
- if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
+ if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
cur->bc_ino.ifake->if_levels = cur->bc_nlevels;
cur->bc_ino.ifake->if_blocks = total_blocks - 1;
diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h
index 055ea43b1e18..0c9c2ffb127a 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.h
+++ b/fs/xfs/libxfs/xfs_btree_staging.h
@@ -22,7 +22,7 @@ struct xbtree_afakeroot {
void xfs_btree_stage_afakeroot(struct xfs_btree_cur *cur,
struct xbtree_afakeroot *afake);
void xfs_btree_commit_afakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
- struct xfs_buf *agbp, const struct xfs_btree_ops *ops);
+ struct xfs_buf *agbp);
/* Fake root for an inode-rooted btree. */
struct xbtree_ifakeroot {
@@ -41,10 +41,9 @@ struct xbtree_ifakeroot {
/* Cursor interactions with fake roots for inode-rooted btrees. */
void xfs_btree_stage_ifakeroot(struct xfs_btree_cur *cur,
- struct xbtree_ifakeroot *ifake,
- struct xfs_btree_ops **new_ops);
+ struct xbtree_ifakeroot *ifake);
void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
- int whichfork, const struct xfs_btree_ops *ops);
+ int whichfork);
/* Bulk loading of staged btrees. */
typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
@@ -76,8 +75,7 @@ struct xfs_btree_bload {
/*
* This function should return the size of the in-core btree root
- * block. It is only necessary for XFS_BTREE_ROOT_IN_INODE btree
- * types.
+ * block. It is only necessary for XFS_BTREE_TYPE_INODE btrees.
*/
xfs_btree_bload_iroot_size_fn iroot_size;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 5457188bb4de..16a529a88780 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -23,6 +23,7 @@
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_errortag.h"
+#include "xfs_health.h"
/*
* xfs_da_btree.c
@@ -85,7 +86,8 @@ xfs_da_state_alloc(
{
struct xfs_da_state *state;
- state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL);
+ state = kmem_cache_zalloc(xfs_da_state_cache,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
state->args = args;
state->mp = args->dp->i_mount;
return state;
@@ -250,6 +252,51 @@ xfs_da3_node_verify(
return NULL;
}
+xfs_failaddr_t
+xfs_da3_node_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ if (xfs_has_crc(mp)) {
+ struct xfs_da3_blkinfo *hdr3 = bp->b_addr;
+
+ if (hdr3->hdr.magic != cpu_to_be16(XFS_DA3_NODE_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
+xfs_failaddr_t
+xfs_da3_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_da_blkinfo *hdr = bp->b_addr;
+
+ if (!xfs_has_crc(mp))
+ return NULL;
+
+ switch (hdr->magic) {
+ case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
+ return xfs_attr3_leaf_header_check(bp, owner);
+ case cpu_to_be16(XFS_DA3_NODE_MAGIC):
+ return xfs_da3_node_header_check(bp, owner);
+ case cpu_to_be16(XFS_DIR3_LEAF1_MAGIC):
+ case cpu_to_be16(XFS_DIR3_LEAFN_MAGIC):
+ return xfs_dir3_leaf_header_check(bp, owner);
+ }
+
+ ASSERT(0);
+ return NULL;
+}
+
static void
xfs_da3_node_write_verify(
struct xfs_buf *bp)
@@ -352,6 +399,8 @@ const struct xfs_buf_ops xfs_da3_node_buf_ops = {
static int
xfs_da3_node_set_type(
struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ int whichfork,
struct xfs_buf *bp)
{
struct xfs_da_blkinfo *info = bp->b_addr;
@@ -373,6 +422,7 @@ xfs_da3_node_set_type(
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
info, sizeof(*info));
xfs_trans_brelse(tp, bp);
+ xfs_dirattr_mark_sick(dp, whichfork);
return -EFSCORRUPTED;
}
}
@@ -391,7 +441,7 @@ xfs_da3_node_read(
&xfs_da3_node_buf_ops);
if (error || !*bpp || !tp)
return error;
- return xfs_da3_node_set_type(tp, *bpp);
+ return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
}
int
@@ -408,6 +458,8 @@ xfs_da3_node_read_mapped(
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
bpp, &xfs_da3_node_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dirattr_mark_sick(dp, whichfork);
if (error || !*bpp)
return error;
@@ -418,7 +470,7 @@ xfs_da3_node_read_mapped(
if (!tp)
return 0;
- return xfs_da3_node_set_type(tp, *bpp);
+ return xfs_da3_node_set_type(tp, dp, whichfork, *bpp);
}
/*
@@ -479,7 +531,7 @@ xfs_da3_node_create(
memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
ichdr.magic = XFS_DA3_NODE_MAGIC;
hdr3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
+ hdr3->info.owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
} else {
ichdr.magic = XFS_DA_NODE_MAGIC;
@@ -631,6 +683,7 @@ xfs_da3_split(
if (node->hdr.info.forw) {
if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
xfs_buf_mark_corrupt(oldblk->bp);
+ xfs_da_mark_sick(state->args);
error = -EFSCORRUPTED;
goto out;
}
@@ -644,6 +697,7 @@ xfs_da3_split(
if (node->hdr.info.back) {
if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
xfs_buf_mark_corrupt(oldblk->bp);
+ xfs_da_mark_sick(state->args);
error = -EFSCORRUPTED;
goto out;
}
@@ -1190,6 +1244,7 @@ xfs_da3_root_join(
struct xfs_da3_icnode_hdr oldroothdr;
int error;
struct xfs_inode *dp = state->args->dp;
+ xfs_failaddr_t fa;
trace_xfs_da_root_join(state->args);
@@ -1216,6 +1271,13 @@ xfs_da3_root_join(
error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
/*
@@ -1250,6 +1312,7 @@ xfs_da3_node_toosmall(
struct xfs_da_blkinfo *info;
xfs_dablk_t blkno;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
struct xfs_da3_icnode_hdr nodehdr;
int count;
int forward;
@@ -1324,6 +1387,13 @@ xfs_da3_node_toosmall(
state->args->whichfork);
if (error)
return error;
+ fa = xfs_da3_node_header_check(bp, state->args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(state->args->trans, bp);
+ xfs_da_mark_sick(state->args);
+ return -EFSCORRUPTED;
+ }
node = bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
@@ -1582,6 +1652,7 @@ xfs_da3_node_lookup_int(
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_args *args;
+ xfs_failaddr_t fa;
xfs_dablk_t blkno;
xfs_dahash_t hashval;
xfs_dahash_t btreehashval;
@@ -1620,6 +1691,12 @@ xfs_da3_node_lookup_int(
if (magic == XFS_ATTR_LEAF_MAGIC ||
magic == XFS_ATTR3_LEAF_MAGIC) {
+ fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_ATTR_LEAF_MAGIC;
blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
break;
@@ -1627,6 +1704,12 @@ xfs_da3_node_lookup_int(
if (magic == XFS_DIR2_LEAFN_MAGIC ||
magic == XFS_DIR3_LEAFN_MAGIC) {
+ fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DIR2_LEAFN_MAGIC;
blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
blk->bp, NULL);
@@ -1635,6 +1718,14 @@ xfs_da3_node_lookup_int(
if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
xfs_buf_mark_corrupt(blk->bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
+
+ fa = xfs_da3_node_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -1650,6 +1741,7 @@ xfs_da3_node_lookup_int(
/* Tree taller than we can handle; bail out! */
if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
xfs_buf_mark_corrupt(blk->bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -1658,6 +1750,7 @@ xfs_da3_node_lookup_int(
expected_level = nodehdr.level - 1;
else if (expected_level != nodehdr.level) {
xfs_buf_mark_corrupt(blk->bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
} else
expected_level--;
@@ -1709,12 +1802,16 @@ xfs_da3_node_lookup_int(
}
/* We can't point back to the root. */
- if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
+ if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) {
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
+ }
}
- if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
+ if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) {
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
+ }
/*
* A leaf block that ends in the hashval that we are interested in
@@ -1732,6 +1829,7 @@ xfs_da3_node_lookup_int(
args->blkno = blk->blkno;
} else {
ASSERT(0);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
@@ -1803,6 +1901,7 @@ xfs_da3_blk_link(
struct xfs_da_blkinfo *tmp_info;
struct xfs_da_args *args;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
int before = 0;
int error;
struct xfs_inode *dp = state->args->dp;
@@ -1846,6 +1945,13 @@ xfs_da3_blk_link(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1867,6 +1973,13 @@ xfs_da3_blk_link(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
@@ -1896,6 +2009,7 @@ xfs_da3_blk_unlink(
struct xfs_da_blkinfo *tmp_info;
struct xfs_da_args *args;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
int error;
/*
@@ -1926,6 +2040,13 @@ xfs_da3_blk_unlink(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1943,6 +2064,13 @@ xfs_da3_blk_unlink(
&bp, args->whichfork);
if (error)
return error;
+ fa = xfs_da3_header_check(bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(bp, fa);
+ xfs_trans_brelse(args->trans, bp);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
ASSERT(bp != NULL);
tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
@@ -1979,6 +2107,7 @@ xfs_da3_path_shift(
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_buf *bp;
+ xfs_failaddr_t fa;
xfs_dablk_t blkno = 0;
int level;
int error;
@@ -2057,6 +2186,12 @@ xfs_da3_path_shift(
switch (be16_to_cpu(info->magic)) {
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
+ fa = xfs_da3_node_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DA_NODE_MAGIC;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
bp->b_addr);
@@ -2070,6 +2205,12 @@ xfs_da3_path_shift(
break;
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
+ fa = xfs_attr3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_ATTR_LEAF_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
@@ -2077,6 +2218,12 @@ xfs_da3_path_shift(
break;
case XFS_DIR2_LEAFN_MAGIC:
case XFS_DIR3_LEAFN_MAGIC:
+ fa = xfs_dir3_leaf_header_check(blk->bp, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(blk->bp, fa);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
blk->magic = XFS_DIR2_LEAFN_MAGIC;
ASSERT(level == path->active-1);
blk->index = 0;
@@ -2150,8 +2297,8 @@ xfs_da_grow_inode_int(
struct xfs_inode *dp = args->dp;
int w = args->whichfork;
xfs_rfsblock_t nblks = dp->i_nblocks;
- struct xfs_bmbt_irec map, *mapp;
- int nmap, error, got, i, mapi;
+ struct xfs_bmbt_irec map, *mapp = &map;
+ int nmap, error, got, i, mapi = 1;
/*
* Find a spot in the file space to put the new block.
@@ -2167,14 +2314,7 @@ xfs_da_grow_inode_int(
error = xfs_bmapi_write(tp, dp, *bno, count,
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->total, &map, &nmap);
- if (error)
- return error;
-
- ASSERT(nmap <= 1);
- if (nmap == 1) {
- mapp = &map;
- mapi = 1;
- } else if (nmap == 0 && count > 1) {
+ if (error == -ENOSPC && count > 1) {
xfs_fileoff_t b;
int c;
@@ -2182,7 +2322,8 @@ xfs_da_grow_inode_int(
* If we didn't get it and the block might work if fragmented,
* try without the CONTIG flag. Loop until we get it all.
*/
- mapp = kmem_alloc(sizeof(*mapp) * count, 0);
+ mapp = kmalloc(sizeof(*mapp) * count,
+ GFP_KERNEL | __GFP_NOFAIL);
for (b = *bno, mapi = 0; b < *bno + count; ) {
c = (int)(*bno + count - b);
nmap = min(XFS_BMAP_MAX_NMAP, c);
@@ -2191,16 +2332,13 @@ xfs_da_grow_inode_int(
args->total, &mapp[mapi], &nmap);
if (error)
goto out_free_map;
- if (nmap < 1)
- break;
mapi += nmap;
b = mapp[mapi - 1].br_startoff +
mapp[mapi - 1].br_blockcount;
}
- } else {
- mapi = 0;
- mapp = NULL;
}
+ if (error)
+ goto out_free_map;
/*
* Count the blocks we got, make sure it matches the total.
@@ -2219,7 +2357,7 @@ xfs_da_grow_inode_int(
out_free_map:
if (mapp != &map)
- kmem_free(mapp);
+ kfree(mapp);
return error;
}
@@ -2272,6 +2410,7 @@ xfs_da3_swap_lastblock(
struct xfs_buf *last_buf;
struct xfs_buf *sib_buf;
struct xfs_buf *par_buf;
+ xfs_failaddr_t fa;
xfs_dahash_t dead_hash;
xfs_fileoff_t lastoff;
xfs_dablk_t dead_blkno;
@@ -2297,8 +2436,10 @@ xfs_da3_swap_lastblock(
error = xfs_bmap_last_before(tp, dp, &lastoff, w);
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, lastoff == 0))
+ if (XFS_IS_CORRUPT(mp, lastoff == 0)) {
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
+ }
/*
* Read the last block in the btree space.
*/
@@ -2306,6 +2447,14 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
if (error)
return error;
+ fa = xfs_da3_header_check(last_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(last_buf, fa);
+ xfs_trans_brelse(tp, last_buf);
+ xfs_da_mark_sick(args);
+ return -EFSCORRUPTED;
+ }
+
/*
* Copy the last block into the dead buffer and log it.
*/
@@ -2344,10 +2493,18 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
+ fa = xfs_da3_header_check(sib_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(sib_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
sib_info = sib_buf->b_addr;
if (XFS_IS_CORRUPT(mp,
be32_to_cpu(sib_info->forw) != last_blkno ||
sib_info->magic != dead_info->magic)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
@@ -2364,10 +2521,18 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
+ fa = xfs_da3_header_check(sib_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(sib_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
sib_info = sib_buf->b_addr;
if (XFS_IS_CORRUPT(mp,
be32_to_cpu(sib_info->back) != last_blkno ||
sib_info->magic != dead_info->magic)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
@@ -2386,10 +2551,18 @@ xfs_da3_swap_lastblock(
error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
+ fa = xfs_da3_node_header_check(par_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(par_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
par_node = par_buf->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
if (XFS_IS_CORRUPT(mp,
level >= 0 && level != par_hdr.level + 1)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
@@ -2401,6 +2574,7 @@ xfs_da3_swap_lastblock(
entno++)
continue;
if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
@@ -2426,15 +2600,24 @@ xfs_da3_swap_lastblock(
xfs_trans_brelse(tp, par_buf);
par_buf = NULL;
if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
+ fa = xfs_da3_node_header_check(par_buf, args->owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(par_buf, fa);
+ xfs_da_mark_sick(args);
+ error = -EFSCORRUPTED;
+ goto done;
+ }
par_node = par_buf->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
+ xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto done;
}
@@ -2518,7 +2701,8 @@ xfs_dabuf_map(
int error = 0, nirecs, i;
if (nfsb > 1)
- irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS);
+ irecs = kzalloc(sizeof(irec) * nfsb,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
nirecs = nfsb;
error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
@@ -2531,7 +2715,8 @@ xfs_dabuf_map(
* larger one that needs to be free by the caller.
*/
if (nirecs > 1) {
- map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS);
+ map = kzalloc(nirecs * sizeof(struct xfs_buf_map),
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
if (!map) {
error = -ENOMEM;
goto out_free_irecs;
@@ -2557,12 +2742,13 @@ xfs_dabuf_map(
*nmaps = nirecs;
out_free_irecs:
if (irecs != &irec)
- kmem_free(irecs);
+ kfree(irecs);
return error;
invalid_mapping:
/* Caller ok with no mapping. */
if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
+ xfs_dirattr_mark_sick(dp, whichfork);
error = -EFSCORRUPTED;
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
xfs_alert(mp, "%s: bno %u inode %llu",
@@ -2613,7 +2799,7 @@ xfs_da_get_buf(
out_free:
if (mapp != &map)
- kmem_free(mapp);
+ kfree(mapp);
return error;
}
@@ -2644,6 +2830,8 @@ xfs_da_read_buf(
error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
&bp, ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_dirattr_mark_sick(dp, whichfork);
if (error)
goto out_free;
@@ -2654,7 +2842,7 @@ xfs_da_read_buf(
*bpp = bp;
out_free:
if (mapp != &map)
- kmem_free(mapp);
+ kfree(mapp);
return error;
}
@@ -2685,7 +2873,7 @@ xfs_da_reada_buf(
out_free:
if (mapp != &map)
- kmem_free(mapp);
+ kfree(mapp);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 706baf36e175..354d5d65043e 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -54,17 +54,24 @@ enum xfs_dacmp {
*/
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
- const uint8_t *name; /* string (maybe not NULL terminated) */
- int namelen; /* length of string (maybe no NULL) */
- uint8_t filetype; /* filetype of inode for directories */
+ const uint8_t *name; /* string (maybe not NULL terminated) */
+ const uint8_t *new_name; /* new attr name */
void *value; /* set of bytes (maybe contain NULLs) */
- int valuelen; /* length of value */
- unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */
- unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */
- xfs_dahash_t hashval; /* hash value of name */
- xfs_ino_t inumber; /* input/output inode number */
+ void *new_value; /* new xattr value (may contain NULLs) */
struct xfs_inode *dp; /* directory inode to manipulate */
struct xfs_trans *trans; /* current trans (changes over time) */
+
+ xfs_ino_t inumber; /* input/output inode number */
+ xfs_ino_t owner; /* inode that owns the dir/attr data */
+
+ int valuelen; /* length of value */
+ int new_valuelen; /* length of new_value */
+ uint8_t filetype; /* filetype of inode for directories */
+ uint8_t op_flags; /* operation flags */
+ uint8_t attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */
+ short namelen; /* length of string (maybe no NULL) */
+ short new_namelen; /* length of new attr name */
+ xfs_dahash_t hashval; /* hash value of name */
xfs_extlen_t total; /* total blocks needed, for 1st bmap */
int whichfork; /* data or attribute fork */
xfs_dablk_t blkno; /* blkno of attr leaf of interest */
@@ -77,7 +84,6 @@ typedef struct xfs_da_args {
xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
int rmtblkcnt2; /* remote attr value block count */
int rmtvaluelen2; /* remote attr value length in bytes */
- uint32_t op_flags; /* operation flags */
enum xfs_dacmp cmpresult; /* name compare result for lookups */
} xfs_da_args_t;
@@ -89,10 +95,8 @@ typedef struct xfs_da_args {
#define XFS_DA_OP_ADDNAME (1u << 2) /* this is an add operation */
#define XFS_DA_OP_OKNOENT (1u << 3) /* lookup op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP (1u << 4) /* lookup returns CI name if found */
-#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */
-#define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */
-#define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */
-#define XFS_DA_OP_LOGGED (1u << 8) /* Use intent items to track op */
+#define XFS_DA_OP_RECOVERY (1u << 5) /* Log recovery operation */
+#define XFS_DA_OP_LOGGED (1u << 6) /* Use intent items to track op */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
@@ -100,8 +104,6 @@ typedef struct xfs_da_args {
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
- { XFS_DA_OP_NOTIME, "NOTIME" }, \
- { XFS_DA_OP_REMOVE, "REMOVE" }, \
{ XFS_DA_OP_RECOVERY, "RECOVERY" }, \
{ XFS_DA_OP_LOGGED, "LOGGED" }
@@ -235,6 +237,8 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp,
struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from);
void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp,
struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from);
+xfs_failaddr_t xfs_da3_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_da3_node_header_check(struct xfs_buf *bp, xfs_ino_t owner);
extern struct kmem_cache *xfs_da_state_cache;
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 24f9d1461f9a..86de99e2f757 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -159,6 +159,17 @@ struct xfs_da3_intnode {
#define XFS_DIR3_FT_MAX 9
+#define XFS_DIR3_FTYPE_STR \
+ { XFS_DIR3_FT_UNKNOWN, "unknown" }, \
+ { XFS_DIR3_FT_REG_FILE, "file" }, \
+ { XFS_DIR3_FT_DIR, "directory" }, \
+ { XFS_DIR3_FT_CHRDEV, "char" }, \
+ { XFS_DIR3_FT_BLKDEV, "block" }, \
+ { XFS_DIR3_FT_FIFO, "fifo" }, \
+ { XFS_DIR3_FT_SOCK, "sock" }, \
+ { XFS_DIR3_FT_SYMLINK, "symlink" }, \
+ { XFS_DIR3_FT_WHT, "whiteout" }
+
/*
* Byte offset in data block and shortform entry.
*/
@@ -703,12 +714,30 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
+#define XFS_ATTR_PARENT_BIT 3 /* parent pointer attrs */
#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
#define XFS_ATTR_LOCAL (1u << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT)
+#define XFS_ATTR_PARENT (1u << XFS_ATTR_PARENT_BIT)
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
-#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+
+#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | \
+ XFS_ATTR_SECURE | \
+ XFS_ATTR_PARENT)
+
+/* Private attr namespaces not exposed to userspace */
+#define XFS_ATTR_PRIVATE_NSP_MASK (XFS_ATTR_PARENT)
+
+#define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \
+ XFS_ATTR_LOCAL | \
+ XFS_ATTR_INCOMPLETE)
+
+#define XFS_ATTR_NAMESPACE_STR \
+ { XFS_ATTR_LOCAL, "local" }, \
+ { XFS_ATTR_ROOT, "root" }, \
+ { XFS_ATTR_SECURE, "secure" }, \
+ { XFS_ATTR_PARENT, "parent" }
/*
* Alignment for namelist and valuelist entries (since they are mixed
@@ -851,9 +880,7 @@ struct xfs_attr3_rmt_hdr {
#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
-#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
- ((bufsize) - (xfs_has_crc((mp)) ? \
- sizeof(struct xfs_attr3_rmt_hdr) : 0))
+unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp);
/* Number of bytes in a directory block. */
static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
@@ -864,4 +891,17 @@ static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
xfs_failaddr_t xfs_da3_blkinfo_verify(struct xfs_buf *bp,
struct xfs_da3_blkinfo *hdr3);
+/*
+ * Parent pointer attribute format definition
+ *
+ * The xattr name contains the dirent name.
+ * The xattr value encodes the parent inode number and generation to ease
+ * opening parents by handle.
+ * The xattr hashval is xfs_dir2_namehash() ^ p_ino
+ */
+struct xfs_parent_rec {
+ __be64 p_ino;
+ __be32 p_gen;
+} __packed;
+
#endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 66a17910d021..4a078e07e1a0 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -27,6 +27,7 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans_priv.h"
+#include "xfs_exchmaps.h"
static struct kmem_cache *xfs_defer_pending_cache;
@@ -819,16 +820,16 @@ xfs_defer_can_append(
/* Create a new pending item at the end of the transaction list. */
static inline struct xfs_defer_pending *
xfs_defer_alloc(
- struct xfs_trans *tp,
+ struct list_head *dfops,
const struct xfs_defer_op_type *ops)
{
struct xfs_defer_pending *dfp;
dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
- GFP_NOFS | __GFP_NOFAIL);
+ GFP_KERNEL | __GFP_NOFAIL);
dfp->dfp_ops = ops;
INIT_LIST_HEAD(&dfp->dfp_work);
- list_add_tail(&dfp->dfp_list, &tp->t_dfops);
+ list_add_tail(&dfp->dfp_list, dfops);
return dfp;
}
@@ -846,7 +847,7 @@ xfs_defer_add(
dfp = xfs_defer_find_last(tp, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops))
- dfp = xfs_defer_alloc(tp, ops);
+ dfp = xfs_defer_alloc(&tp->t_dfops, ops);
xfs_defer_add_item(dfp, li);
trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
@@ -870,7 +871,7 @@ xfs_defer_add_barrier(
if (dfp)
return;
- xfs_defer_alloc(tp, &xfs_barrier_defer_type);
+ xfs_defer_alloc(&tp->t_dfops, &xfs_barrier_defer_type);
trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL);
}
@@ -885,14 +886,9 @@ xfs_defer_start_recovery(
struct list_head *r_dfops,
const struct xfs_defer_op_type *ops)
{
- struct xfs_defer_pending *dfp;
+ struct xfs_defer_pending *dfp = xfs_defer_alloc(r_dfops, ops);
- dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
- GFP_NOFS | __GFP_NOFAIL);
- dfp->dfp_ops = ops;
dfp->dfp_intent = lip;
- INIT_LIST_HEAD(&dfp->dfp_work);
- list_add_tail(&dfp->dfp_list, r_dfops);
}
/*
@@ -979,7 +975,7 @@ xfs_defer_ops_capture(
return ERR_PTR(error);
/* Create an object to capture the defer ops. */
- dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS);
+ dfc = kzalloc(sizeof(*dfc), GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&dfc->dfc_list);
INIT_LIST_HEAD(&dfc->dfc_dfops);
@@ -1011,7 +1007,7 @@ xfs_defer_ops_capture(
* transaction.
*/
for (i = 0; i < dfc->dfc_held.dr_inos; i++) {
- ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL);
ihold(VFS_I(dfc->dfc_held.dr_ip[i]));
}
@@ -1038,7 +1034,7 @@ xfs_defer_ops_capture_abort(
for (i = 0; i < dfc->dfc_held.dr_inos; i++)
xfs_irele(dfc->dfc_held.dr_ip[i]);
- kmem_free(dfc);
+ kfree(dfc);
}
/*
@@ -1096,7 +1092,11 @@ xfs_defer_ops_continue(
ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
/* Lock the captured resources to the new transaction. */
- if (dfc->dfc_held.dr_inos == 2)
+ if (dfc->dfc_held.dr_inos > 2) {
+ xfs_sort_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos);
+ xfs_lock_inodes(dfc->dfc_held.dr_ip, dfc->dfc_held.dr_inos,
+ XFS_ILOCK_EXCL);
+ } else if (dfc->dfc_held.dr_inos == 2)
xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
else if (dfc->dfc_held.dr_inos == 1)
@@ -1114,7 +1114,7 @@ xfs_defer_ops_continue(
list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
tp->t_flags |= dfc->dfc_tpflags;
- kmem_free(dfc);
+ kfree(dfc);
}
/* Release the resources captured and continued during recovery. */
@@ -1181,6 +1181,10 @@ xfs_defer_init_item_caches(void)
error = xfs_attr_intent_init_cache();
if (error)
goto err;
+ error = xfs_exchmaps_intent_init_cache();
+ if (error)
+ goto err;
+
return 0;
err:
xfs_defer_destroy_item_caches();
@@ -1191,6 +1195,7 @@ err:
void
xfs_defer_destroy_item_caches(void)
{
+ xfs_exchmaps_intent_destroy_cache();
xfs_attr_intent_destroy_cache();
xfs_extfree_intent_destroy_cache();
xfs_bmap_intent_destroy_cache();
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 18a9fb92dde8..8b338031e487 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -72,12 +72,18 @@ extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
extern const struct xfs_defer_op_type xfs_attr_defer_type;
-
+extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;
/*
* Deferred operation item relogging limits.
*/
-#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+
+/*
+ * Rename w/ parent pointers can require up to 5 inodes with deferred ops to
+ * be joined to the transaction: src_dp, target_dp, src_ip, target_ip, and wip.
+ * These inodes are locked in sorted order by their inode numbers
+ */
+#define XFS_DEFER_OPS_NR_INODES 5
#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
/* Resources that must be held across a transaction roll. */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index a76673281514..457f9a38f850 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -18,6 +18,7 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_health.h"
const struct xfs_name xfs_name_dotdot = {
.name = (const unsigned char *)"..",
@@ -25,6 +26,12 @@ const struct xfs_name xfs_name_dotdot = {
.type = XFS_DIR3_FT_DIR,
};
+const struct xfs_name xfs_name_dot = {
+ .name = (const unsigned char *)".",
+ .len = 1,
+ .type = XFS_DIR3_FT_DIR,
+};
+
/*
* Convert inode mode to directory entry filetype
*/
@@ -104,13 +111,13 @@ xfs_da_mount(
ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE);
- mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
- KM_MAYFAIL);
- mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
- KM_MAYFAIL);
+ mp->m_dir_geo = kzalloc(sizeof(struct xfs_da_geometry),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ mp->m_attr_geo = kzalloc(sizeof(struct xfs_da_geometry),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!mp->m_dir_geo || !mp->m_attr_geo) {
- kmem_free(mp->m_dir_geo);
- kmem_free(mp->m_attr_geo);
+ kfree(mp->m_dir_geo);
+ kfree(mp->m_attr_geo);
return -ENOMEM;
}
@@ -178,8 +185,8 @@ void
xfs_da_unmount(
struct xfs_mount *mp)
{
- kmem_free(mp->m_dir_geo);
- kmem_free(mp->m_attr_geo);
+ kfree(mp->m_dir_geo);
+ kfree(mp->m_attr_geo);
}
/*
@@ -236,18 +243,75 @@ xfs_dir_init(
if (error)
return error;
- args = kmem_zalloc(sizeof(*args), KM_NOFS);
+ args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
if (!args)
return -ENOMEM;
args->geo = dp->i_mount->m_dir_geo;
args->dp = dp;
args->trans = tp;
+ args->owner = dp->i_ino;
error = xfs_dir2_sf_create(args, pdp->i_ino);
- kmem_free(args);
+ kfree(args);
return error;
}
+enum xfs_dir2_fmt
+xfs_dir2_format(
+ struct xfs_da_args *args,
+ int *error)
+{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ xfs_fileoff_t eof;
+
+ xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ *error = 0;
+ if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
+ return XFS_DIR2_FMT_SF;
+
+ *error = xfs_bmap_last_offset(dp, &eof, XFS_DATA_FORK);
+ if (*error)
+ return XFS_DIR2_FMT_ERROR;
+
+ if (eof == XFS_B_TO_FSB(mp, geo->blksize)) {
+ if (XFS_IS_CORRUPT(mp, dp->i_disk_size != geo->blksize)) {
+ xfs_da_mark_sick(args);
+ *error = -EFSCORRUPTED;
+ return XFS_DIR2_FMT_ERROR;
+ }
+ return XFS_DIR2_FMT_BLOCK;
+ }
+ if (eof == geo->leafblk + geo->fsbcount)
+ return XFS_DIR2_FMT_LEAF;
+ return XFS_DIR2_FMT_NODE;
+}
+
+int
+xfs_dir_createname_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ if (!args->inumber)
+ args->op_flags |= XFS_DA_OP_JUSTCHECK;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_addname(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_addname(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_addname(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_addname(args);
+ default:
+ return error;
+ }
+}
+
/*
* Enter a name in a directory, or check for available space.
* If inum is 0, only the available space test is performed.
@@ -262,7 +326,6 @@ xfs_dir_createname(
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -273,7 +336,7 @@ xfs_dir_createname(
XFS_STATS_INC(dp->i_mount, xs_dir_create);
}
- args = kmem_zalloc(sizeof(*args), KM_NOFS);
+ args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
if (!args)
return -ENOMEM;
@@ -288,32 +351,10 @@ xfs_dir_createname(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
- if (!inum)
- args->op_flags |= XFS_DA_OP_JUSTCHECK;
+ args->owner = dp->i_ino;
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_addname(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_addname(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_addname(args);
- else
- rval = xfs_dir2_node_addname(args);
-
-out_free:
- kmem_free(args);
+ rval = xfs_dir_createname_args(args);
+ kfree(args);
return rval;
}
@@ -333,7 +374,8 @@ xfs_dir_cilookup_result(
!(args->op_flags & XFS_DA_OP_CILOOKUP))
return -EEXIST;
- args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
+ args->value = kmalloc(len,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_RETRY_MAYFAIL);
if (!args->value)
return -ENOMEM;
@@ -342,6 +384,34 @@ xfs_dir_cilookup_result(
return -EEXIST;
}
+int
+xfs_dir_lookup_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ error = xfs_dir2_sf_lookup(args);
+ break;
+ case XFS_DIR2_FMT_BLOCK:
+ error = xfs_dir2_block_lookup(args);
+ break;
+ case XFS_DIR2_FMT_LEAF:
+ error = xfs_dir2_leaf_lookup(args);
+ break;
+ case XFS_DIR2_FMT_NODE:
+ error = xfs_dir2_node_lookup(args);
+ break;
+ default:
+ break;
+ }
+
+ if (error != -EEXIST)
+ return error;
+ return 0;
+}
+
/*
* Lookup a name in a directory, give back the inode number.
* If ci_name is not NULL, returns the actual name in ci_name if it differs
@@ -358,21 +428,13 @@ xfs_dir_lookup(
{
struct xfs_da_args *args;
int rval;
- bool v;
int lock_mode;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
- /*
- * We need to use KM_NOFS here so that lockdep will not throw false
- * positive deadlock warnings on a non-transactional lookup path. It is
- * safe to recurse into inode recalim in that case, but lockdep can't
- * easily be taught about it. Hence KM_NOFS avoids having to add more
- * lockdep Doing this avoids having to add a bunch of lockdep class
- * annotations into the reclaim path for the ilock.
- */
- args = kmem_zalloc(sizeof(*args), KM_NOFS);
+ args = kzalloc(sizeof(*args),
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
@@ -382,34 +444,12 @@ xfs_dir_lookup(
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
args->op_flags = XFS_DA_OP_OKNOENT;
+ args->owner = dp->i_ino;
if (ci_name)
args->op_flags |= XFS_DA_OP_CILOOKUP;
lock_mode = xfs_ilock_data_map_shared(dp);
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_lookup(args);
- goto out_check_rval;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_lookup(args);
- goto out_check_rval;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_lookup(args);
- else
- rval = xfs_dir2_node_lookup(args);
-
-out_check_rval:
- if (rval == -EEXIST)
- rval = 0;
+ rval = xfs_dir_lookup_args(args);
if (!rval) {
*inum = args->inumber;
if (ci_name) {
@@ -417,12 +457,31 @@ out_check_rval:
ci_name->len = args->valuelen;
}
}
-out_free:
xfs_iunlock(dp, lock_mode);
- kmem_free(args);
+ kfree(args);
return rval;
}
+int
+xfs_dir_removename_args(
+ struct xfs_da_args *args)
+{
+ int error;
+
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_removename(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_removename(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_removename(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_removename(args);
+ default:
+ return error;
+ }
+}
+
/*
* Remove an entry from a directory.
*/
@@ -430,18 +489,17 @@ int
xfs_dir_removename(
struct xfs_trans *tp,
struct xfs_inode *dp,
- struct xfs_name *name,
+ const struct xfs_name *name,
xfs_ino_t ino,
xfs_extlen_t total) /* bmap's total block count */
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_remove);
- args = kmem_zalloc(sizeof(*args), KM_NOFS);
+ args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
if (!args)
return -ENOMEM;
@@ -455,30 +513,30 @@ xfs_dir_removename(
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
+ args->owner = dp->i_ino;
+ rval = xfs_dir_removename_args(args);
+ kfree(args);
+ return rval;
+}
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_removename(args);
- goto out_free;
- }
+int
+xfs_dir_replace_args(
+ struct xfs_da_args *args)
+{
+ int error;
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_removename(args);
- goto out_free;
+ switch (xfs_dir2_format(args, &error)) {
+ case XFS_DIR2_FMT_SF:
+ return xfs_dir2_sf_replace(args);
+ case XFS_DIR2_FMT_BLOCK:
+ return xfs_dir2_block_replace(args);
+ case XFS_DIR2_FMT_LEAF:
+ return xfs_dir2_leaf_replace(args);
+ case XFS_DIR2_FMT_NODE:
+ return xfs_dir2_node_replace(args);
+ default:
+ return error;
}
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_removename(args);
- else
- rval = xfs_dir2_node_removename(args);
-out_free:
- kmem_free(args);
- return rval;
}
/*
@@ -494,7 +552,6 @@ xfs_dir_replace(
{
struct xfs_da_args *args;
int rval;
- bool v;
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
@@ -502,7 +559,7 @@ xfs_dir_replace(
if (rval)
return rval;
- args = kmem_zalloc(sizeof(*args), KM_NOFS);
+ args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
if (!args)
return -ENOMEM;
@@ -516,29 +573,9 @@ xfs_dir_replace(
args->total = total;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
-
- if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
- rval = xfs_dir2_sf_replace(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isblock(args, &v);
- if (rval)
- goto out_free;
- if (v) {
- rval = xfs_dir2_block_replace(args);
- goto out_free;
- }
-
- rval = xfs_dir2_isleaf(args, &v);
- if (rval)
- goto out_free;
- if (v)
- rval = xfs_dir2_leaf_replace(args);
- else
- rval = xfs_dir2_node_replace(args);
-out_free:
- kmem_free(args);
+ args->owner = dp->i_ino;
+ rval = xfs_dir_replace_args(args);
+ kfree(args);
return rval;
}
@@ -606,55 +643,6 @@ xfs_dir2_grow_inode(
}
/*
- * See if the directory is a single-block form directory.
- */
-int
-xfs_dir2_isblock(
- struct xfs_da_args *args,
- bool *isblock)
-{
- struct xfs_mount *mp = args->dp->i_mount;
- xfs_fileoff_t eof;
- int error;
-
- error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
- if (error)
- return error;
-
- *isblock = false;
- if (XFS_FSB_TO_B(mp, eof) != args->geo->blksize)
- return 0;
-
- *isblock = true;
- if (XFS_IS_CORRUPT(mp, args->dp->i_disk_size != args->geo->blksize))
- return -EFSCORRUPTED;
- return 0;
-}
-
-/*
- * See if the directory is a single-leaf form directory.
- */
-int
-xfs_dir2_isleaf(
- struct xfs_da_args *args,
- bool *isleaf)
-{
- xfs_fileoff_t eof;
- int error;
-
- error = xfs_bmap_last_offset(args->dp, &eof, XFS_DATA_FORK);
- if (error)
- return error;
-
- *isleaf = false;
- if (eof != args->geo->leafblk + args->geo->fsbcount)
- return 0;
-
- *isleaf = true;
- return 0;
-}
-
-/*
* Remove the given block from the directory.
* This routine is used for data and free blocks, leaf/node are done
* by xfs_da_shrink_inode.
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index 19af22a16c41..6dbe6e9ecb49 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -22,6 +22,29 @@ struct xfs_dir3_icfree_hdr;
struct xfs_dir3_icleaf_hdr;
extern const struct xfs_name xfs_name_dotdot;
+extern const struct xfs_name xfs_name_dot;
+
+static inline bool
+xfs_dir2_samename(
+ const struct xfs_name *n1,
+ const struct xfs_name *n2)
+{
+ if (n1 == n2)
+ return true;
+ if (n1->len != n2->len)
+ return false;
+ return !memcmp(n1->name, n2->name, n1->len);
+}
+
+enum xfs_dir2_fmt {
+ XFS_DIR2_FMT_SF,
+ XFS_DIR2_FMT_BLOCK,
+ XFS_DIR2_FMT_LEAF,
+ XFS_DIR2_FMT_NODE,
+ XFS_DIR2_FMT_ERROR,
+};
+
+enum xfs_dir2_fmt xfs_dir2_format(struct xfs_da_args *args, int *error);
/*
* Convert inode mode to directory entry filetype
@@ -45,7 +68,7 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t *inum,
struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_name *name, xfs_ino_t ino,
+ const struct xfs_name *name, xfs_ino_t ino,
xfs_extlen_t tot);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t inum,
@@ -53,6 +76,11 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name);
+int xfs_dir_lookup_args(struct xfs_da_args *args);
+int xfs_dir_createname_args(struct xfs_da_args *args);
+int xfs_dir_removename_args(struct xfs_da_args *args);
+int xfs_dir_replace_args(struct xfs_da_args *args);
+
/*
* Direct call from the bmap code, bypassing the generic directory layer.
*/
@@ -61,8 +89,6 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
/*
* Interface routines used by userspace utilities
*/
-extern int xfs_dir2_isblock(struct xfs_da_args *args, bool *isblock);
-extern int xfs_dir2_isleaf(struct xfs_da_args *args, bool *isleaf);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
@@ -88,6 +114,10 @@ extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
+xfs_failaddr_t xfs_dir3_leaf_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_dir3_data_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+xfs_failaddr_t xfs_dir3_block_header_check(struct xfs_buf *bp, xfs_ino_t owner);
+
extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 3c256d4cc40b..0f93ed1a4a74 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -20,6 +20,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_log.h"
+#include "xfs_health.h"
/*
* Local function prototypes.
@@ -114,17 +115,20 @@ const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
.verify_struct = xfs_dir3_block_verify,
};
-static xfs_failaddr_t
+xfs_failaddr_t
xfs_dir3_block_header_check(
- struct xfs_inode *dp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_has_crc(mp)) {
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
- if (be64_to_cpu(hdr3->owner) != dp->i_ino)
+ if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->owner) != owner)
return __this_address;
}
@@ -135,6 +139,7 @@ int
xfs_dir3_block_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
struct xfs_buf **bpp)
{
struct xfs_mount *mp = dp->i_mount;
@@ -147,11 +152,12 @@ xfs_dir3_block_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_block_header_check(dp, *bpp);
+ fa = xfs_dir3_block_header_check(*bpp, owner);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
*bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
return -EFSCORRUPTED;
}
@@ -161,12 +167,13 @@ xfs_dir3_block_read(
static void
xfs_dir3_block_init(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_buf *bp,
- struct xfs_inode *dp)
+ struct xfs_da_args *args,
+ struct xfs_buf *bp)
{
- struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
bp->b_ops = &xfs_dir3_block_buf_ops;
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_BLOCK_BUF);
@@ -175,7 +182,7 @@ xfs_dir3_block_init(
memset(hdr3, 0, sizeof(*hdr3));
hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
return;
@@ -380,7 +387,7 @@ xfs_dir2_block_addname(
tp = args->trans;
/* Read the (one and only) directory block into bp. */
- error = xfs_dir3_block_read(tp, dp, &bp);
+ error = xfs_dir3_block_read(tp, dp, args->owner, &bp);
if (error)
return error;
@@ -695,7 +702,7 @@ xfs_dir2_block_lookup_int(
dp = args->dp;
tp = args->trans;
- error = xfs_dir3_block_read(tp, dp, &bp);
+ error = xfs_dir3_block_read(tp, dp, args->owner, &bp);
if (error)
return error;
@@ -979,7 +986,8 @@ xfs_dir2_leaf_to_block(
* Read the data block if we don't already have it, give up if it fails.
*/
if (!dbp) {
- error = xfs_dir3_data_read(tp, dp, args->geo->datablk, 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ args->geo->datablk, 0, &dbp);
if (error)
return error;
}
@@ -1007,7 +1015,7 @@ xfs_dir2_leaf_to_block(
/*
* Start converting it to block form.
*/
- xfs_dir3_block_init(mp, tp, dbp, dp);
+ xfs_dir3_block_init(args, dbp);
needlog = 1;
needscan = 0;
@@ -1108,7 +1116,7 @@ xfs_dir2_sf_to_block(
* Copy the directory into a temporary buffer.
* Then pitch the incore inode data so we can make extents.
*/
- sfp = kmem_alloc(ifp->if_bytes, 0);
+ sfp = kmalloc(ifp->if_bytes, GFP_KERNEL | __GFP_NOFAIL);
memcpy(sfp, oldsfp, ifp->if_bytes);
xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
@@ -1127,7 +1135,7 @@ xfs_dir2_sf_to_block(
error = xfs_dir3_data_init(args, blkno, &bp);
if (error)
goto out_free;
- xfs_dir3_block_init(mp, tp, bp, dp);
+ xfs_dir3_block_init(args, bp);
hdr = bp->b_addr;
/*
@@ -1167,7 +1175,7 @@ xfs_dir2_sf_to_block(
* Create entry for .
*/
dep = bp->b_addr + offset;
- dep->inumber = cpu_to_be64(dp->i_ino);
+ dep->inumber = cpu_to_be64(args->owner);
dep->namelen = 1;
dep->name[0] = '.';
xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
@@ -1253,7 +1261,7 @@ xfs_dir2_sf_to_block(
sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
}
/* Done with the temporary buffer */
- kmem_free(sfp);
+ kfree(sfp);
/*
* Sort the leaf entries by hash value.
*/
@@ -1268,6 +1276,6 @@ xfs_dir2_sf_to_block(
xfs_dir3_data_check(dp, bp);
return 0;
out_free:
- kmem_free(sfp);
+ kfree(sfp);
return error;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index dbcf58979a59..ea0b9628df18 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -18,6 +18,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
+#include "xfs_health.h"
static xfs_failaddr_t xfs_dir2_data_freefind_verify(
struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf,
@@ -394,17 +395,20 @@ static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
.verify_write = xfs_dir3_data_write_verify,
};
-static xfs_failaddr_t
+xfs_failaddr_t
xfs_dir3_data_header_check(
- struct xfs_inode *dp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_has_crc(mp)) {
struct xfs_dir3_data_hdr *hdr3 = bp->b_addr;
- if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
+ if (hdr3->hdr.magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.owner) != owner)
return __this_address;
}
@@ -415,6 +419,7 @@ int
xfs_dir3_data_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t bno,
unsigned int flags,
struct xfs_buf **bpp)
@@ -428,11 +433,12 @@ xfs_dir3_data_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_data_header_check(dp, *bpp);
+ fa = xfs_dir3_data_header_check(*bpp, owner);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
*bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
return -EFSCORRUPTED;
}
@@ -723,7 +729,7 @@ xfs_dir3_data_init(
memset(hdr3, 0, sizeof(*hdr3));
hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->owner = cpu_to_be64(dp->i_ino);
+ hdr3->owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
} else
@@ -1198,6 +1204,7 @@ xfs_dir2_data_use_free(
corrupt:
xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount,
hdr, sizeof(*hdr), __FILE__, __LINE__, fa);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index cb9e950a911d..71c2f22a3f6e 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -19,6 +19,7 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
+#include "xfs_health.h"
/*
* Local function declarations.
@@ -207,6 +208,29 @@ xfs_dir3_leaf_verify(
return xfs_dir3_leaf_check_int(mp, &leafhdr, bp->b_addr, true);
}
+xfs_failaddr_t
+xfs_dir3_leaf_header_check(
+ struct xfs_buf *bp,
+ xfs_ino_t owner)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ if (xfs_has_crc(mp)) {
+ struct xfs_dir3_leaf *hdr3 = bp->b_addr;
+
+ if (hdr3->hdr.info.hdr.magic !=
+ cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) &&
+ hdr3->hdr.info.hdr.magic !=
+ cpu_to_be16(XFS_DIR3_LEAFN_MAGIC))
+ return __this_address;
+
+ if (be64_to_cpu(hdr3->hdr.info.owner) != owner)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
static void
xfs_dir3_leaf_read_verify(
struct xfs_buf *bp)
@@ -270,32 +294,60 @@ int
xfs_dir3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
&xfs_dir3_leaf1_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_dir3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
- return err;
+ return 0;
}
int
xfs_dir3_leafn_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
+ xfs_failaddr_t fa;
int err;
err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
&xfs_dir3_leafn_buf_ops);
- if (!err && tp && *bpp)
+ if (err || !(*bpp))
+ return err;
+
+ fa = xfs_dir3_leaf_header_check(*bpp, owner);
+ if (fa) {
+ __xfs_buf_mark_corrupt(*bpp, fa);
+ xfs_trans_brelse(tp, *bpp);
+ *bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
+ return -EFSCORRUPTED;
+ }
+
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
- return err;
+ return 0;
}
/*
@@ -303,12 +355,12 @@ xfs_dir3_leafn_read(
*/
static void
xfs_dir3_leaf_init(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
+ struct xfs_da_args *args,
struct xfs_buf *bp,
- xfs_ino_t owner,
uint16_t type)
{
+ struct xfs_mount *mp = args->dp->i_mount;
+ struct xfs_trans *tp = args->trans;
struct xfs_dir2_leaf *leaf = bp->b_addr;
ASSERT(type == XFS_DIR2_LEAF1_MAGIC || type == XFS_DIR2_LEAFN_MAGIC);
@@ -322,7 +374,7 @@ xfs_dir3_leaf_init(
? cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)
: cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
leaf3->info.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- leaf3->info.owner = cpu_to_be64(owner);
+ leaf3->info.owner = cpu_to_be64(args->owner);
uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
} else {
memset(leaf, 0, sizeof(*leaf));
@@ -355,7 +407,6 @@ xfs_dir3_leaf_get_buf(
{
struct xfs_inode *dp = args->dp;
struct xfs_trans *tp = args->trans;
- struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp;
int error;
@@ -368,7 +419,7 @@ xfs_dir3_leaf_get_buf(
if (error)
return error;
- xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic);
+ xfs_dir3_leaf_init(args, bp, magic);
xfs_dir3_leaf_log_header(args, bp);
if (magic == XFS_DIR2_LEAF1_MAGIC)
xfs_dir3_leaf_log_tail(args, bp);
@@ -646,7 +697,8 @@ xfs_dir2_leaf_addname(
trace_xfs_dir2_leaf_addname(args);
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->owner, args->geo->leafblk,
+ &lbp);
if (error)
return error;
@@ -833,9 +885,9 @@ xfs_dir2_leaf_addname(
* Already had space in some data block.
* Just read that one in.
*/
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, use_block),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, use_block), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1237,7 +1289,8 @@ xfs_dir2_leaf_lookup_int(
tp = args->trans;
mp = dp->i_mount;
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->owner, args->geo->leafblk,
+ &lbp);
if (error)
return error;
@@ -1275,9 +1328,9 @@ xfs_dir2_leaf_lookup_int(
if (newdb != curdb) {
if (dbp)
xfs_trans_brelse(tp, dbp);
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, newdb),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, newdb), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1317,9 +1370,9 @@ xfs_dir2_leaf_lookup_int(
ASSERT(cidb != -1);
if (cidb != curdb) {
xfs_trans_brelse(tp, dbp);
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, cidb),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, cidb), 0,
+ &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1393,8 +1446,10 @@ xfs_dir2_leaf_removename(
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[db]) != oldbest) {
xfs_buf_mark_corrupt(lbp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
+
/*
* Mark the former data entry unused.
*/
@@ -1611,7 +1666,8 @@ xfs_dir2_leaf_trim_data(
/*
* Read the offending data block. We need its buffer.
*/
- error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(geo, db), 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(geo, db), 0, &dbp);
if (error)
return error;
@@ -1750,7 +1806,8 @@ xfs_dir2_node_to_leaf(
/*
* Read the freespace block.
*/
- error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp);
+ error = xfs_dir2_free_read(tp, dp, args->owner, args->geo->freeblk,
+ &fbp);
if (error)
return error;
xfs_dir2_free_hdr_from_disk(mp, &freehdr, fbp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 7a03aeb9f4c9..fe8d4fa13128 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -20,6 +20,7 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
+#include "xfs_health.h"
/*
* Function declarations.
@@ -174,11 +175,11 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
/* Everything ok in the free block header? */
static xfs_failaddr_t
xfs_dir3_free_header_check(
- struct xfs_inode *dp,
- xfs_dablk_t fbno,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ xfs_ino_t owner,
+ xfs_dablk_t fbno)
{
- struct xfs_mount *mp = dp->i_mount;
+ struct xfs_mount *mp = bp->b_mount;
int maxbests = mp->m_dir_geo->free_max_bests;
unsigned int firstdb;
@@ -194,7 +195,7 @@ xfs_dir3_free_header_check(
return __this_address;
if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
return __this_address;
- if (be64_to_cpu(hdr3->hdr.owner) != dp->i_ino)
+ if (be64_to_cpu(hdr3->hdr.owner) != owner)
return __this_address;
} else {
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
@@ -213,6 +214,7 @@ static int
__xfs_dir3_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
unsigned int flags,
struct xfs_buf **bpp)
@@ -226,11 +228,12 @@ __xfs_dir3_free_read(
return err;
/* Check things that we can't do in the verifier. */
- fa = xfs_dir3_free_header_check(dp, fbno, *bpp);
+ fa = xfs_dir3_free_header_check(*bpp, owner, fbno);
if (fa) {
__xfs_buf_mark_corrupt(*bpp, fa);
xfs_trans_brelse(tp, *bpp);
*bpp = NULL;
+ xfs_dirattr_mark_sick(dp, XFS_DATA_FORK);
return -EFSCORRUPTED;
}
@@ -297,20 +300,23 @@ int
xfs_dir2_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, 0, bpp);
+ return __xfs_dir3_free_read(tp, dp, owner, fbno, 0, bpp);
}
static int
xfs_dir2_free_try_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
+ xfs_ino_t owner,
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, XFS_DABUF_MAP_HOLE_OK, bpp);
+ return __xfs_dir3_free_read(tp, dp, owner, fbno, XFS_DABUF_MAP_HOLE_OK,
+ bpp);
}
static int
@@ -347,7 +353,7 @@ xfs_dir3_free_get_buf(
hdr.magic = XFS_DIR3_FREE_MAGIC;
hdr3->hdr.blkno = cpu_to_be64(xfs_buf_daddr(bp));
- hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
+ hdr3->hdr.owner = cpu_to_be64(args->owner);
uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid);
} else
hdr.magic = XFS_DIR2_FREE_MAGIC;
@@ -443,6 +449,7 @@ xfs_dir2_leaf_to_node(
if (be32_to_cpu(ltp->bestcount) >
(uint)dp->i_disk_size / args->geo->blksize) {
xfs_buf_mark_corrupt(lbp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -517,6 +524,7 @@ xfs_dir2_leafn_add(
*/
if (index < 0) {
xfs_buf_mark_corrupt(bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -713,7 +721,7 @@ xfs_dir2_leafn_lookup_for_addname(
if (curbp)
xfs_trans_brelse(tp, curbp);
- error = xfs_dir2_free_read(tp, dp,
+ error = xfs_dir2_free_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo,
newfdb),
&curbp);
@@ -736,6 +744,7 @@ xfs_dir2_leafn_lookup_for_addname(
cpu_to_be16(NULLDATAOFF))) {
if (curfdb != newfdb)
xfs_trans_brelse(tp, curbp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
curfdb = newfdb;
@@ -804,6 +813,7 @@ xfs_dir2_leafn_lookup_for_entry(
xfs_dir3_leaf_check(dp, bp);
if (leafhdr.count <= 0) {
xfs_buf_mark_corrupt(bp);
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -857,7 +867,7 @@ xfs_dir2_leafn_lookup_for_entry(
ASSERT(state->extravalid);
curbp = state->extrablk.bp;
} else {
- error = xfs_dir3_data_read(tp, dp,
+ error = xfs_dir3_data_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo,
newdb),
0, &curbp);
@@ -1350,8 +1360,8 @@ xfs_dir2_leafn_remove(
* read in the free block.
*/
fdb = xfs_dir2_db_to_fdb(geo, db);
- error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(geo, fdb),
- &fbp);
+ error = xfs_dir2_free_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(geo, fdb), &fbp);
if (error)
return error;
free = fbp->b_addr;
@@ -1556,7 +1566,8 @@ xfs_dir2_leafn_toosmall(
/*
* Read the sibling leaf block.
*/
- error = xfs_dir3_leafn_read(state->args->trans, dp, blkno, &bp);
+ error = xfs_dir3_leafn_read(state->args->trans, dp,
+ state->args->owner, blkno, &bp);
if (error)
return error;
@@ -1709,7 +1720,7 @@ xfs_dir2_node_add_datablk(
* that was just allocated.
*/
fbno = xfs_dir2_db_to_fdb(args->geo, *dbno);
- error = xfs_dir2_free_try_read(tp, dp,
+ error = xfs_dir2_free_try_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo, fbno), &fbp);
if (error)
return error;
@@ -1739,6 +1750,7 @@ xfs_dir2_node_add_datablk(
} else {
xfs_alert(mp, " ... fblk is NULL");
}
+ xfs_da_mark_sick(args);
return -EFSCORRUPTED;
}
@@ -1855,7 +1867,7 @@ xfs_dir2_node_find_freeblk(
* so this might not succeed. This should be really rare, so
* there's no reason to avoid it.
*/
- error = xfs_dir2_free_try_read(tp, dp,
+ error = xfs_dir2_free_try_read(tp, dp, args->owner,
xfs_dir2_db_to_da(args->geo, fbno),
&fbp);
if (error)
@@ -1941,9 +1953,8 @@ xfs_dir2_node_addname_int(
&freehdr, &findex);
} else {
/* Read the data block in. */
- error = xfs_dir3_data_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, dbno),
- 0, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->owner,
+ xfs_dir2_db_to_da(args->geo, dbno), 0, &dbp);
}
if (error)
return error;
@@ -2295,7 +2306,7 @@ xfs_dir2_node_trim_free(
/*
* Read the freespace block.
*/
- error = xfs_dir2_free_try_read(tp, dp, fo, &bp);
+ error = xfs_dir2_free_try_read(tp, dp, args->owner, fo, &bp);
if (error)
return error;
/*
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 1db2e60ba827..3befb32509fa 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -50,8 +50,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
/* xfs_dir2_block.c */
-extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
- struct xfs_buf **bpp);
+int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_ino_t owner, struct xfs_buf **bpp);
extern int xfs_dir2_block_addname(struct xfs_da_args *args);
extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
extern int xfs_dir2_block_removename(struct xfs_da_args *args);
@@ -78,7 +78,8 @@ extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
struct xfs_buf *bp);
int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t bno, unsigned int flags,
+ struct xfs_buf **bpp);
int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
unsigned int flags);
@@ -95,9 +96,9 @@ void xfs_dir2_leaf_hdr_from_disk(struct xfs_mount *mp,
void xfs_dir2_leaf_hdr_to_disk(struct xfs_mount *mp, struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from);
int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
@@ -154,8 +155,8 @@ extern int xfs_dir2_node_removename(struct xfs_da_args *args);
extern int xfs_dir2_node_replace(struct xfs_da_args *args);
extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
int *rvalp);
-extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, struct xfs_buf **bpp);
+int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_ino_t owner, xfs_dablk_t fbno, struct xfs_buf **bpp);
/* xfs_dir2_sf.c */
xfs_ino_t xfs_dir2_sf_get_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index e1f83fc7b6ad..17a20384c8b7 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -276,7 +276,7 @@ xfs_dir2_block_to_sf(
* format the data into. Once we have formatted the data, we can free
* the block and copy the formatted data into the inode literal area.
*/
- sfp = kmem_alloc(mp->m_sb.sb_inodesize, 0);
+ sfp = kmalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | __GFP_NOFAIL);
memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
/*
@@ -350,7 +350,7 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(sfp);
+ kfree(sfp);
return error;
}
@@ -524,7 +524,7 @@ xfs_dir2_sf_addname_hard(
* Copy the old directory to the stack buffer.
*/
old_isize = (int)dp->i_disk_size;
- buf = kmem_alloc(old_isize, 0);
+ buf = kmalloc(old_isize, GFP_KERNEL | __GFP_NOFAIL);
oldsfp = (xfs_dir2_sf_hdr_t *)buf;
memcpy(oldsfp, dp->i_df.if_data, old_isize);
/*
@@ -576,7 +576,7 @@ xfs_dir2_sf_addname_hard(
sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
memcpy(sfep, oldsfep, old_isize - nbytes);
}
- kmem_free(buf);
+ kfree(buf);
dp->i_disk_size = new_isize;
xfs_dir2_sf_check(args);
}
@@ -1151,7 +1151,7 @@ xfs_dir2_sf_toino4(
* Don't want xfs_idata_realloc copying the data here.
*/
oldsize = dp->i_df.if_bytes;
- buf = kmem_alloc(oldsize, 0);
+ buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL);
ASSERT(oldsfp->i8count == 1);
memcpy(buf, oldsfp, oldsize);
/*
@@ -1190,7 +1190,7 @@ xfs_dir2_sf_toino4(
/*
* Clean up the inode.
*/
- kmem_free(buf);
+ kfree(buf);
dp->i_disk_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
@@ -1223,7 +1223,7 @@ xfs_dir2_sf_toino8(
* Don't want xfs_idata_realloc copying the data here.
*/
oldsize = dp->i_df.if_bytes;
- buf = kmem_alloc(oldsize, 0);
+ buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL);
ASSERT(oldsfp->i8count == 0);
memcpy(buf, oldsfp, oldsize);
/*
@@ -1262,7 +1262,7 @@ xfs_dir2_sf_toino8(
/*
* Clean up the inode.
*/
- kmem_free(buf);
+ kfree(buf);
dp->i_disk_size = newsize;
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
}
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 01a9e86b3037..7002d7676a78 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -63,7 +63,8 @@
#define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41
#define XFS_ERRTAG_WB_DELAY_MS 42
#define XFS_ERRTAG_WRITE_DELAY_MS 43
-#define XFS_ERRTAG_MAX 44
+#define XFS_ERRTAG_EXCHMAPS_FINISH_ONE 44
+#define XFS_ERRTAG_MAX 45
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -111,5 +112,6 @@
#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
#define XFS_RANDOM_WB_DELAY_MS 3000
#define XFS_RANDOM_WRITE_DELAY_MS 3000
+#define XFS_RANDOM_EXCHMAPS_FINISH_ONE 1
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c
new file mode 100644
index 000000000000..2021396651de
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_exchmaps.c
@@ -0,0 +1,1235 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_bmap.h"
+#include "xfs_icache.h"
+#include "xfs_quota.h"
+#include "xfs_exchmaps.h"
+#include "xfs_trace.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_error.h"
+#include "xfs_errortag.h"
+#include "xfs_health.h"
+#include "xfs_exchmaps_item.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_dir2.h"
+#include "xfs_symlink_remote.h"
+
+struct kmem_cache *xfs_exchmaps_intent_cache;
+
+/* bmbt mappings adjacent to a pair of records. */
+struct xfs_exchmaps_adjacent {
+ struct xfs_bmbt_irec left1;
+ struct xfs_bmbt_irec right1;
+ struct xfs_bmbt_irec left2;
+ struct xfs_bmbt_irec right2;
+};
+
+#define ADJACENT_INIT { \
+ .left1 = { .br_startblock = HOLESTARTBLOCK }, \
+ .right1 = { .br_startblock = HOLESTARTBLOCK }, \
+ .left2 = { .br_startblock = HOLESTARTBLOCK }, \
+ .right2 = { .br_startblock = HOLESTARTBLOCK }, \
+}
+
+/* Information to reset reflink flag / CoW fork state after an exchange. */
+
+/*
+ * If the reflink flag is set on either inode, make sure it has an incore CoW
+ * fork, since all reflink inodes must have them. If there's a CoW fork and it
+ * has mappings in it, make sure the inodes are tagged appropriately so that
+ * speculative preallocations can be GC'd if we run low of space.
+ */
+static inline void
+xfs_exchmaps_ensure_cowfork(
+ struct xfs_inode *ip)
+{
+ struct xfs_ifork *cfork;
+
+ if (xfs_is_reflink_inode(ip))
+ xfs_ifork_init_cow(ip);
+
+ cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
+ if (!cfork)
+ return;
+ if (cfork->if_bytes > 0)
+ xfs_inode_set_cowblocks_tag(ip);
+ else
+ xfs_inode_clear_cowblocks_tag(ip);
+}
+
+/*
+ * Adjust the on-disk inode size upwards if needed so that we never add
+ * mappings into the file past EOF. This is crucial so that log recovery won't
+ * get confused by the sudden appearance of post-eof mappings.
+ */
+STATIC void
+xfs_exchmaps_update_size(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ xfs_fsize_t new_isize)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_fsize_t len;
+
+ if (new_isize < 0)
+ return;
+
+ len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
+ new_isize);
+
+ if (len <= ip->i_disk_size)
+ return;
+
+ trace_xfs_exchmaps_update_inode_size(ip, len);
+
+ ip->i_disk_size = len;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Advance the incore state tracking after exchanging a mapping. */
+static inline void
+xmi_advance(
+ struct xfs_exchmaps_intent *xmi,
+ const struct xfs_bmbt_irec *irec)
+{
+ xmi->xmi_startoff1 += irec->br_blockcount;
+ xmi->xmi_startoff2 += irec->br_blockcount;
+ xmi->xmi_blockcount -= irec->br_blockcount;
+}
+
+/* Do we still have more mappings to exchange? */
+static inline bool
+xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
+{
+ return xmi->xmi_blockcount > 0;
+}
+
+/* Do we have post-operation cleanups to perform? */
+static inline bool
+xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
+{
+ return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
+ XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
+ __XFS_EXCHMAPS_INO2_SHORTFORM);
+}
+
+/* Check all mappings to make sure we can actually exchange them. */
+int
+xfs_exchmaps_check_forks(
+ struct xfs_mount *mp,
+ const struct xfs_exchmaps_req *req)
+{
+ struct xfs_ifork *ifp1, *ifp2;
+ int whichfork = xfs_exchmaps_reqfork(req);
+
+ /* No fork? */
+ ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
+ ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
+ if (!ifp1 || !ifp2)
+ return -EINVAL;
+
+ /* We don't know how to exchange local format forks. */
+ if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
+ ifp2->if_format == XFS_DINODE_FMT_LOCAL)
+ return -EINVAL;
+
+ return 0;
+}
+
+#ifdef CONFIG_XFS_QUOTA
+/* Log the actual updates to the quota accounting. */
+static inline void
+xfs_exchmaps_update_quota(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec1,
+ struct xfs_bmbt_irec *irec2)
+{
+ int64_t ip1_delta = 0, ip2_delta = 0;
+ unsigned int qflag;
+
+ qflag = XFS_IS_REALTIME_INODE(xmi->xmi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
+ XFS_TRANS_DQ_BCOUNT;
+
+ if (xfs_bmap_is_real_extent(irec1)) {
+ ip1_delta -= irec1->br_blockcount;
+ ip2_delta += irec1->br_blockcount;
+ }
+
+ if (xfs_bmap_is_real_extent(irec2)) {
+ ip1_delta += irec2->br_blockcount;
+ ip2_delta -= irec2->br_blockcount;
+ }
+
+ xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
+ xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
+}
+#else
+# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2) ((void)0)
+#endif
+
+/* Decide if we want to skip this mapping from file1. */
+static inline bool
+xfs_exchmaps_can_skip_mapping(
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec)
+{
+ struct xfs_mount *mp = xmi->xmi_ip1->i_mount;
+
+ /* Do not skip this mapping if the caller did not tell us to. */
+ if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
+ return false;
+
+ /* Do not skip mapped, written mappings. */
+ if (xfs_bmap_is_written_extent(irec))
+ return false;
+
+ /*
+ * The mapping is unwritten or a hole. It cannot be a delalloc
+ * reservation because we already excluded those. It cannot be an
+ * unwritten extent with dirty page cache because we flushed the page
+ * cache. For files where the allocation unit is 1FSB (files on the
+ * data dev, rt files if the extent size is 1FSB), we can safely
+ * skip this mapping.
+ */
+ if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
+ return true;
+
+ /*
+ * For a realtime file with a multi-fsb allocation unit, the decision
+ * is trickier because we can only swap full allocation units.
+ * Unwritten mappings can appear in the middle of an rtx if the rtx is
+ * partially written, but they can also appear for preallocations.
+ *
+ * If the mapping is a hole, skip it entirely. Holes should align with
+ * rtx boundaries.
+ */
+ if (!xfs_bmap_is_real_extent(irec))
+ return true;
+
+ /*
+ * All mappings below this point are unwritten.
+ *
+ * - If the beginning is not aligned to an rtx, trim the end of the
+ * mapping so that it does not cross an rtx boundary, and swap it.
+ *
+ * - If both ends are aligned to an rtx, skip the entire mapping.
+ */
+ if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
+ xfs_fileoff_t new_end;
+
+ new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
+ irec->br_blockcount = min(irec->br_blockcount,
+ new_end - irec->br_startoff);
+ return false;
+ }
+ if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
+ return true;
+
+ /*
+ * All mappings below this point are unwritten, start on an rtx
+ * boundary, and do not end on an rtx boundary.
+ *
+ * - If the mapping is longer than one rtx, trim the end of the mapping
+ * down to an rtx boundary and skip it.
+ *
+ * - The mapping is shorter than one rtx. Swap it.
+ */
+ if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
+ xfs_fileoff_t new_end;
+
+ new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
+ mp->m_sb.sb_rextsize);
+ irec->br_blockcount = new_end - irec->br_startoff;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Walk forward through the file ranges in @xmi until we find two different
+ * mappings to exchange. If there is work to do, return the mappings;
+ * otherwise we've reached the end of the range and xmi_blockcount will be
+ * zero.
+ *
+ * If the walk skips over a pair of mappings to the same storage, save them as
+ * the left records in @adj (if provided) so that the simulation phase can
+ * avoid an extra lookup.
+ */
+static int
+xfs_exchmaps_find_mappings(
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec1,
+ struct xfs_bmbt_irec *irec2,
+ struct xfs_exchmaps_adjacent *adj)
+{
+ int nimaps;
+ int bmap_flags;
+ int error;
+
+ bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));
+
+ for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
+ /* Read mapping from the first file */
+ nimaps = 1;
+ error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
+ xmi->xmi_blockcount, irec1, &nimaps,
+ bmap_flags);
+ if (error)
+ return error;
+ if (nimaps != 1 ||
+ irec1->br_startblock == DELAYSTARTBLOCK ||
+ irec1->br_startoff != xmi->xmi_startoff1) {
+ /*
+ * We should never get no mapping or a delalloc mapping
+ * or something that doesn't match what we asked for,
+ * since the caller flushed both inodes and we hold the
+ * ILOCKs for both inodes.
+ */
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
+ trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
+ continue;
+ }
+
+ /* Read mapping from the second file */
+ nimaps = 1;
+ error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
+ irec1->br_blockcount, irec2, &nimaps,
+ bmap_flags);
+ if (error)
+ return error;
+ if (nimaps != 1 ||
+ irec2->br_startblock == DELAYSTARTBLOCK ||
+ irec2->br_startoff != xmi->xmi_startoff2) {
+ /*
+ * We should never get no mapping or a delalloc mapping
+ * or something that doesn't match what we asked for,
+ * since the caller flushed both inodes and we hold the
+ * ILOCKs for both inodes.
+ */
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ /*
+ * We can only exchange as many blocks as the smaller of the
+ * two mapping maps.
+ */
+ irec1->br_blockcount = min(irec1->br_blockcount,
+ irec2->br_blockcount);
+
+ trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
+ trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);
+
+ /* We found something to exchange, so return it. */
+ if (irec1->br_startblock != irec2->br_startblock)
+ return 0;
+
+ /*
+ * Two mappings pointing to the same physical block must not
+ * have different states; that's filesystem corruption. Move
+ * on to the next mapping if they're both holes or both point
+ * to the same physical space extent.
+ */
+ if (irec1->br_state != irec2->br_state) {
+ xfs_bmap_mark_sick(xmi->xmi_ip1,
+ xfs_exchmaps_whichfork(xmi));
+ xfs_bmap_mark_sick(xmi->xmi_ip2,
+ xfs_exchmaps_whichfork(xmi));
+ return -EFSCORRUPTED;
+ }
+
+ /*
+ * Save the mappings if we're estimating work and skipping
+ * these identical mappings.
+ */
+ if (adj) {
+ memcpy(&adj->left1, irec1, sizeof(*irec1));
+ memcpy(&adj->left2, irec2, sizeof(*irec2));
+ }
+ }
+
+ return 0;
+}
+
+/* Exchange these two mappings. */
+static void
+xfs_exchmaps_one_step(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi,
+ struct xfs_bmbt_irec *irec1,
+ struct xfs_bmbt_irec *irec2)
+{
+ int whichfork = xfs_exchmaps_whichfork(xmi);
+
+ xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);
+
+ /* Remove both mappings. */
+ xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
+ xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);
+
+ /*
+ * Re-add both mappings. We exchange the file offsets between the two
+ * maps and add the opposite map, which has the effect of filling the
+ * logical offsets we just unmapped, but with with the physical mapping
+ * information exchanged.
+ */
+ swap(irec1->br_startoff, irec2->br_startoff);
+ xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
+ xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);
+
+ /* Make sure we're not adding mappings past EOF. */
+ if (whichfork == XFS_DATA_FORK) {
+ xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
+ xmi->xmi_isize1);
+ xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
+ xmi->xmi_isize2);
+ }
+
+ /*
+ * Advance our cursor and exit. The caller (either defer ops or log
+ * recovery) will log the XMD item, and if *blockcount is nonzero, it
+ * will log a new XMI item for the remainder and call us back.
+ */
+ xmi_advance(xmi, irec1);
+}
+
+/* Convert inode2's leaf attr fork back to shortform, if possible.. */
+STATIC int
+xfs_exchmaps_attr_to_sf(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_da_args args = {
+ .dp = xmi->xmi_ip2,
+ .geo = tp->t_mountp->m_attr_geo,
+ .whichfork = XFS_ATTR_FORK,
+ .trans = tp,
+ .owner = xmi->xmi_ip2->i_ino,
+ };
+ struct xfs_buf *bp;
+ int forkoff;
+ int error;
+
+ if (!xfs_attr_is_leaf(xmi->xmi_ip2))
+ return 0;
+
+ error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
+ &bp);
+ if (error)
+ return error;
+
+ forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
+ if (forkoff == 0)
+ return 0;
+
+ return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
+}
+
+/* Convert inode2's block dir fork back to shortform, if possible.. */
+STATIC int
+xfs_exchmaps_dir_to_sf(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_da_args args = {
+ .dp = xmi->xmi_ip2,
+ .geo = tp->t_mountp->m_dir_geo,
+ .whichfork = XFS_DATA_FORK,
+ .trans = tp,
+ .owner = xmi->xmi_ip2->i_ino,
+ };
+ struct xfs_dir2_sf_hdr sfh;
+ struct xfs_buf *bp;
+ int size;
+ int error = 0;
+
+ if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
+ return error;
+
+ error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
+ if (error)
+ return error;
+
+ size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
+ if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
+ return 0;
+
+ return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
+}
+
+/* Convert inode2's remote symlink target back to shortform, if possible. */
+STATIC int
+xfs_exchmaps_link_to_sf(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_inode *ip = xmi->xmi_ip2;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ char *buf;
+ int error;
+
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
+ ip->i_disk_size > xfs_inode_data_fork_size(ip))
+ return 0;
+
+ /* Read the current symlink target into a buffer. */
+ buf = kmalloc(ip->i_disk_size + 1,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
+ if (!buf) {
+ ASSERT(0);
+ return -ENOMEM;
+ }
+
+ error = xfs_symlink_remote_read(ip, buf);
+ if (error)
+ goto free;
+
+ /* Remove the blocks. */
+ error = xfs_symlink_remote_truncate(tp, ip);
+ if (error)
+ goto free;
+
+ /* Convert fork to local format and log our changes. */
+ xfs_idestroy_fork(ifp);
+ ifp->if_bytes = 0;
+ ifp->if_format = XFS_DINODE_FMT_LOCAL;
+ xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
+free:
+ kfree(buf);
+ return error;
+}
+
+/* Clear the reflink flag after an exchange. */
+static inline void
+xfs_exchmaps_clear_reflink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ trace_xfs_reflink_unset_inode_flag(ip);
+
+ ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Finish whatever work might come after an exchange operation. */
+static int
+xfs_exchmaps_do_postop_work(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
+ int error = 0;
+
+ if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
+ error = xfs_exchmaps_attr_to_sf(tp, xmi);
+ else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
+ error = xfs_exchmaps_dir_to_sf(tp, xmi);
+ else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
+ error = xfs_exchmaps_link_to_sf(tp, xmi);
+ xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
+ if (error)
+ return error;
+ }
+
+ if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
+ xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
+ xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
+ }
+
+ if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
+ xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
+ xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
+ }
+
+ return 0;
+}
+
+/* Finish one step in a mapping exchange operation, possibly relogging. */
+int
+xfs_exchmaps_finish_one(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ struct xfs_bmbt_irec irec1, irec2;
+ int error;
+
+ if (xmi_has_more_exchange_work(xmi)) {
+ /*
+ * If the operation state says that some range of the files
+ * have not yet been exchanged, look for mappings in that range
+ * to exchange. If we find some mappings, exchange them.
+ */
+ error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
+ if (error)
+ return error;
+
+ if (xmi_has_more_exchange_work(xmi))
+ xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);
+
+ /*
+ * If the caller asked us to exchange the file sizes after the
+ * exchange and either we just exchanged the last mappings in
+ * the range or we didn't find anything to exchange, update the
+ * ondisk file sizes.
+ */
+ if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
+ !xmi_has_more_exchange_work(xmi)) {
+ xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
+ xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;
+
+ xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
+ }
+ } else if (xmi_has_postop_work(xmi)) {
+ /*
+ * Now that we're finished with the exchange operation,
+ * complete the post-op cleanup work.
+ */
+ error = xfs_exchmaps_do_postop_work(tp, xmi);
+ if (error)
+ return error;
+ }
+
+ if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
+ return -EIO;
+
+ /* If we still have work to do, ask for a new transaction. */
+ if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
+ trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
+ return -EAGAIN;
+ }
+
+ /*
+ * If we reach here, we've finished all the exchange work and the post
+ * operation work. The last thing we need to do before returning to
+ * the caller is to make sure that COW forks are set up correctly.
+ */
+ if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
+ xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
+ xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
+ }
+
+ return 0;
+}
+
+/*
+ * Compute the amount of bmbt blocks we should reserve for each file. In the
+ * worst case, each exchange will fill a hole with a new mapping, which could
+ * result in a btree split every time we add a new leaf block.
+ */
+static inline uint64_t
+xfs_exchmaps_bmbt_blocks(
+ struct xfs_mount *mp,
+ const struct xfs_exchmaps_req *req)
+{
+ return howmany_64(req->nr_exchanges,
+ XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
+ XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
+}
+
+/* Compute the space we should reserve for the rmap btree expansions. */
+static inline uint64_t
+xfs_exchmaps_rmapbt_blocks(
+ struct xfs_mount *mp,
+ const struct xfs_exchmaps_req *req)
+{
+ if (!xfs_has_rmapbt(mp))
+ return 0;
+ if (XFS_IS_REALTIME_INODE(req->ip1))
+ return 0;
+
+ return howmany_64(req->nr_exchanges,
+ XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
+ XFS_RMAPADD_SPACE_RES(mp);
+}
+
+/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
+int
+xfs_exchmaps_estimate_overhead(
+ struct xfs_exchmaps_req *req)
+{
+ struct xfs_mount *mp = req->ip1->i_mount;
+ xfs_filblks_t bmbt_blocks;
+ xfs_filblks_t rmapbt_blocks;
+ xfs_filblks_t resblks = req->resblks;
+
+ /*
+ * Compute the number of bmbt and rmapbt blocks we might need to handle
+ * the estimated number of exchanges.
+ */
+ bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
+ rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);
+
+ trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);
+
+ /* Make sure the change in file block count doesn't overflow. */
+ if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
+ return -EFBIG;
+ if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
+ return -EFBIG;
+
+ /*
+ * Add together the number of blocks we need to handle btree growth,
+ * then add it to the number of blocks we need to reserve to this
+ * transaction.
+ */
+ if (check_add_overflow(resblks, bmbt_blocks, &resblks))
+ return -ENOSPC;
+ if (check_add_overflow(resblks, bmbt_blocks, &resblks))
+ return -ENOSPC;
+ if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
+ return -ENOSPC;
+ if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
+ return -ENOSPC;
+
+ /* Can't actually reserve more than UINT_MAX blocks. */
+ if (req->resblks > UINT_MAX)
+ return -ENOSPC;
+
+ req->resblks = resblks;
+ trace_xfs_exchmaps_final_estimate(req);
+ return 0;
+}
+
+/* Decide if we can merge two real mappings. */
+static inline bool
+xmi_can_merge(
+ const struct xfs_bmbt_irec *b1,
+ const struct xfs_bmbt_irec *b2)
+{
+ /* Don't merge holes. */
+ if (b1->br_startblock == HOLESTARTBLOCK ||
+ b2->br_startblock == HOLESTARTBLOCK)
+ return false;
+
+ /* We don't merge holes. */
+ if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
+ return false;
+
+ if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
+ b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
+ b1->br_state == b2->br_state &&
+ b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ return true;
+
+ return false;
+}
+
+/*
+ * Decide if we can merge three mappings. Caller must ensure all three
+ * mappings must not be holes or delalloc reservations.
+ */
+static inline bool
+xmi_can_merge_all(
+ const struct xfs_bmbt_irec *l,
+ const struct xfs_bmbt_irec *m,
+ const struct xfs_bmbt_irec *r)
+{
+ xfs_filblks_t new_len;
+
+ new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
+ return new_len <= XFS_MAX_BMBT_EXTLEN;
+}
+
+#define CLEFT_CONTIG 0x01
+#define CRIGHT_CONTIG 0x02
+#define CHOLE 0x04
+#define CBOTH_CONTIG (CLEFT_CONTIG | CRIGHT_CONTIG)
+
+#define NLEFT_CONTIG 0x10
+#define NRIGHT_CONTIG 0x20
+#define NHOLE 0x40
+#define NBOTH_CONTIG (NLEFT_CONTIG | NRIGHT_CONTIG)
+
+/* Estimate the effect of a single exchange on mapping count. */
+static inline int
+xmi_delta_nextents_step(
+ struct xfs_mount *mp,
+ const struct xfs_bmbt_irec *left,
+ const struct xfs_bmbt_irec *curr,
+ const struct xfs_bmbt_irec *new,
+ const struct xfs_bmbt_irec *right)
+{
+ bool lhole, rhole, chole, nhole;
+ unsigned int state = 0;
+ int ret = 0;
+
+ lhole = left->br_startblock == HOLESTARTBLOCK;
+ rhole = right->br_startblock == HOLESTARTBLOCK;
+ chole = curr->br_startblock == HOLESTARTBLOCK;
+ nhole = new->br_startblock == HOLESTARTBLOCK;
+
+ if (chole)
+ state |= CHOLE;
+ if (!lhole && !chole && xmi_can_merge(left, curr))
+ state |= CLEFT_CONTIG;
+ if (!rhole && !chole && xmi_can_merge(curr, right))
+ state |= CRIGHT_CONTIG;
+ if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
+ !xmi_can_merge_all(left, curr, right))
+ state &= ~CRIGHT_CONTIG;
+
+ if (nhole)
+ state |= NHOLE;
+ if (!lhole && !nhole && xmi_can_merge(left, new))
+ state |= NLEFT_CONTIG;
+ if (!rhole && !nhole && xmi_can_merge(new, right))
+ state |= NRIGHT_CONTIG;
+ if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
+ !xmi_can_merge_all(left, new, right))
+ state &= ~NRIGHT_CONTIG;
+
+ switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
+ case CLEFT_CONTIG | CRIGHT_CONTIG:
+ /*
+ * left/curr/right are the same mapping, so deleting curr
+ * causes 2 new mappings to be created.
+ */
+ ret += 2;
+ break;
+ case 0:
+ /*
+ * curr is not contiguous with any mapping, so we remove curr
+ * completely
+ */
+ ret--;
+ break;
+ case CHOLE:
+ /* hole, do nothing */
+ break;
+ case CLEFT_CONTIG:
+ case CRIGHT_CONTIG:
+ /* trim either left or right, no change */
+ break;
+ }
+
+ switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
+ case NLEFT_CONTIG | NRIGHT_CONTIG:
+ /*
+ * left/curr/right will become the same mapping, so adding
+ * curr causes the deletion of right.
+ */
+ ret--;
+ break;
+ case 0:
+ /* new is not contiguous with any mapping */
+ ret++;
+ break;
+ case NHOLE:
+ /* hole, do nothing. */
+ break;
+ case NLEFT_CONTIG:
+ case NRIGHT_CONTIG:
+ /* new is absorbed into left or right, no change */
+ break;
+ }
+
+ trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
+ state);
+ return ret;
+}
+
+/* Make sure we don't overflow the extent (mapping) counters. */
+static inline int
+xmi_ensure_delta_nextents(
+ struct xfs_exchmaps_req *req,
+ struct xfs_inode *ip,
+ int64_t delta)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ int whichfork = xfs_exchmaps_reqfork(req);
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ uint64_t new_nextents;
+ xfs_extnum_t max_nextents;
+
+ if (delta < 0)
+ return 0;
+
+ /*
+ * It's always an error if the delta causes integer overflow. delta
+ * needs an explicit cast here to avoid warnings about implicit casts
+ * coded into the overflow check.
+ */
+ if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
+ &new_nextents))
+ return -EFBIG;
+
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ new_nextents > 10)
+ return -EFBIG;
+
+ /*
+ * We always promote both inodes to have large extent counts if the
+ * superblock feature is enabled, so we only need to check against the
+ * theoretical maximum.
+ */
+ max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
+ whichfork);
+ if (new_nextents > max_nextents)
+ return -EFBIG;
+
+ return 0;
+}
+
+/* Find the next mapping after irec. */
+static inline int
+xmi_next(
+ struct xfs_inode *ip,
+ int bmap_flags,
+ const struct xfs_bmbt_irec *irec,
+ struct xfs_bmbt_irec *nrec)
+{
+ xfs_fileoff_t off;
+ xfs_filblks_t blockcount;
+ int nimaps = 1;
+ int error;
+
+ off = irec->br_startoff + irec->br_blockcount;
+ blockcount = XFS_MAX_FILEOFF - off;
+ error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
+ if (error)
+ return error;
+ if (nrec->br_startblock == DELAYSTARTBLOCK ||
+ nrec->br_startoff != off) {
+ /*
+ * If we don't get the mapping we want, return a zero-length
+ * mapping, which our estimator function will pretend is a hole.
+ * We shouldn't get delalloc reservations.
+ */
+ nrec->br_startblock = HOLESTARTBLOCK;
+ }
+
+ return 0;
+}
+
+int __init
+xfs_exchmaps_intent_init_cache(void)
+{
+ xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
+ sizeof(struct xfs_exchmaps_intent),
+ 0, 0, NULL);
+
+ return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_exchmaps_intent_destroy_cache(void)
+{
+ kmem_cache_destroy(xfs_exchmaps_intent_cache);
+ xfs_exchmaps_intent_cache = NULL;
+}
+
+/*
+ * Decide if we will exchange the reflink flags between the two files after the
+ * exchange. The only time we want to do this is if we're exchanging all
+ * mappings under EOF and the inode reflink flags have different states.
+ */
+static inline bool
+xmi_can_exchange_reflink_flags(
+ const struct xfs_exchmaps_req *req,
+ unsigned int reflink_state)
+{
+ struct xfs_mount *mp = req->ip1->i_mount;
+
+ if (hweight32(reflink_state) != 1)
+ return false;
+ if (req->startoff1 != 0 || req->startoff2 != 0)
+ return false;
+ if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
+ return false;
+ if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
+ return false;
+ return true;
+}
+
+
+/* Allocate and initialize a new incore intent item from a request. */
+struct xfs_exchmaps_intent *
+xfs_exchmaps_init_intent(
+ const struct xfs_exchmaps_req *req)
+{
+ struct xfs_exchmaps_intent *xmi;
+ unsigned int rs = 0;
+
+ xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
+ GFP_NOFS | __GFP_NOFAIL);
+ INIT_LIST_HEAD(&xmi->xmi_list);
+ xmi->xmi_ip1 = req->ip1;
+ xmi->xmi_ip2 = req->ip2;
+ xmi->xmi_startoff1 = req->startoff1;
+ xmi->xmi_startoff2 = req->startoff2;
+ xmi->xmi_blockcount = req->blockcount;
+ xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
+ xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;
+
+ if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
+ xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
+ return xmi;
+ }
+
+ if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
+ xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
+ xmi->xmi_isize1 = req->ip2->i_disk_size;
+ xmi->xmi_isize2 = req->ip1->i_disk_size;
+ }
+
+ /* Record the state of each inode's reflink flag before the op. */
+ if (xfs_is_reflink_inode(req->ip1))
+ rs |= 1;
+ if (xfs_is_reflink_inode(req->ip2))
+ rs |= 2;
+
+ /*
+ * Figure out if we're clearing the reflink flags (which effectively
+ * exchanges them) after the operation.
+ */
+ if (xmi_can_exchange_reflink_flags(req, rs)) {
+ if (rs & 1)
+ xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
+ if (rs & 2)
+ xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
+ }
+
+ if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
+ S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
+ xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
+
+ return xmi;
+}
+
+/*
+ * Estimate the number of exchange operations and the number of file blocks
+ * in each file that will be affected by the exchange operation.
+ */
+int
+xfs_exchmaps_estimate(
+ struct xfs_exchmaps_req *req)
+{
+ struct xfs_exchmaps_intent *xmi;
+ struct xfs_bmbt_irec irec1, irec2;
+ struct xfs_exchmaps_adjacent adj = ADJACENT_INIT;
+ xfs_filblks_t ip1_blocks = 0, ip2_blocks = 0;
+ int64_t d_nexts1, d_nexts2;
+ int bmap_flags;
+ int error;
+
+ ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));
+
+ bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
+ xmi = xfs_exchmaps_init_intent(req);
+
+ /*
+ * To guard against the possibility of overflowing the extent counters,
+ * we have to estimate an upper bound on the potential increase in that
+ * counter. We can split the mapping at each end of the range, and for
+ * each step of the exchange we can split the mapping that we're
+ * working on if the mappings do not align.
+ */
+ d_nexts1 = d_nexts2 = 3;
+
+ while (xmi_has_more_exchange_work(xmi)) {
+ /*
+ * Walk through the file ranges until we find something to
+ * exchange. Because we're simulating the exchange, pass in
+ * adj to capture skipped mappings for correct estimation of
+ * bmbt record merges.
+ */
+ error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
+ if (error)
+ goto out_free;
+ if (!xmi_has_more_exchange_work(xmi))
+ break;
+
+ /* Update accounting. */
+ if (xfs_bmap_is_real_extent(&irec1))
+ ip1_blocks += irec1.br_blockcount;
+ if (xfs_bmap_is_real_extent(&irec2))
+ ip2_blocks += irec2.br_blockcount;
+ req->nr_exchanges++;
+
+ /* Read the next mappings from both files. */
+ error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
+ if (error)
+ goto out_free;
+
+ error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
+ if (error)
+ goto out_free;
+
+ /* Update extent count deltas. */
+ d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
+ &adj.left1, &irec1, &irec2, &adj.right1);
+
+ d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
+ &adj.left2, &irec2, &irec1, &adj.right2);
+
+ /* Now pretend we exchanged the mappings. */
+ if (xmi_can_merge(&adj.left2, &irec1))
+ adj.left2.br_blockcount += irec1.br_blockcount;
+ else
+ memcpy(&adj.left2, &irec1, sizeof(irec1));
+
+ if (xmi_can_merge(&adj.left1, &irec2))
+ adj.left1.br_blockcount += irec2.br_blockcount;
+ else
+ memcpy(&adj.left1, &irec2, sizeof(irec2));
+
+ xmi_advance(xmi, &irec1);
+ }
+
+ /* Account for the blocks that are being exchanged. */
+ if (XFS_IS_REALTIME_INODE(req->ip1) &&
+ xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
+ req->ip1_rtbcount = ip1_blocks;
+ req->ip2_rtbcount = ip2_blocks;
+ } else {
+ req->ip1_bcount = ip1_blocks;
+ req->ip2_bcount = ip2_blocks;
+ }
+
+ /*
+ * Make sure that both forks have enough slack left in their extent
+ * counters that the exchange operation will not overflow.
+ */
+ trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
+ if (req->ip1 == req->ip2) {
+ error = xmi_ensure_delta_nextents(req, req->ip1,
+ d_nexts1 + d_nexts2);
+ } else {
+ error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
+ if (error)
+ goto out_free;
+ error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
+ }
+ if (error)
+ goto out_free;
+
+ trace_xfs_exchmaps_initial_estimate(req);
+ error = xfs_exchmaps_estimate_overhead(req);
+out_free:
+ kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
+ return error;
+}
+
+/* Set the reflink flag before an operation. */
+static inline void
+xfs_exchmaps_set_reflink(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ trace_xfs_reflink_set_inode_flag(ip);
+
+ ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/*
+ * If either file has shared blocks and we're exchanging data forks, we must
+ * flag the other file as having shared blocks so that we get the shared-block
+ * rmap functions if we need to fix up the rmaps.
+ */
+void
+xfs_exchmaps_ensure_reflink(
+ struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi)
+{
+ unsigned int rs = 0;
+
+ if (xfs_is_reflink_inode(xmi->xmi_ip1))
+ rs |= 1;
+ if (xfs_is_reflink_inode(xmi->xmi_ip2))
+ rs |= 2;
+
+ if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
+ xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);
+
+ if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
+ xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
+}
+
+/* Set the large extent count flag before an operation if needed. */
+static inline void
+xfs_exchmaps_ensure_large_extent_counts(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ if (xfs_inode_has_large_extent_counts(ip))
+ return;
+
+ ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Widen the extent counter fields of both inodes if necessary. */
+void
+xfs_exchmaps_upgrade_extent_counts(
+ struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi)
+{
+ if (!xfs_has_large_extent_counts(tp->t_mountp))
+ return;
+
+ xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
+ xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
+}
+
+/*
+ * Schedule an exchange a range of mappings from one inode to another.
+ *
+ * The use of file mapping exchange log intent items ensures the operation can
+ * be resumed even if the system goes down. The caller must commit the
+ * transaction to start the work.
+ *
+ * The caller must ensure the inodes must be joined to the transaction and
+ * ILOCKd; they will still be joined to the transaction at exit.
+ */
+void
+xfs_exchange_mappings(
+ struct xfs_trans *tp,
+ const struct xfs_exchmaps_req *req)
+{
+ struct xfs_exchmaps_intent *xmi;
+
+ BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);
+
+ xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
+ ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
+ if (req->flags & XFS_EXCHMAPS_SET_SIZES)
+ ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
+ ASSERT(xfs_has_exchange_range(tp->t_mountp));
+
+ if (req->blockcount == 0)
+ return;
+
+ xmi = xfs_exchmaps_init_intent(req);
+ xfs_exchmaps_defer_add(tp, xmi);
+ xfs_exchmaps_ensure_reflink(tp, xmi);
+ xfs_exchmaps_upgrade_extent_counts(tp, xmi);
+}
diff --git a/fs/xfs/libxfs/xfs_exchmaps.h b/fs/xfs/libxfs/xfs_exchmaps.h
new file mode 100644
index 000000000000..fa822dff202a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_exchmaps.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_EXCHMAPS_H__
+#define __XFS_EXCHMAPS_H__
+
+/* In-core deferred operation info about a file mapping exchange request. */
+struct xfs_exchmaps_intent {
+ /* List of other incore deferred work. */
+ struct list_head xmi_list;
+
+ /* Inodes participating in the operation. */
+ struct xfs_inode *xmi_ip1;
+ struct xfs_inode *xmi_ip2;
+
+ /* File offset range information. */
+ xfs_fileoff_t xmi_startoff1;
+ xfs_fileoff_t xmi_startoff2;
+ xfs_filblks_t xmi_blockcount;
+
+ /* Set these file sizes after the operation, unless negative. */
+ xfs_fsize_t xmi_isize1;
+ xfs_fsize_t xmi_isize2;
+
+ uint64_t xmi_flags; /* XFS_EXCHMAPS_* flags */
+};
+
+/* Try to convert inode2 from block to short format at the end, if possible. */
+#define __XFS_EXCHMAPS_INO2_SHORTFORM (1ULL << 63)
+
+#define XFS_EXCHMAPS_INTERNAL_FLAGS (__XFS_EXCHMAPS_INO2_SHORTFORM)
+
+/* flags that can be passed to xfs_exchmaps_{estimate,mappings} */
+#define XFS_EXCHMAPS_PARAMS (XFS_EXCHMAPS_ATTR_FORK | \
+ XFS_EXCHMAPS_SET_SIZES | \
+ XFS_EXCHMAPS_INO1_WRITTEN)
+
+static inline int
+xfs_exchmaps_whichfork(const struct xfs_exchmaps_intent *xmi)
+{
+ if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
+ return XFS_ATTR_FORK;
+ return XFS_DATA_FORK;
+}
+
+/* Parameters for a mapping exchange request. */
+struct xfs_exchmaps_req {
+ /* Inodes participating in the operation. */
+ struct xfs_inode *ip1;
+ struct xfs_inode *ip2;
+
+ /* File offset range information. */
+ xfs_fileoff_t startoff1;
+ xfs_fileoff_t startoff2;
+ xfs_filblks_t blockcount;
+
+ /* XFS_EXCHMAPS_* operation flags */
+ uint64_t flags;
+
+ /*
+ * Fields below this line are filled out by xfs_exchmaps_estimate;
+ * callers should initialize this part of the struct to zero.
+ */
+
+ /*
+ * Data device blocks to be moved out of ip1, and free space needed to
+ * handle the bmbt changes.
+ */
+ xfs_filblks_t ip1_bcount;
+
+ /*
+ * Data device blocks to be moved out of ip2, and free space needed to
+ * handle the bmbt changes.
+ */
+ xfs_filblks_t ip2_bcount;
+
+ /* rt blocks to be moved out of ip1. */
+ xfs_filblks_t ip1_rtbcount;
+
+ /* rt blocks to be moved out of ip2. */
+ xfs_filblks_t ip2_rtbcount;
+
+ /* Free space needed to handle the bmbt changes */
+ unsigned long long resblks;
+
+ /* Number of exchanges needed to complete the operation */
+ unsigned long long nr_exchanges;
+};
+
+static inline int
+xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req)
+{
+ if (req->flags & XFS_EXCHMAPS_ATTR_FORK)
+ return XFS_ATTR_FORK;
+ return XFS_DATA_FORK;
+}
+
+int xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req *req);
+int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req);
+
+extern struct kmem_cache *xfs_exchmaps_intent_cache;
+
+int __init xfs_exchmaps_intent_init_cache(void);
+void xfs_exchmaps_intent_destroy_cache(void);
+
+struct xfs_exchmaps_intent *xfs_exchmaps_init_intent(
+ const struct xfs_exchmaps_req *req);
+void xfs_exchmaps_ensure_reflink(struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi);
+void xfs_exchmaps_upgrade_extent_counts(struct xfs_trans *tp,
+ const struct xfs_exchmaps_intent *xmi);
+
+int xfs_exchmaps_finish_one(struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi);
+
+int xfs_exchmaps_check_forks(struct xfs_mount *mp,
+ const struct xfs_exchmaps_req *req);
+
+void xfs_exchange_mappings(struct xfs_trans *tp,
+ const struct xfs_exchmaps_req *req);
+
+#endif /* __XFS_EXCHMAPS_H__ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 382ab1e71c0b..61f51becff4f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -367,19 +367,23 @@ xfs_sb_has_ro_compat_feature(
return (sbp->sb_features_ro_compat & feature) != 0;
}
-#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
-#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
-#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */
-#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */
-#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */
-#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
+#define XFS_SB_FEAT_INCOMPAT_FTYPE (1 << 0) /* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */
+#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */
+#define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */
+#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */
+#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
+#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
+#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
#define XFS_SB_FEAT_INCOMPAT_ALL \
- (XFS_SB_FEAT_INCOMPAT_FTYPE| \
- XFS_SB_FEAT_INCOMPAT_SPINODES| \
- XFS_SB_FEAT_INCOMPAT_META_UUID| \
- XFS_SB_FEAT_INCOMPAT_BIGTIME| \
- XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR| \
- XFS_SB_FEAT_INCOMPAT_NREXT64)
+ (XFS_SB_FEAT_INCOMPAT_FTYPE | \
+ XFS_SB_FEAT_INCOMPAT_SPINODES | \
+ XFS_SB_FEAT_INCOMPAT_META_UUID | \
+ XFS_SB_FEAT_INCOMPAT_BIGTIME | \
+ XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \
+ XFS_SB_FEAT_INCOMPAT_NREXT64 | \
+ XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
+ XFS_SB_FEAT_INCOMPAT_PARENT)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
@@ -477,15 +481,9 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
#define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION)
/*
- * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the
- * arrays below.
- */
-#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1)
-
-/*
- * The second word of agf_levels in the first a.g. overlaps the EFS
- * superblock's magic number. Since the magic numbers valid for EFS
- * are > 64k, our value cannot be confused for an EFS superblock's.
+ * agf_cnt_level in the first AGF overlaps the EFS superblock's magic number.
+ * Since the magic numbers valid for EFS are > 64k, our value cannot be confused
+ * for an EFS superblock.
*/
typedef struct xfs_agf {
@@ -499,8 +497,13 @@ typedef struct xfs_agf {
/*
* Freespace and rmap information
*/
- __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */
- __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */
+ __be32 agf_bno_root; /* bnobt root block */
+ __be32 agf_cnt_root; /* cntbt root block */
+ __be32 agf_rmap_root; /* rmapbt root block */
+
+ __be32 agf_bno_level; /* bnobt btree levels */
+ __be32 agf_cnt_level; /* cntbt btree levels */
+ __be32 agf_rmap_level; /* rmapbt btree levels */
__be32 agf_flfirst; /* first freelist block's index */
__be32 agf_fllast; /* last freelist block's index */
@@ -899,6 +902,12 @@ static inline uint xfs_dinode_size(int version)
#define XFS_MAXLINK ((1U << 31) - 1U)
/*
+ * Any file that hits the maximum ondisk link count should be pinned to avoid
+ * a use-after-free situation.
+ */
+#define XFS_NLINK_PINNED (~0U)
+
+/*
* Values for di_format
*
* This enum is used in string mapping in xfs_trace.h; please keep the
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 6360073865db..97996cb79aaa 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -195,6 +195,8 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_PQUOTA (1 << 3) /* project quota */
#define XFS_FSOP_GEOM_SICK_RT_BITMAP (1 << 4) /* realtime bitmap */
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
+#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
+#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
@@ -237,6 +239,8 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_BIGTIME (1 << 21) /* 64-bit nsec timestamps */
#define XFS_FSOP_GEOM_FLAGS_INOBTCNT (1 << 22) /* inobt btree counter */
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
+#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
+#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
/*
* Minimum and maximum sizes need for growth checks.
@@ -292,6 +296,7 @@ struct xfs_ag_geometry {
#define XFS_AG_GEOM_SICK_FINOBT (1 << 7) /* free inode index */
#define XFS_AG_GEOM_SICK_RMAPBT (1 << 8) /* reverse mappings */
#define XFS_AG_GEOM_SICK_REFCNTBT (1 << 9) /* reference counts */
+#define XFS_AG_GEOM_SICK_INODES (1 << 10) /* bad inodes were seen */
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
@@ -406,6 +411,7 @@ struct xfs_bulkstat {
#define XFS_BS_SICK_XATTR (1 << 5) /* extended attributes */
#define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */
#define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */
+#define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */
/*
* Project quota id helpers (previously projid was 16bit only
@@ -629,7 +635,9 @@ typedef struct xfs_fsop_attrmulti_handlereq {
/*
* per machine unique filesystem identifier types.
*/
-typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
+typedef struct xfs_fsid {
+ __u32 val[2]; /* file system id type */
+} xfs_fsid_t;
typedef struct xfs_fid {
__u16 fid_len; /* length of remainder */
@@ -709,9 +717,22 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */
#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */
#define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */
+#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
+#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
+#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
+#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 25
+#define XFS_SCRUB_TYPE_NR 29
+
+/*
+ * This special type code only applies to the vectored scrub implementation.
+ *
+ * If any of the previous scrub vectors recorded runtime errors or have
+ * sv_flags bits set that match the OFLAG bits in the barrier vector's
+ * sv_flags, set the barrier's sv_ret to -ECANCELED and return to userspace.
+ */
+#define XFS_SCRUB_TYPE_BARRIER (0xFFFFFFFF)
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
@@ -757,6 +778,29 @@ struct xfs_scrub_metadata {
XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED)
#define XFS_SCRUB_FLAGS_ALL (XFS_SCRUB_FLAGS_IN | XFS_SCRUB_FLAGS_OUT)
+/* Vectored scrub calls to reduce the number of kernel transitions. */
+
+struct xfs_scrub_vec {
+ __u32 sv_type; /* XFS_SCRUB_TYPE_* */
+ __u32 sv_flags; /* XFS_SCRUB_FLAGS_* */
+ __s32 sv_ret; /* 0 or a negative error code */
+ __u32 sv_reserved; /* must be zero */
+};
+
+/* Vectored metadata scrub control structure. */
+struct xfs_scrub_vec_head {
+ __u64 svh_ino; /* inode number. */
+ __u32 svh_gen; /* inode generation. */
+ __u32 svh_agno; /* ag number. */
+ __u32 svh_flags; /* XFS_SCRUB_VEC_FLAGS_* */
+ __u16 svh_rest_us; /* wait this much time between vector items */
+ __u16 svh_nr; /* number of svh_vectors */
+ __u64 svh_reserved; /* must be zero */
+ __u64 svh_vectors; /* pointer to buffer of xfs_scrub_vec */
+};
+
+#define XFS_SCRUB_VEC_FLAGS_ALL (0)
+
/*
* ioctl limits
*/
@@ -766,6 +810,118 @@ struct xfs_scrub_metadata {
# define XFS_XATTR_LIST_MAX 65536
#endif
+/*
+ * Exchange part of file1 with part of the file that this ioctl that is being
+ * called against (which we'll call file2). Filesystems must be able to
+ * restart and complete the operation even after the system goes down.
+ */
+struct xfs_exchange_range {
+ __s32 file1_fd;
+ __u32 pad; /* must be zeroes */
+ __u64 file1_offset; /* file1 offset, bytes */
+ __u64 file2_offset; /* file2 offset, bytes */
+ __u64 length; /* bytes to exchange */
+
+ __u64 flags; /* see XFS_EXCHANGE_RANGE_* below */
+};
+
+/*
+ * Exchange file data all the way to the ends of both files, and then exchange
+ * the file sizes. This flag can be used to replace a file's contents with a
+ * different amount of data. length will be ignored.
+ */
+#define XFS_EXCHANGE_RANGE_TO_EOF (1ULL << 0)
+
+/* Flush all changes in file data and file metadata to disk before returning. */
+#define XFS_EXCHANGE_RANGE_DSYNC (1ULL << 1)
+
+/* Dry run; do all the parameter verification but do not change anything. */
+#define XFS_EXCHANGE_RANGE_DRY_RUN (1ULL << 2)
+
+/*
+ * Exchange only the parts of the two files where the file allocation units
+ * mapped to file1's range have been written to. This can accelerate
+ * scatter-gather atomic writes with a temp file if all writes are aligned to
+ * the file allocation unit.
+ */
+#define XFS_EXCHANGE_RANGE_FILE1_WRITTEN (1ULL << 3)
+
+#define XFS_EXCHANGE_RANGE_ALL_FLAGS (XFS_EXCHANGE_RANGE_TO_EOF | \
+ XFS_EXCHANGE_RANGE_DSYNC | \
+ XFS_EXCHANGE_RANGE_DRY_RUN | \
+ XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
+
+/* Iterating parent pointers of files. */
+
+/* target was the root directory */
+#define XFS_GETPARENTS_OFLAG_ROOT (1U << 0)
+
+/* Cursor is done iterating pptrs */
+#define XFS_GETPARENTS_OFLAG_DONE (1U << 1)
+
+#define XFS_GETPARENTS_OFLAGS_ALL (XFS_GETPARENTS_OFLAG_ROOT | \
+ XFS_GETPARENTS_OFLAG_DONE)
+
+#define XFS_GETPARENTS_IFLAGS_ALL (0)
+
+struct xfs_getparents_rec {
+ struct xfs_handle gpr_parent; /* Handle to parent */
+ __u32 gpr_reclen; /* Length of entire record */
+ __u32 gpr_reserved; /* zero */
+ char gpr_name[]; /* Null-terminated filename */
+};
+
+/* Iterate through this file's directory parent pointers */
+struct xfs_getparents {
+ /*
+ * Structure to track progress in iterating the parent pointers.
+ * Must be initialized to zeroes before the first ioctl call, and
+ * not touched by callers after that.
+ */
+ struct xfs_attrlist_cursor gp_cursor;
+
+ /* Input flags: XFS_GETPARENTS_IFLAG* */
+ __u16 gp_iflags;
+
+ /* Output flags: XFS_GETPARENTS_OFLAG* */
+ __u16 gp_oflags;
+
+ /* Size of the gp_buffer in bytes */
+ __u32 gp_bufsize;
+
+ /* Must be set to zero */
+ __u64 gp_reserved;
+
+ /* Pointer to a buffer in which to place xfs_getparents_rec */
+ __u64 gp_buffer;
+};
+
+static inline struct xfs_getparents_rec *
+xfs_getparents_first_rec(struct xfs_getparents *gp)
+{
+ return (struct xfs_getparents_rec *)(uintptr_t)gp->gp_buffer;
+}
+
+static inline struct xfs_getparents_rec *
+xfs_getparents_next_rec(struct xfs_getparents *gp,
+ struct xfs_getparents_rec *gpr)
+{
+ void *next = ((void *)gpr + gpr->gpr_reclen);
+ void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize);
+
+ if (next >= end)
+ return NULL;
+
+ return next;
+}
+
+/* Iterate through this file handle's directory parent pointers. */
+struct xfs_getparents_by_handle {
+ /* Handle to file whose parents we want. */
+ struct xfs_handle gph_handle;
+
+ struct xfs_getparents gph_request;
+};
/*
* ioctl commands that are used by Linux filesystems
@@ -802,6 +958,9 @@ struct xfs_scrub_metadata {
/* XFS_IOC_GETFSMAP ------ hoisted 59 */
#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
+#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
+#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
+#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
/*
* ioctl commands that replace IRIX syssgi()'s
@@ -837,6 +996,7 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
+#define XFS_IOC_EXCHANGE_RANGE _IOWR('X', 129, struct xfs_exchange_range)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 6296993ff8f3..b0edb4288e59 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -26,21 +26,40 @@
* and the "sick" field tells us if that piece was found to need repairs.
* Therefore we can conclude that for a given sick flag value:
*
- * - checked && sick => metadata needs repair
- * - checked && !sick => metadata is ok
- * - !checked => has not been examined since mount
+ * - checked && sick => metadata needs repair
+ * - checked && !sick => metadata is ok
+ * - !checked && sick => errors have been observed during normal operation,
+ * but the metadata has not been checked thoroughly
+ * - !checked && !sick => has not been examined since mount
+ *
+ * Evidence of health problems can be sorted into three basic categories:
+ *
+ * a) Primary evidence, which signals that something is defective within the
+ * general grouping of metadata.
+ *
+ * b) Secondary evidence, which are side effects of primary problem but are
+ * not themselves problems. These can be forgotten when the primary
+ * health problems are addressed.
+ *
+ * c) Indirect evidence, which points to something being wrong in another
+ * group, but we had to release resources and this is all that's left of
+ * that state.
*/
struct xfs_mount;
struct xfs_perag;
struct xfs_inode;
struct xfs_fsop_geom;
+struct xfs_btree_cur;
+struct xfs_da_args;
/* Observable health issues for metadata spanning the entire filesystem. */
#define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */
#define XFS_SICK_FS_UQUOTA (1 << 1) /* user quota */
#define XFS_SICK_FS_GQUOTA (1 << 2) /* group quota */
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
+#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
+#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
/* Observable health issues for realtime volume metadata. */
#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
@@ -57,6 +76,7 @@ struct xfs_fsop_geom;
#define XFS_SICK_AG_FINOBT (1 << 7) /* free inode index */
#define XFS_SICK_AG_RMAPBT (1 << 8) /* reverse mappings */
#define XFS_SICK_AG_REFCNTBT (1 << 9) /* reference counts */
+#define XFS_SICK_AG_INODES (1 << 10) /* inactivated bad inodes */
/* Observable health issues for inode metadata. */
#define XFS_SICK_INO_CORE (1 << 0) /* inode core */
@@ -73,11 +93,17 @@ struct xfs_fsop_geom;
#define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */
#define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */
+/* Don't propagate sick status to ag health summary during inactivation */
+#define XFS_SICK_INO_FORGET (1 << 12)
+#define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */
+
/* Primary evidence of health problems in a given group. */
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
XFS_SICK_FS_UQUOTA | \
XFS_SICK_FS_GQUOTA | \
- XFS_SICK_FS_PQUOTA)
+ XFS_SICK_FS_PQUOTA | \
+ XFS_SICK_FS_QUOTACHECK | \
+ XFS_SICK_FS_NLINKS)
#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
XFS_SICK_RT_SUMMARY)
@@ -100,36 +126,94 @@ struct xfs_fsop_geom;
XFS_SICK_INO_DIR | \
XFS_SICK_INO_XATTR | \
XFS_SICK_INO_SYMLINK | \
- XFS_SICK_INO_PARENT)
+ XFS_SICK_INO_PARENT | \
+ XFS_SICK_INO_DIRTREE)
#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \
XFS_SICK_INO_BMBTA_ZAPPED | \
XFS_SICK_INO_DIR_ZAPPED | \
XFS_SICK_INO_SYMLINK_ZAPPED)
-/* These functions must be provided by the xfs implementation. */
+/* Secondary state related to (but not primary evidence of) health problems. */
+#define XFS_SICK_FS_SECONDARY (0)
+#define XFS_SICK_RT_SECONDARY (0)
+#define XFS_SICK_AG_SECONDARY (0)
+#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET)
+
+/* Evidence of health problems elsewhere. */
+#define XFS_SICK_FS_INDIRECT (0)
+#define XFS_SICK_RT_INDIRECT (0)
+#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES)
+#define XFS_SICK_INO_INDIRECT (0)
+
+/* All health masks. */
+#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
+ XFS_SICK_FS_SECONDARY | \
+ XFS_SICK_FS_INDIRECT)
+
+#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \
+ XFS_SICK_RT_SECONDARY | \
+ XFS_SICK_RT_INDIRECT)
+
+#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
+ XFS_SICK_AG_SECONDARY | \
+ XFS_SICK_AG_INDIRECT)
+
+#define XFS_SICK_INO_ALL (XFS_SICK_INO_PRIMARY | \
+ XFS_SICK_INO_SECONDARY | \
+ XFS_SICK_INO_INDIRECT | \
+ XFS_SICK_INO_ZAPPED)
+
+/*
+ * These functions must be provided by the xfs implementation. Function
+ * behavior with respect to the first argument should be as follows:
+ *
+ * xfs_*_mark_sick: Set the sick flags and do not set checked flags.
+ * Runtime code should call this upon encountering
+ * a corruption.
+ *
+ * xfs_*_mark_corrupt: Set the sick and checked flags simultaneously.
+ * Fsck tools should call this when corruption is
+ * found.
+ *
+ * xfs_*_mark_healthy: Clear the sick flags and set the checked flags.
+ * Fsck tools should call this after correcting errors.
+ *
+ * xfs_*_measure_sickness: Return the sick and check status in the provided
+ * out parameters.
+ */
void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask);
+void xfs_fs_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
unsigned int *checked);
void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
+void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
unsigned int *checked);
+void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
+ unsigned int mask);
void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
+void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask);
void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
unsigned int *checked);
void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
+void xfs_inode_mark_corrupt(struct xfs_inode *ip, unsigned int mask);
void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask);
void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
unsigned int *checked);
void xfs_health_unmount(struct xfs_mount *mp);
+void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
+void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
+void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork);
+void xfs_da_mark_sick(struct xfs_da_args *args);
/* Now some helpers. */
@@ -197,4 +281,7 @@ void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
+#define xfs_metadata_is_sick(error) \
+ (unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC))
+
#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 2361a22035b0..14c81f227c5b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -27,6 +27,7 @@
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
+#include "xfs_health.h"
/*
* Lookup a record by ino in the btree given by cur.
@@ -140,13 +141,13 @@ xfs_inobt_complain_bad_rec(
struct xfs_mount *mp = cur->bc_mp;
xfs_warn(mp,
- "%s Inode BTree record corruption in AG %d detected at %pS!",
- cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
- cur->bc_ag.pag->pag_agno, fa);
+ "%sbt record corruption in AG %d detected at %pS!",
+ cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
irec->ir_free, irec->ir_holemask);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -205,14 +206,17 @@ xfs_inobt_insert(
struct xfs_buf *agbp,
xfs_agino_t newino,
xfs_agino_t newlen,
- xfs_btnum_t btnum)
+ bool is_finobt)
{
struct xfs_btree_cur *cur;
xfs_agino_t thisino;
int i;
int error;
- cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
+ if (is_finobt)
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
+ else
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
for (thisino = newino;
thisino < newino + newlen;
@@ -528,16 +532,14 @@ __xfs_inobt_rec_merge(
}
/*
- * Insert a new sparse inode chunk into the associated inode btree. The inode
- * record for the sparse chunk is pre-aligned to a startino that should match
- * any pre-existing sparse inode record in the tree. This allows sparse chunks
- * to fill over time.
+ * Insert a new sparse inode chunk into the associated inode allocation btree.
+ * The inode record for the sparse chunk is pre-aligned to a startino that
+ * should match any pre-existing sparse inode record in the tree. This allows
+ * sparse chunks to fill over time.
*
- * This function supports two modes of handling preexisting records depending on
- * the merge flag. If merge is true, the provided record is merged with the
+ * If no preexisting record exists, the provided record is inserted.
+ * If there is a preexisting record, the provided record is merged with the
* existing record and updated in place. The merged record is returned in nrec.
- * If merge is false, an existing record is replaced with the provided record.
- * If no preexisting record exists, the provided record is always inserted.
*
* It is considered corruption if a merge is requested and not possible. Given
* the sparse inode alignment constraints, this should never happen.
@@ -547,9 +549,7 @@ xfs_inobt_insert_sprec(
struct xfs_perag *pag,
struct xfs_trans *tp,
struct xfs_buf *agbp,
- int btnum,
- struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */
- bool merge) /* merge or replace */
+ struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur;
@@ -557,7 +557,7 @@ xfs_inobt_insert_sprec(
int i;
struct xfs_inobt_rec_incore rec;
- cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
/* the new record is pre-aligned so we know where to look */
error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
@@ -571,6 +571,7 @@ xfs_inobt_insert_sprec(
if (error)
goto error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -579,45 +580,45 @@ xfs_inobt_insert_sprec(
}
/*
- * A record exists at this startino. Merge or replace the record
- * depending on what we've been asked to do.
+ * A record exists at this startino. Merge the records.
*/
- if (merge) {
- error = xfs_inobt_get_rec(cur, &rec, &i);
- if (error)
- goto error;
- if (XFS_IS_CORRUPT(mp, i != 1)) {
- error = -EFSCORRUPTED;
- goto error;
- }
- if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
- error = -EFSCORRUPTED;
- goto error;
- }
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (error)
+ goto error;
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
- /*
- * This should never fail. If we have coexisting records that
- * cannot merge, something is seriously wrong.
- */
- if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
- error = -EFSCORRUPTED;
- goto error;
- }
+ /*
+ * This should never fail. If we have coexisting records that
+ * cannot merge, something is seriously wrong.
+ */
+ if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
- trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
- rec.ir_holemask, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
+ rec.ir_holemask, nrec->ir_startino,
+ nrec->ir_holemask);
- /* merge to nrec to output the updated record */
- __xfs_inobt_rec_merge(nrec, &rec);
+ /* merge to nrec to output the updated record */
+ __xfs_inobt_rec_merge(nrec, &rec);
- trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
+ nrec->ir_holemask);
- error = xfs_inobt_rec_check_count(mp, nrec);
- if (error)
- goto error;
- }
+ error = xfs_inobt_rec_check_count(mp, nrec);
+ if (error)
+ goto error;
error = xfs_inobt_update(cur, nrec);
if (error)
@@ -632,6 +633,59 @@ error:
}
/*
+ * Insert a new sparse inode chunk into the free inode btree. The inode
+ * record for the sparse chunk is pre-aligned to a startino that should match
+ * any pre-existing sparse inode record in the tree. This allows sparse chunks
+ * to fill over time.
+ *
+ * The new record is always inserted, overwriting a pre-existing record if
+ * there is one.
+ */
+STATIC int
+xfs_finobt_insert_sprec(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */
+{
+ struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_btree_cur *cur;
+ int error;
+ int i;
+
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
+
+ /* the new record is pre-aligned so we know where to look */
+ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
+ if (error)
+ goto error;
+ /* if nothing there, insert a new record and return */
+ if (i == 0) {
+ error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
+ nrec->ir_count, nrec->ir_freecount,
+ nrec->ir_free, &i);
+ if (error)
+ goto error;
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ } else {
+ error = xfs_inobt_update(cur, nrec);
+ if (error)
+ goto error;
+ }
+
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ return 0;
+error:
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return error;
+}
+
+
+/*
* Allocate new inodes in the allocation group specified by agbp. Returns 0 if
* inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
* the caller knows it can try another AG, a hard -ENOSPC when over the maximum
@@ -857,8 +911,7 @@ sparse_alloc:
* if necessary. If a merge does occur, rec is updated to the
* merged record.
*/
- error = xfs_inobt_insert_sprec(pag, tp, agbp,
- XFS_BTNUM_INO, &rec, true);
+ error = xfs_inobt_insert_sprec(pag, tp, agbp, &rec);
if (error == -EFSCORRUPTED) {
xfs_alert(args.mp,
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
@@ -882,21 +935,19 @@ sparse_alloc:
* existing record with this one.
*/
if (xfs_has_finobt(args.mp)) {
- error = xfs_inobt_insert_sprec(pag, tp, agbp,
- XFS_BTNUM_FINO, &rec, false);
+ error = xfs_finobt_insert_sprec(pag, tp, agbp, &rec);
if (error)
return error;
}
} else {
/* full chunk - insert new records to both btrees */
- error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
- XFS_BTNUM_INO);
+ error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, false);
if (error)
return error;
if (xfs_has_finobt(args.mp)) {
error = xfs_inobt_insert(pag, tp, agbp, newino,
- newlen, XFS_BTNUM_FINO);
+ newlen, true);
if (error)
return error;
}
@@ -949,8 +1000,10 @@ xfs_ialloc_next_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
}
return 0;
@@ -974,8 +1027,10 @@ xfs_ialloc_get_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
}
return 0;
@@ -1003,6 +1058,33 @@ xfs_inobt_first_free_inode(
}
/*
+ * If this AG has corrupt inodes, check if allocating this inode would fail
+ * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
+ * somewhere else.
+ */
+static int
+xfs_dialloc_check_ino(
+ struct xfs_perag *pag,
+ struct xfs_trans *tp,
+ xfs_ino_t ino)
+{
+ struct xfs_imap imap;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xfs_imap(pag, tp, ino, &imap, 0);
+ if (error)
+ return -EAGAIN;
+
+ error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
+ if (error)
+ return -EAGAIN;
+
+ xfs_trans_brelse(tp, bp);
+ return 0;
+}
+
+/*
* Allocate an inode using the inobt-only algorithm.
*/
STATIC int
@@ -1030,7 +1112,7 @@ xfs_dialloc_ag_inobt(
ASSERT(pag->pagi_freecount > 0);
restart_pagno:
- cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -1053,6 +1135,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1061,6 +1144,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, j != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1219,6 +1303,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1228,6 +1313,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1237,6 +1323,7 @@ xfs_dialloc_ag_inobt(
if (error)
goto error0;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1249,6 +1336,13 @@ alloc_inode:
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error0;
+ }
+
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
error = xfs_inobt_update(cur, &rec);
@@ -1297,8 +1391,10 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(lcur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(lcur);
return -EFSCORRUPTED;
+ }
/*
* See if we've landed in the parent inode record. The finobt
@@ -1322,12 +1418,14 @@ xfs_dialloc_ag_finobt_near(
if (error)
goto error_rcur;
if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
+ xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED;
goto error_rcur;
}
}
if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
+ xfs_btree_mark_sick(lcur);
error = -EFSCORRUPTED;
goto error_rcur;
}
@@ -1383,8 +1481,10 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return 0;
}
}
@@ -1395,14 +1495,18 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return 0;
}
@@ -1424,14 +1528,18 @@ xfs_dialloc_ag_update_inobt(
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
return error;
- if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
@@ -1440,8 +1548,10 @@ xfs_dialloc_ag_update_inobt(
if (XFS_IS_CORRUPT(cur->bc_mp,
rec.ir_free != frec->ir_free ||
- rec.ir_freecount != frec->ir_freecount))
+ rec.ir_freecount != frec->ir_freecount)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return xfs_inobt_update(cur, &rec);
}
@@ -1483,7 +1593,7 @@ xfs_dialloc_ag(
if (!pagino)
pagino = be32_to_cpu(agi->agi_newino);
- cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(cur);
if (error)
@@ -1508,6 +1618,12 @@ xfs_dialloc_ag(
XFS_INODES_PER_CHUNK) == 0);
ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
+ error = xfs_dialloc_check_ino(pag, tp, ino);
+ if (error)
+ goto error_cur;
+ }
+
/*
* Modify or remove the finobt record.
*/
@@ -1526,7 +1642,7 @@ xfs_dialloc_ag(
* the original freecount. If all is well, make the equivalent update to
* the inobt using the finobt record and offset information.
*/
- icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
+ icur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(icur);
if (error)
@@ -1623,7 +1739,7 @@ xfs_dialloc_good_ag(
return false;
if (!xfs_perag_initialised_agi(pag)) {
- error = xfs_ialloc_read_agi(pag, tp, NULL);
+ error = xfs_ialloc_read_agi(pag, tp, 0, NULL);
if (error)
return false;
}
@@ -1692,7 +1808,7 @@ xfs_dialloc_try_ag(
* Then read in the AGI buffer and recheck with the AGI buffer
* lock held.
*/
- error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
+ error = xfs_ialloc_read_agi(pag, *tpp, 0, &agbp);
if (error)
return error;
@@ -1943,7 +2059,7 @@ xfs_difree_inobt(
/*
* Initialize the cursor.
*/
- cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_check_agi_freecount(cur);
if (error)
@@ -1958,6 +2074,7 @@ xfs_difree_inobt(
goto error0;
}
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -1968,6 +2085,7 @@ xfs_difree_inobt(
goto error0;
}
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@@ -2068,7 +2186,7 @@ xfs_difree_finobt(
int error;
int i;
- cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
+ cur = xfs_finobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
@@ -2080,6 +2198,7 @@ xfs_difree_finobt(
* something is out of sync.
*/
if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2106,6 +2225,7 @@ xfs_difree_finobt(
if (error)
goto error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2116,6 +2236,7 @@ xfs_difree_finobt(
if (XFS_IS_CORRUPT(mp,
rec.ir_free != ibtrec->ir_free ||
rec.ir_freecount != ibtrec->ir_freecount)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error;
}
@@ -2205,7 +2326,7 @@ xfs_difree(
/*
* Get the allocation group header.
*/
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
__func__, error);
@@ -2251,7 +2372,7 @@ xfs_imap_lookup(
int error;
int i;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error) {
xfs_alert(mp,
"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
@@ -2265,7 +2386,7 @@ xfs_imap_lookup(
* we have a record, we need to ensure it contains the inode number
* we are looking up.
*/
- cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
@@ -2594,6 +2715,7 @@ int
xfs_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ xfs_buf_flags_t flags,
struct xfs_buf **agibpp)
{
struct xfs_mount *mp = pag->pag_mount;
@@ -2603,7 +2725,9 @@ xfs_read_agi(
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
- XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
+ XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
if (error)
return error;
if (tp)
@@ -2621,6 +2745,7 @@ int
xfs_ialloc_read_agi(
struct xfs_perag *pag,
struct xfs_trans *tp,
+ int flags,
struct xfs_buf **agibpp)
{
struct xfs_buf *agibp;
@@ -2629,7 +2754,9 @@ xfs_ialloc_read_agi(
trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
- error = xfs_read_agi(pag, tp, &agibp);
+ error = xfs_read_agi(pag, tp,
+ (flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
+ &agibp);
if (error)
return error;
@@ -2765,7 +2892,7 @@ xfs_ialloc_count_inodes(
struct xfs_ialloc_count_inodes ci = {0};
int error;
- ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
+ ASSERT(xfs_btree_is_ino(cur->bc_ops));
error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
if (error)
return error;
@@ -2982,7 +3109,7 @@ xfs_ialloc_check_shrink(
if (!xfs_has_sparseinodes(pag->pag_mount))
return 0;
- cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);
+ cur = xfs_inobt_init_cursor(pag, tp, agibp);
/* Look up the inobt record that would correspond to the new EOFS. */
agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
@@ -2995,6 +3122,7 @@ xfs_ialloc_check_shrink(
goto out;
if (!has) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_INOBT);
error = -EFSCORRUPTED;
goto out;
}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index f1412183bb44..b549627e3a61 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -63,10 +63,11 @@ xfs_ialloc_log_agi(
struct xfs_buf *bp, /* allocation group header buffer */
uint32_t fields); /* bitmask of fields to log */
-int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
+int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, xfs_buf_flags_t flags,
struct xfs_buf **agibpp);
int xfs_ialloc_read_agi(struct xfs_perag *pag, struct xfs_trans *tp,
- struct xfs_buf **agibpp);
+ int flags, struct xfs_buf **agibpp);
+#define XFS_IALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */
/*
* Lookup a record by ino in the btree given by cur.
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 42a5e1f227a0..42e9fd47f6c7 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -17,6 +17,7 @@
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
+#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_rmap.h"
@@ -37,7 +38,15 @@ xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_btnum);
+ cur->bc_ag.agbp);
+}
+
+STATIC struct xfs_btree_cur *
+xfs_finobt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_finobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+ cur->bc_ag.agbp);
}
STATIC void
@@ -81,9 +90,9 @@ xfs_inobt_mod_blockcount(
if (!xfs_has_inobtcounts(cur->bc_mp))
return;
- if (cur->bc_btnum == XFS_BTNUM_FINO)
+ if (xfs_btree_is_fino(cur->bc_ops))
be32_add_cpu(&agi->agi_fblocks, howmuch);
- else if (cur->bc_btnum == XFS_BTNUM_INO)
+ else
be32_add_cpu(&agi->agi_iblocks, howmuch);
xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS);
}
@@ -300,7 +309,7 @@ xfs_inobt_verify(
* xfs_perag_initialised_agi(pag)) if we ever do.
*/
if (xfs_has_crc(mp)) {
- fa = xfs_btree_sblock_v5hdr_verify(bp);
+ fa = xfs_btree_agblock_v5hdr_verify(bp);
if (fa)
return fa;
}
@@ -310,7 +319,7 @@ xfs_inobt_verify(
if (level >= M_IGEO(mp)->inobt_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp,
+ return xfs_btree_agblock_verify(bp,
M_IGEO(mp)->inobt_mxr[level != 0]);
}
@@ -320,7 +329,7 @@ xfs_inobt_read_verify(
{
xfs_failaddr_t fa;
- if (!xfs_btree_sblock_verify_crc(bp))
+ if (!xfs_btree_agblock_verify_crc(bp))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_inobt_verify(bp);
@@ -344,7 +353,7 @@ xfs_inobt_write_verify(
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
- xfs_btree_sblock_calc_crc(bp);
+ xfs_btree_agblock_calc_crc(bp);
}
@@ -398,9 +407,17 @@ xfs_inobt_keys_contiguous(
be32_to_cpu(key2->inobt.ir_startino));
}
-static const struct xfs_btree_ops xfs_inobt_ops = {
+const struct xfs_btree_ops xfs_inobt_ops = {
+ .name = "ino",
+ .type = XFS_BTREE_TYPE_AG,
+
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
+
+ .lru_refs = XFS_INO_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_ibt_2),
+ .sick_mask = XFS_SICK_AG_INOBT,
.dup_cursor = xfs_inobt_dup_cursor,
.set_root = xfs_inobt_set_root,
@@ -420,11 +437,19 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.keys_contiguous = xfs_inobt_keys_contiguous,
};
-static const struct xfs_btree_ops xfs_finobt_ops = {
+const struct xfs_btree_ops xfs_finobt_ops = {
+ .name = "fino",
+ .type = XFS_BTREE_TYPE_AG,
+
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
- .dup_cursor = xfs_inobt_dup_cursor,
+ .lru_refs = XFS_INO_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_fibt_2),
+ .sick_mask = XFS_SICK_AG_FINOBT,
+
+ .dup_cursor = xfs_finobt_dup_cursor,
.set_root = xfs_finobt_set_root,
.alloc_block = xfs_finobt_alloc_block,
.free_block = xfs_finobt_free_block,
@@ -443,65 +468,54 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
};
/*
- * Initialize a new inode btree cursor.
+ * Create an inode btree cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
*/
-static struct xfs_btree_cur *
-xfs_inobt_init_common(
+struct xfs_btree_cur *
+xfs_inobt_init_cursor(
struct xfs_perag *pag,
- struct xfs_trans *tp, /* transaction pointer */
- xfs_btnum_t btnum) /* ialloc or free ino btree */
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp)
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur;
- cur = xfs_btree_alloc_cursor(mp, tp, btnum,
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_inobt_ops,
M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
- if (btnum == XFS_BTNUM_INO) {
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2);
- cur->bc_ops = &xfs_inobt_ops;
- } else {
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2);
- cur->bc_ops = &xfs_finobt_ops;
- }
-
- if (xfs_has_crc(mp))
- cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_ag.agbp = agbp;
+ if (agbp) {
+ struct xfs_agi *agi = agbp->b_addr;
+
+ cur->bc_nlevels = be32_to_cpu(agi->agi_level);
+ }
return cur;
}
-/* Create an inode btree cursor. */
+/*
+ * Create a free inode btree cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
+ */
struct xfs_btree_cur *
-xfs_inobt_init_cursor(
+xfs_finobt_init_cursor(
struct xfs_perag *pag,
struct xfs_trans *tp,
- struct xfs_buf *agbp,
- xfs_btnum_t btnum)
+ struct xfs_buf *agbp)
{
+ struct xfs_mount *mp = pag->pag_mount;
struct xfs_btree_cur *cur;
- struct xfs_agi *agi = agbp->b_addr;
- cur = xfs_inobt_init_common(pag, tp, btnum);
- if (btnum == XFS_BTNUM_INO)
- cur->bc_nlevels = be32_to_cpu(agi->agi_level);
- else
- cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_finobt_ops,
+ M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
+ cur->bc_ag.pag = xfs_perag_hold(pag);
cur->bc_ag.agbp = agbp;
- return cur;
-}
+ if (agbp) {
+ struct xfs_agi *agi = agbp->b_addr;
-/* Create an inode btree cursor with a fake root for staging. */
-struct xfs_btree_cur *
-xfs_inobt_stage_cursor(
- struct xfs_perag *pag,
- struct xbtree_afakeroot *afake,
- xfs_btnum_t btnum)
-{
- struct xfs_btree_cur *cur;
-
- cur = xfs_inobt_init_common(pag, NULL, btnum);
- xfs_btree_stage_afakeroot(cur, afake);
+ cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
+ }
return cur;
}
@@ -521,7 +535,7 @@ xfs_inobt_commit_staged_btree(
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
- if (cur->bc_btnum == XFS_BTNUM_INO) {
+ if (xfs_btree_is_ino(cur->bc_ops)) {
fields = XFS_AGI_ROOT | XFS_AGI_LEVEL;
agi->agi_root = cpu_to_be32(afake->af_root);
agi->agi_level = cpu_to_be32(afake->af_levels);
@@ -530,7 +544,7 @@ xfs_inobt_commit_staged_btree(
fields |= XFS_AGI_IBLOCKS;
}
xfs_ialloc_log_agi(tp, agbp, fields);
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops);
+ xfs_btree_commit_afakeroot(cur, tp, agbp);
} else {
fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL;
agi->agi_free_root = cpu_to_be32(afake->af_root);
@@ -540,7 +554,7 @@ xfs_inobt_commit_staged_btree(
fields |= XFS_AGI_IBLOCKS;
}
xfs_ialloc_log_agi(tp, agbp, fields);
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops);
+ xfs_btree_commit_afakeroot(cur, tp, agbp);
}
}
@@ -721,45 +735,21 @@ xfs_inobt_max_size(
XFS_INODES_PER_CHUNK);
}
-/* Read AGI and create inobt cursor. */
-int
-xfs_inobt_cur(
- struct xfs_perag *pag,
- struct xfs_trans *tp,
- xfs_btnum_t which,
- struct xfs_btree_cur **curpp,
- struct xfs_buf **agi_bpp)
-{
- struct xfs_btree_cur *cur;
- int error;
-
- ASSERT(*agi_bpp == NULL);
- ASSERT(*curpp == NULL);
-
- error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
- if (error)
- return error;
-
- cur = xfs_inobt_init_cursor(pag, tp, *agi_bpp, which);
- *curpp = cur;
- return 0;
-}
-
static int
-xfs_inobt_count_blocks(
+xfs_finobt_count_blocks(
struct xfs_perag *pag,
struct xfs_trans *tp,
- xfs_btnum_t btnum,
xfs_extlen_t *tree_blocks)
{
struct xfs_buf *agbp = NULL;
- struct xfs_btree_cur *cur = NULL;
+ struct xfs_btree_cur *cur;
int error;
- error = xfs_inobt_cur(pag, tp, btnum, &cur, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error)
return error;
+ cur = xfs_inobt_init_cursor(pag, tp, agbp);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
@@ -778,7 +768,7 @@ xfs_finobt_read_blocks(
struct xfs_agi *agi;
int error;
- error = xfs_ialloc_read_agi(pag, tp, &agbp);
+ error = xfs_ialloc_read_agi(pag, tp, 0, &agbp);
if (error)
return error;
@@ -807,8 +797,7 @@ xfs_finobt_calc_reserves(
if (xfs_has_inobtcounts(pag->pag_mount))
error = xfs_finobt_read_blocks(pag, tp, &tree_len);
else
- error = xfs_inobt_count_blocks(pag, tp, XFS_BTNUM_FINO,
- &tree_len);
+ error = xfs_finobt_count_blocks(pag, tp, &tree_len);
if (error)
return error;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index 3262c3fe5ebe..6472ec1ecbb4 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -46,10 +46,10 @@ struct xfs_perag;
(maxrecs) * sizeof(xfs_inobt_key_t) + \
((index) - 1) * sizeof(xfs_inobt_ptr_t)))
-extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag,
- struct xfs_trans *tp, struct xfs_buf *agbp, xfs_btnum_t btnum);
-struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag,
- struct xbtree_afakeroot *afake, xfs_btnum_t btnum);
+struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag,
+ struct xfs_trans *tp, struct xfs_buf *agbp);
+struct xfs_btree_cur *xfs_finobt_init_cursor(struct xfs_perag *pag,
+ struct xfs_trans *tp, struct xfs_buf *agbp);
extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int);
/* ir_holemask to inode allocation bitmap conversion */
@@ -66,9 +66,6 @@ int xfs_finobt_calc_reserves(struct xfs_perag *perag, struct xfs_trans *tp,
xfs_extlen_t *ask, xfs_extlen_t *used);
extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
-int xfs_inobt_cur(struct xfs_perag *pag, struct xfs_trans *tp,
- xfs_btnum_t btnum, struct xfs_btree_cur **curpp,
- struct xfs_buf **agi_bpp);
void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp);
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index f4e6b200cdf8..8796f2b3e534 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -394,11 +394,18 @@ xfs_iext_leaf_key(
return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK;
}
+static inline void *
+xfs_iext_alloc_node(
+ int size)
+{
+ return kzalloc(size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
+}
+
static void
xfs_iext_grow(
struct xfs_ifork *ifp)
{
- struct xfs_iext_node *node = kmem_zalloc(NODE_SIZE, KM_NOFS);
+ struct xfs_iext_node *node = xfs_iext_alloc_node(NODE_SIZE);
int i;
if (ifp->if_height == 1) {
@@ -454,7 +461,7 @@ xfs_iext_split_node(
int *nr_entries)
{
struct xfs_iext_node *node = *nodep;
- struct xfs_iext_node *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+ struct xfs_iext_node *new = xfs_iext_alloc_node(NODE_SIZE);
const int nr_move = KEYS_PER_NODE / 2;
int nr_keep = nr_move + (KEYS_PER_NODE & 1);
int i = 0;
@@ -542,7 +549,7 @@ xfs_iext_split_leaf(
int *nr_entries)
{
struct xfs_iext_leaf *leaf = cur->leaf;
- struct xfs_iext_leaf *new = kmem_zalloc(NODE_SIZE, KM_NOFS);
+ struct xfs_iext_leaf *new = xfs_iext_alloc_node(NODE_SIZE);
const int nr_move = RECS_PER_LEAF / 2;
int nr_keep = nr_move + (RECS_PER_LEAF & 1);
int i;
@@ -583,7 +590,7 @@ xfs_iext_alloc_root(
{
ASSERT(ifp->if_bytes == 0);
- ifp->if_data = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS);
+ ifp->if_data = xfs_iext_alloc_node(sizeof(struct xfs_iext_rec));
ifp->if_height = 1;
/* now that we have a node step into it */
@@ -603,7 +610,8 @@ xfs_iext_realloc_root(
if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF)
new_size = NODE_SIZE;
- new = krealloc(ifp->if_data, new_size, GFP_NOFS | __GFP_NOFAIL);
+ new = krealloc(ifp->if_data, new_size,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes);
ifp->if_data = new;
cur->leaf = new;
@@ -743,7 +751,7 @@ xfs_iext_remove_node(
again:
ASSERT(node->ptrs[pos]);
ASSERT(node->ptrs[pos] == victim);
- kmem_free(victim);
+ kfree(victim);
nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
offset = node->keys[0];
@@ -789,7 +797,7 @@ again:
ASSERT(node == ifp->if_data);
ifp->if_data = node->ptrs[0];
ifp->if_height--;
- kmem_free(node);
+ kfree(node);
}
}
@@ -863,7 +871,7 @@ xfs_iext_free_last_leaf(
struct xfs_ifork *ifp)
{
ifp->if_height--;
- kmem_free(ifp->if_data);
+ kfree(ifp->if_data);
ifp->if_data = NULL;
}
@@ -1044,7 +1052,7 @@ xfs_iext_destroy_node(
}
}
- kmem_free(node);
+ kfree(node);
}
void
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 137a65bda95d..d79002343d0b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -18,6 +18,7 @@
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
+#include "xfs_health.h"
#include <linux/iversion.h>
@@ -132,9 +133,14 @@ xfs_imap_to_bp(
struct xfs_imap *imap,
struct xfs_buf **bpp)
{
- return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
- imap->im_len, XBF_UNMAPPED, bpp,
- &xfs_inode_buf_ops);
+ int error;
+
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+ imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno),
+ XFS_SICK_AG_INODES);
+ return error;
}
static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
@@ -485,6 +491,14 @@ xfs_dinode_verify(
return __this_address;
}
+ if (dip->di_version > 1) {
+ if (dip->di_onlink)
+ return __this_address;
+ } else {
+ if (dip->di_nlink)
+ return __this_address;
+ }
+
/* don't allow invalid i_size */
di_size = be64_to_cpu(dip->di_size);
if (di_size & (1ULL << 63))
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f4569e18a8d0..9d11ae015909 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -25,6 +25,8 @@
#include "xfs_attr_leaf.h"
#include "xfs_types.h"
#include "xfs_errortag.h"
+#include "xfs_health.h"
+#include "xfs_symlink_remote.h"
struct kmem_cache *xfs_ifork_cache;
@@ -50,7 +52,8 @@ xfs_init_local_fork(
mem_size++;
if (size) {
- char *new_data = kmem_alloc(mem_size, KM_NOFS);
+ char *new_data = kmalloc(mem_size,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
memcpy(new_data, data, size);
if (zero_terminate)
@@ -77,7 +80,7 @@ xfs_iformat_local(
/*
* If the size is unreasonable, then something
* is wrong and we just bail out rather than crash in
- * kmem_alloc() or memcpy() below.
+ * kmalloc() or memcpy() below.
*/
if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
xfs_warn(ip->i_mount,
@@ -87,6 +90,7 @@ xfs_iformat_local(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_local", dip, sizeof(*dip),
__this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return -EFSCORRUPTED;
}
@@ -116,7 +120,7 @@ xfs_iformat_extents(
/*
* If the number of extents is unreasonable, then something is wrong and
- * we just bail out rather than crash in kmem_alloc() or memcpy() below.
+ * we just bail out rather than crash in kmalloc() or memcpy() below.
*/
if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
xfs_warn(ip->i_mount, "corrupt inode %llu ((a)extents = %llu).",
@@ -124,6 +128,7 @@ xfs_iformat_extents(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_extents(1)", dip, sizeof(*dip),
__this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return -EFSCORRUPTED;
}
@@ -143,6 +148,7 @@ xfs_iformat_extents(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_extents(2)",
dp, sizeof(*dp), fa);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return xfs_bmap_complain_bad_rec(ip, whichfork,
fa, &new);
}
@@ -201,11 +207,13 @@ xfs_iformat_btree(
xfs_inode_verifier_error(ip, -EFSCORRUPTED,
"xfs_iformat_btree", dfp, size,
__this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return -EFSCORRUPTED;
}
ifp->if_broot_bytes = size;
- ifp->if_broot = kmem_alloc(size, KM_NOFS);
+ ifp->if_broot = kmalloc(size,
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
ASSERT(ifp->if_broot != NULL);
/*
* Copy and convert from the on-disk structure
@@ -265,12 +273,14 @@ xfs_iformat_data_fork(
default:
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
dip, sizeof(*dip), __this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return -EFSCORRUPTED;
}
break;
default:
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
sizeof(*dip), __this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
return -EFSCORRUPTED;
}
}
@@ -342,6 +352,7 @@ xfs_iformat_attr_fork(
default:
xfs_inode_verifier_error(ip, error, __func__, dip,
sizeof(*dip), __this_address);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
error = -EFSCORRUPTED;
break;
}
@@ -399,7 +410,8 @@ xfs_iroot_realloc(
*/
if (ifp->if_broot_bytes == 0) {
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
- ifp->if_broot = kmem_alloc(new_size, KM_NOFS);
+ ifp->if_broot = kmalloc(new_size,
+ GFP_KERNEL | __GFP_NOFAIL);
ifp->if_broot_bytes = (int)new_size;
return;
}
@@ -414,7 +426,7 @@ xfs_iroot_realloc(
new_max = cur_max + rec_diff;
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
ifp->if_broot = krealloc(ifp->if_broot, new_size,
- GFP_NOFS | __GFP_NOFAIL);
+ GFP_KERNEL | __GFP_NOFAIL);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -440,7 +452,7 @@ xfs_iroot_realloc(
else
new_size = 0;
if (new_size > 0) {
- new_broot = kmem_alloc(new_size, KM_NOFS);
+ new_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL);
/*
* First copy over the btree block header.
*/
@@ -470,7 +482,7 @@ xfs_iroot_realloc(
(int)new_size);
memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
}
- kmem_free(ifp->if_broot);
+ kfree(ifp->if_broot);
ifp->if_broot = new_broot;
ifp->if_broot_bytes = (int)new_size;
if (ifp->if_broot)
@@ -488,7 +500,7 @@ xfs_iroot_realloc(
*
* If the amount of space needed has decreased below the size of the
* inline buffer, then switch to using the inline buffer. Otherwise,
- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * use krealloc() or kmalloc() to adjust the size of the buffer
* to what is needed.
*
* ip -- the inode whose if_data area is changing
@@ -509,7 +521,7 @@ xfs_idata_realloc(
if (byte_diff) {
ifp->if_data = krealloc(ifp->if_data, new_size,
- GFP_NOFS | __GFP_NOFAIL);
+ GFP_KERNEL | __GFP_NOFAIL);
if (new_size == 0)
ifp->if_data = NULL;
ifp->if_bytes = new_size;
@@ -524,13 +536,13 @@ xfs_idestroy_fork(
struct xfs_ifork *ifp)
{
if (ifp->if_broot != NULL) {
- kmem_free(ifp->if_broot);
+ kfree(ifp->if_broot);
ifp->if_broot = NULL;
}
switch (ifp->if_format) {
case XFS_DINODE_FMT_LOCAL:
- kmem_free(ifp->if_data);
+ kfree(ifp->if_data);
ifp->if_data = NULL;
break;
case XFS_DINODE_FMT_EXTENTS:
@@ -562,7 +574,7 @@ xfs_iextents_copy(
struct xfs_bmbt_irec rec;
int64_t copied = 0;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED);
ASSERT(ifp->if_bytes > 0);
for_each_xfs_iext(ifp, &icur, &rec) {
@@ -689,7 +701,7 @@ xfs_ifork_init_cow(
return;
ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache,
- GFP_NOFS | __GFP_NOFAIL);
+ GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS;
}
@@ -753,52 +765,54 @@ xfs_ifork_verify_local_attr(
return 0;
}
+/*
+ * Check if the inode fork supports adding nr_to_add more extents.
+ *
+ * If it doesn't but we can upgrade it to large extent counters, do the upgrade.
+ * If we can't upgrade or are already using big counters but still can't fit the
+ * additional extents, return -EFBIG.
+ */
int
-xfs_iext_count_may_overflow(
+xfs_iext_count_extend(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork,
- int nr_to_add)
+ uint nr_to_add)
{
+ struct xfs_mount *mp = ip->i_mount;
+ bool has_large =
+ xfs_inode_has_large_extent_counts(ip);
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
- uint64_t max_exts;
uint64_t nr_exts;
+ ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR);
+
if (whichfork == XFS_COW_FORK)
return 0;
- max_exts = xfs_iext_max_nextents(xfs_inode_has_large_extent_counts(ip),
- whichfork);
-
- if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS))
- max_exts = 10;
-
+ /* no point in upgrading if if_nextents overflows */
nr_exts = ifp->if_nextents + nr_to_add;
- if (nr_exts < ifp->if_nextents || nr_exts > max_exts)
+ if (nr_exts < ifp->if_nextents)
+ return -EFBIG;
+
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
+ nr_exts > 10)
return -EFBIG;
+ if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) {
+ if (has_large || !xfs_has_large_extent_counts(mp))
+ return -EFBIG;
+ ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ }
return 0;
}
-/*
- * Upgrade this inode's extent counter fields to be able to handle a potential
- * increase in the extent count by nr_to_add. Normally this is the same
- * quantity that caused xfs_iext_count_may_overflow() to return -EFBIG.
- */
-int
-xfs_iext_count_upgrade(
- struct xfs_trans *tp,
+/* Decide if a file mapping is on the realtime device or not. */
+bool
+xfs_ifork_is_realtime(
struct xfs_inode *ip,
- uint nr_to_add)
+ int whichfork)
{
- ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR);
-
- if (!xfs_has_large_extent_counts(ip->i_mount) ||
- xfs_inode_has_large_extent_counts(ip) ||
- XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS))
- return -EFBIG;
-
- ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- return 0;
+ return XFS_IS_REALTIME_INODE(ip) && whichfork != XFS_ATTR_FORK;
}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 96303249d28a..2373d12fd474 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -256,10 +256,9 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip);
int xfs_ifork_verify_local_data(struct xfs_inode *ip);
int xfs_ifork_verify_local_attr(struct xfs_inode *ip);
-int xfs_iext_count_may_overflow(struct xfs_inode *ip, int whichfork,
- int nr_to_add);
-int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip,
- uint nr_to_add);
+int xfs_iext_count_extend(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, uint nr_to_add);
+bool xfs_ifork_is_realtime(struct xfs_inode *ip, int whichfork);
/* returns true if the fork has extents but they are not read in yet. */
static inline bool xfs_need_iread_extents(const struct xfs_ifork *ifp)
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 269573c82808..3e6682ed656b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -115,10 +115,13 @@ struct xfs_unmount_log_format {
#define XLOG_REG_TYPE_BUD_FORMAT 26
#define XLOG_REG_TYPE_ATTRI_FORMAT 27
#define XLOG_REG_TYPE_ATTRD_FORMAT 28
-#define XLOG_REG_TYPE_ATTR_NAME 29
+#define XLOG_REG_TYPE_ATTR_NAME 29
#define XLOG_REG_TYPE_ATTR_VALUE 30
-#define XLOG_REG_TYPE_MAX 30
-
+#define XLOG_REG_TYPE_XMI_FORMAT 31
+#define XLOG_REG_TYPE_XMD_FORMAT 32
+#define XLOG_REG_TYPE_ATTR_NEWNAME 33
+#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
+#define XLOG_REG_TYPE_MAX 34
/*
* Flags to log operation header
@@ -243,6 +246,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_BUD 0x1245
#define XFS_LI_ATTRI 0x1246 /* attr set/remove intent*/
#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */
+#define XFS_LI_XMI 0x1248 /* mapping exchange intent */
+#define XFS_LI_XMD 0x1249 /* mapping exchange done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -260,7 +265,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_BUI, "XFS_LI_BUI" }, \
{ XFS_LI_BUD, "XFS_LI_BUD" }, \
{ XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \
- { XFS_LI_ATTRD, "XFS_LI_ATTRD" }
+ { XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \
+ { XFS_LI_XMI, "XFS_LI_XMI" }, \
+ { XFS_LI_XMD, "XFS_LI_XMD" }
/*
* Inode Log Item Format definitions.
@@ -838,10 +845,12 @@ struct xfs_cud_log_format {
#define XFS_BMAP_EXTENT_ATTR_FORK (1U << 31)
#define XFS_BMAP_EXTENT_UNWRITTEN (1U << 30)
+#define XFS_BMAP_EXTENT_REALTIME (1U << 29)
#define XFS_BMAP_EXTENT_FLAGS (XFS_BMAP_EXTENT_TYPE_MASK | \
XFS_BMAP_EXTENT_ATTR_FORK | \
- XFS_BMAP_EXTENT_UNWRITTEN)
+ XFS_BMAP_EXTENT_UNWRITTEN | \
+ XFS_BMAP_EXTENT_REALTIME)
/*
* This is the structure used to lay out an bui log item in the
@@ -877,6 +886,61 @@ struct xfs_bud_log_format {
};
/*
+ * XMI/XMD (file mapping exchange) log format definitions
+ */
+
+/* This is the structure used to lay out an mapping exchange log item. */
+struct xfs_xmi_log_format {
+ uint16_t xmi_type; /* xmi log item type */
+ uint16_t xmi_size; /* size of this item */
+ uint32_t __pad; /* must be zero */
+ uint64_t xmi_id; /* xmi identifier */
+
+ uint64_t xmi_inode1; /* inumber of first file */
+ uint64_t xmi_inode2; /* inumber of second file */
+ uint32_t xmi_igen1; /* generation of first file */
+ uint32_t xmi_igen2; /* generation of second file */
+ uint64_t xmi_startoff1; /* block offset into file1 */
+ uint64_t xmi_startoff2; /* block offset into file2 */
+ uint64_t xmi_blockcount; /* number of blocks */
+ uint64_t xmi_flags; /* XFS_EXCHMAPS_* */
+ uint64_t xmi_isize1; /* intended file1 size */
+ uint64_t xmi_isize2; /* intended file2 size */
+};
+
+/* Exchange mappings between extended attribute forks instead of data forks. */
+#define XFS_EXCHMAPS_ATTR_FORK (1ULL << 0)
+
+/* Set the file sizes when finished. */
+#define XFS_EXCHMAPS_SET_SIZES (1ULL << 1)
+
+/*
+ * Exchange the mappings of the two files only if the file allocation units
+ * mapped to file1's range have been written.
+ */
+#define XFS_EXCHMAPS_INO1_WRITTEN (1ULL << 2)
+
+/* Clear the reflink flag from inode1 after the operation. */
+#define XFS_EXCHMAPS_CLEAR_INO1_REFLINK (1ULL << 3)
+
+/* Clear the reflink flag from inode2 after the operation. */
+#define XFS_EXCHMAPS_CLEAR_INO2_REFLINK (1ULL << 4)
+
+#define XFS_EXCHMAPS_LOGGED_FLAGS (XFS_EXCHMAPS_ATTR_FORK | \
+ XFS_EXCHMAPS_SET_SIZES | \
+ XFS_EXCHMAPS_INO1_WRITTEN | \
+ XFS_EXCHMAPS_CLEAR_INO1_REFLINK | \
+ XFS_EXCHMAPS_CLEAR_INO2_REFLINK)
+
+/* This is the structure used to lay out an mapping exchange done log item. */
+struct xfs_xmd_log_format {
+ uint16_t xmd_type; /* xmd log item type */
+ uint16_t xmd_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t xmd_xmi_id; /* id of corresponding xmi */
+};
+
+/*
* Dquot Log format definitions.
*
* The first two fields must be the type and size fitting into
@@ -964,6 +1028,9 @@ struct xfs_icreate_log {
#define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */
#define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */
#define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */
+#define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */
+#define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */
+#define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */
#define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
/*
@@ -972,6 +1039,7 @@ struct xfs_icreate_log {
*/
#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \
XFS_ATTR_SECURE | \
+ XFS_ATTR_PARENT | \
XFS_ATTR_INCOMPLETE)
/*
@@ -981,11 +1049,22 @@ struct xfs_icreate_log {
struct xfs_attri_log_format {
uint16_t alfi_type; /* attri log item type */
uint16_t alfi_size; /* size of this item */
- uint32_t __pad; /* pad to 64 bit aligned */
+ uint32_t alfi_igen; /* generation of alfi_ino for pptr ops */
uint64_t alfi_id; /* attri identifier */
uint64_t alfi_ino; /* the inode for this attr operation */
uint32_t alfi_op_flags; /* marks the op as a set or remove */
- uint32_t alfi_name_len; /* attr name length */
+ union {
+ uint32_t alfi_name_len; /* attr name length */
+ struct {
+ /*
+ * For PPTR_REPLACE, these are the lengths of the old
+ * and new attr names. The new and old values must
+ * have the same length.
+ */
+ uint16_t alfi_old_name_len;
+ uint16_t alfi_new_name_len;
+ };
+ };
uint32_t alfi_value_len; /* attr value length */
uint32_t alfi_attr_filter;/* attr filter flags */
};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 9fe7a9564bca..521d327e4c89 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -75,6 +75,8 @@ extern const struct xlog_recover_item_ops xlog_cui_item_ops;
extern const struct xlog_recover_item_ops xlog_cud_item_ops;
extern const struct xlog_recover_item_ops xlog_attri_item_ops;
extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
+extern const struct xlog_recover_item_ops xlog_xmi_item_ops;
+extern const struct xlog_recover_item_ops xlog_xmd_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
@@ -121,6 +123,8 @@ bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
int xlog_recover_iget(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_inode **ipp);
+int xlog_recover_iget_handle(struct xfs_mount *mp, xfs_ino_t ino, uint32_t gen,
+ struct xfs_inode **ipp);
void xlog_recover_release_intent(struct xlog *log, unsigned short intent_type,
uint64_t intent_id);
int xlog_alloc_buf_cancel_table(struct xlog *log);
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 9975b93a7412..d3bd6a86c8fe 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -17,6 +17,34 @@
#include "xfs_trace.h"
/*
+ * Shortly after enabling the large extents count feature in 2023, longstanding
+ * bugs were found in the code that computes the minimum log size. Luckily,
+ * the bugs resulted in over-estimates of that size, so there's no impact to
+ * existing users. However, we don't want to reduce the minimum log size
+ * because that can create the situation where a newer mkfs writes a new
+ * filesystem that an older kernel won't mount.
+ *
+ * Several years prior, we also discovered that the transaction reservations
+ * for rmap and reflink operations were unnecessarily large. That was fixed,
+ * but the minimum log size computation was left alone to avoid the
+ * compatibility problems noted above. Fix that too.
+ *
+ * Therefore, we only may correct the computation starting with filesystem
+ * features that didn't exist in 2023. In other words, only turn this on if
+ * the filesystem has parent pointers.
+ *
+ * This function can be called before the XFS_HAS_* flags have been set up,
+ * (e.g. mkfs) so we must check the ondisk superblock.
+ */
+static inline bool
+xfs_want_minlogsize_fixes(
+ struct xfs_sb *sb)
+{
+ return xfs_sb_is_v5(sb) &&
+ xfs_sb_has_incompat_feature(sb, XFS_SB_FEAT_INCOMPAT_PARENT);
+}
+
+/*
* Calculate the maximum length in bytes that would be required for a local
* attribute value as large attributes out of line are not logged.
*/
@@ -31,6 +59,15 @@ xfs_log_calc_max_attrsetm_res(
MAXNAMELEN - 1;
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
nblks += XFS_B_TO_FSB(mp, size);
+
+ /*
+ * If the feature set is new enough, correct a unit conversion error in
+ * the xattr transaction reservation code that resulted in oversized
+ * minimum log size computations.
+ */
+ if (xfs_want_minlogsize_fixes(&mp->m_sb))
+ size = XFS_B_TO_FSB(mp, size);
+
nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
return M_RES(mp)->tr_attrsetm.tr_logres +
@@ -49,6 +86,15 @@ xfs_log_calc_trans_resv_for_minlogblocks(
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
/*
+ * If the feature set is new enough, drop the oversized minimum log
+ * size computation introduced by the original reflink code.
+ */
+ if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
+ xfs_trans_resv_calc(mp, resv);
+ return;
+ }
+
+ /*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
* that height. Transaction reservation sizes influence the minimum
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 81885a6a028e..e8cdd77d03fa 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -119,6 +119,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1);
XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3);
XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12);
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
@@ -155,6 +156,11 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
+ /* parent pointer ioctls */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_by_handle, 64);
+
/*
* The v5 superblock format extended several v4 header structures with
* additional data. While new fields are only accessible on v5
diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c
new file mode 100644
index 000000000000..69366c44a701
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_parent.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022-2024 Oracle.
+ * All rights reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_trans.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_attr_sf.h"
+#include "xfs_bmap.h"
+#include "xfs_defer.h"
+#include "xfs_log.h"
+#include "xfs_xattr.h"
+#include "xfs_parent.h"
+#include "xfs_trans_space.h"
+#include "xfs_attr_item.h"
+#include "xfs_health.h"
+
+struct kmem_cache *xfs_parent_args_cache;
+
+/*
+ * Parent pointer attribute handling.
+ *
+ * Because the attribute name is a filename component, it will never be longer
+ * than 255 bytes and must not contain nulls or slashes. These are roughly the
+ * same constraints that apply to attribute names.
+ *
+ * The attribute value must always be a struct xfs_parent_rec. This means the
+ * attribute will never be in remote format because 12 bytes is nowhere near
+ * xfs_attr_leaf_entsize_local_max() (~75% of block size).
+ *
+ * Creating a new parent attribute will always create a new attribute - there
+ * should never, ever be an existing attribute in the tree for a new inode.
+ * ENOSPC behavior is problematic - creating the inode without the parent
+ * pointer is effectively a corruption, so we allow parent attribute creation
+ * to dip into the reserve block pool to avoid unexpected ENOSPC errors from
+ * occurring.
+ */
+
+/* Return true if parent pointer attr name is valid. */
+bool
+xfs_parent_namecheck(
+ unsigned int attr_flags,
+ const void *name,
+ size_t length)
+{
+ /*
+ * Parent pointers always use logged operations, so there should never
+ * be incomplete xattrs.
+ */
+ if (attr_flags & XFS_ATTR_INCOMPLETE)
+ return false;
+
+ return xfs_dir2_namecheck(name, length);
+}
+
+/* Return true if parent pointer attr value is valid. */
+bool
+xfs_parent_valuecheck(
+ struct xfs_mount *mp,
+ const void *value,
+ size_t valuelen)
+{
+ const struct xfs_parent_rec *rec = value;
+
+ if (!xfs_has_parent(mp))
+ return false;
+
+ /* The xattr value must be a parent record. */
+ if (valuelen != sizeof(struct xfs_parent_rec))
+ return false;
+
+ /* The parent record must be local. */
+ if (value == NULL)
+ return false;
+
+ /* The parent inumber must be valid. */
+ if (!xfs_verify_dir_ino(mp, be64_to_cpu(rec->p_ino)))
+ return false;
+
+ return true;
+}
+
+/* Compute the attribute name hash for a parent pointer. */
+xfs_dahash_t
+xfs_parent_hashval(
+ struct xfs_mount *mp,
+ const uint8_t *name,
+ int namelen,
+ xfs_ino_t parent_ino)
+{
+ struct xfs_name xname = {
+ .name = name,
+ .len = namelen,
+ };
+
+ /*
+ * Use the same dirent name hash as would be used on the directory, but
+ * mix in the parent inode number to avoid collisions on hardlinked
+ * files with identical names but different parents.
+ */
+ return xfs_dir2_hashname(mp, &xname) ^
+ upper_32_bits(parent_ino) ^ lower_32_bits(parent_ino);
+}
+
+/* Compute the attribute name hash from the xattr components. */
+xfs_dahash_t
+xfs_parent_hashattr(
+ struct xfs_mount *mp,
+ const uint8_t *name,
+ int namelen,
+ const void *value,
+ int valuelen)
+{
+ const struct xfs_parent_rec *rec = value;
+
+ /* Requires a local attr value in xfs_parent_rec format */
+ if (valuelen != sizeof(struct xfs_parent_rec)) {
+ ASSERT(valuelen == sizeof(struct xfs_parent_rec));
+ return 0;
+ }
+
+ if (!value) {
+ ASSERT(value != NULL);
+ return 0;
+ }
+
+ return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino));
+}
+
+/*
+ * Initialize the parent pointer arguments structure. Caller must have zeroed
+ * the contents of @args. @tp is only required for updates.
+ */
+static void
+xfs_parent_da_args_init(
+ struct xfs_da_args *args,
+ struct xfs_trans *tp,
+ struct xfs_parent_rec *rec,
+ struct xfs_inode *child,
+ xfs_ino_t owner,
+ const struct xfs_name *parent_name)
+{
+ args->geo = child->i_mount->m_attr_geo;
+ args->whichfork = XFS_ATTR_FORK;
+ args->attr_filter = XFS_ATTR_PARENT;
+ args->op_flags = XFS_DA_OP_LOGGED | XFS_DA_OP_OKNOENT;
+ args->trans = tp;
+ args->dp = child;
+ args->owner = owner;
+ args->name = parent_name->name;
+ args->namelen = parent_name->len;
+ args->value = rec;
+ args->valuelen = sizeof(struct xfs_parent_rec);
+ xfs_attr_sethash(args);
+}
+
+/* Make sure the incore state is ready for a parent pointer query/update. */
+static inline int
+xfs_parent_iread_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *child)
+{
+ /* Parent pointers require that the attr fork must exist. */
+ if (XFS_IS_CORRUPT(child->i_mount, !xfs_inode_has_attr_fork(child))) {
+ xfs_inode_mark_sick(child, XFS_SICK_INO_PARENT);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_iread_extents(tp, child, XFS_ATTR_FORK);
+}
+
+/* Add a parent pointer to reflect a dirent addition. */
+int
+xfs_parent_addname(
+ struct xfs_trans *tp,
+ struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp,
+ const struct xfs_name *parent_name,
+ struct xfs_inode *child)
+{
+ int error;
+
+ error = xfs_parent_iread_extents(tp, child);
+ if (error)
+ return error;
+
+ xfs_inode_to_parent_rec(&ppargs->rec, dp);
+ xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
+ child->i_ino, parent_name);
+ xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_SET);
+ return 0;
+}
+
+/* Remove a parent pointer to reflect a dirent removal. */
+int
+xfs_parent_removename(
+ struct xfs_trans *tp,
+ struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp,
+ const struct xfs_name *parent_name,
+ struct xfs_inode *child)
+{
+ int error;
+
+ error = xfs_parent_iread_extents(tp, child);
+ if (error)
+ return error;
+
+ xfs_inode_to_parent_rec(&ppargs->rec, dp);
+ xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
+ child->i_ino, parent_name);
+ xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REMOVE);
+ return 0;
+}
+
+/* Replace one parent pointer with another to reflect a rename. */
+int
+xfs_parent_replacename(
+ struct xfs_trans *tp,
+ struct xfs_parent_args *ppargs,
+ struct xfs_inode *old_dp,
+ const struct xfs_name *old_name,
+ struct xfs_inode *new_dp,
+ const struct xfs_name *new_name,
+ struct xfs_inode *child)
+{
+ int error;
+
+ error = xfs_parent_iread_extents(tp, child);
+ if (error)
+ return error;
+
+ xfs_inode_to_parent_rec(&ppargs->rec, old_dp);
+ xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
+ child->i_ino, old_name);
+
+ xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp);
+ ppargs->args.new_name = new_name->name;
+ ppargs->args.new_namelen = new_name->len;
+ ppargs->args.new_value = &ppargs->new_rec;
+ ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec);
+ xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE);
+ return 0;
+}
+
+/*
+ * Extract parent pointer information from any parent pointer xattr into
+ * @parent_ino/gen. The last two parameters can be NULL pointers.
+ *
+ * Returns 0 if this is not a parent pointer xattr at all; or -EFSCORRUPTED for
+ * garbage.
+ */
+int
+xfs_parent_from_attr(
+ struct xfs_mount *mp,
+ unsigned int attr_flags,
+ const unsigned char *name,
+ unsigned int namelen,
+ const void *value,
+ unsigned int valuelen,
+ xfs_ino_t *parent_ino,
+ uint32_t *parent_gen)
+{
+ const struct xfs_parent_rec *rec = value;
+
+ ASSERT(attr_flags & XFS_ATTR_PARENT);
+
+ if (!xfs_parent_namecheck(attr_flags, name, namelen))
+ return -EFSCORRUPTED;
+ if (!xfs_parent_valuecheck(mp, value, valuelen))
+ return -EFSCORRUPTED;
+
+ if (parent_ino)
+ *parent_ino = be64_to_cpu(rec->p_ino);
+ if (parent_gen)
+ *parent_gen = be32_to_cpu(rec->p_gen);
+ return 0;
+}
+
+/*
+ * Look up a parent pointer record (@parent_name -> @pptr) of @ip.
+ *
+ * Caller must hold at least ILOCK_SHARED. The scratchpad need not be
+ * initialized.
+ *
+ * Returns 0 if the pointer is found, -ENOATTR if there is no match, or a
+ * negative errno.
+ */
+int
+xfs_parent_lookup(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ const struct xfs_name *parent_name,
+ struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch)
+{
+ memset(scratch, 0, sizeof(struct xfs_da_args));
+ xfs_parent_da_args_init(scratch, tp, pptr, ip, ip->i_ino, parent_name);
+ return xfs_attr_get_ilocked(scratch);
+}
+
+/* Sanity-check a parent pointer before we try to perform repairs. */
+static inline bool
+xfs_parent_sanity_check(
+ struct xfs_mount *mp,
+ const struct xfs_name *parent_name,
+ const struct xfs_parent_rec *pptr)
+{
+ if (!xfs_parent_namecheck(XFS_ATTR_PARENT, parent_name->name,
+ parent_name->len))
+ return false;
+
+ if (!xfs_parent_valuecheck(mp, pptr, sizeof(*pptr)))
+ return false;
+
+ return true;
+}
+
+
+/*
+ * Attach the parent pointer (@parent_name -> @pptr) to @ip immediately.
+ * Caller must not have a transaction or hold the ILOCK. This is for
+ * specialized repair functions only. The scratchpad need not be initialized.
+ */
+int
+xfs_parent_set(
+ struct xfs_inode *ip,
+ xfs_ino_t owner,
+ const struct xfs_name *parent_name,
+ struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch)
+{
+ if (!xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ memset(scratch, 0, sizeof(struct xfs_da_args));
+ xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);
+ return xfs_attr_set(scratch, XFS_ATTRUPDATE_CREATE, false);
+}
+
+/*
+ * Remove the parent pointer (@parent_name -> @pptr) from @ip immediately.
+ * Caller must not have a transaction or hold the ILOCK. This is for
+ * specialized repair functions only. The scratchpad need not be initialized.
+ */
+int
+xfs_parent_unset(
+ struct xfs_inode *ip,
+ xfs_ino_t owner,
+ const struct xfs_name *parent_name,
+ struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch)
+{
+ if (!xfs_parent_sanity_check(ip->i_mount, parent_name, pptr)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ memset(scratch, 0, sizeof(struct xfs_da_args));
+ xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);
+ return xfs_attr_set(scratch, XFS_ATTRUPDATE_REMOVE, false);
+}
diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h
new file mode 100644
index 000000000000..b8036527cdc7
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_parent.h
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022-2024 Oracle.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_PARENT_H__
+#define __XFS_PARENT_H__
+
+/* Metadata validators */
+bool xfs_parent_namecheck(unsigned int attr_flags, const void *name,
+ size_t length);
+bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value,
+ size_t valuelen);
+
+xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name,
+ int namelen, xfs_ino_t parent_ino);
+xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name,
+ int namelen, const void *value, int valuelen);
+
+/* Initializes a xfs_parent_rec to be stored as an attribute name. */
+static inline void
+xfs_parent_rec_init(
+ struct xfs_parent_rec *rec,
+ xfs_ino_t ino,
+ uint32_t gen)
+{
+ rec->p_ino = cpu_to_be64(ino);
+ rec->p_gen = cpu_to_be32(gen);
+}
+
+/* Initializes a xfs_parent_rec to be stored as an attribute name. */
+static inline void
+xfs_inode_to_parent_rec(
+ struct xfs_parent_rec *rec,
+ const struct xfs_inode *dp)
+{
+ xfs_parent_rec_init(rec, dp->i_ino, VFS_IC(dp)->i_generation);
+}
+
+extern struct kmem_cache *xfs_parent_args_cache;
+
+/*
+ * Parent pointer information needed to pass around the deferred xattr update
+ * machinery.
+ */
+struct xfs_parent_args {
+ struct xfs_parent_rec rec;
+ struct xfs_parent_rec new_rec;
+ struct xfs_da_args args;
+};
+
+/*
+ * Start a parent pointer update by allocating the context object we need to
+ * perform a parent pointer update.
+ */
+static inline int
+xfs_parent_start(
+ struct xfs_mount *mp,
+ struct xfs_parent_args **ppargsp)
+{
+ if (!xfs_has_parent(mp)) {
+ *ppargsp = NULL;
+ return 0;
+ }
+
+ *ppargsp = kmem_cache_zalloc(xfs_parent_args_cache, GFP_KERNEL);
+ if (!*ppargsp)
+ return -ENOMEM;
+ return 0;
+}
+
+/* Finish a parent pointer update by freeing the context object. */
+static inline void
+xfs_parent_finish(
+ struct xfs_mount *mp,
+ struct xfs_parent_args *ppargs)
+{
+ if (ppargs)
+ kmem_cache_free(xfs_parent_args_cache, ppargs);
+}
+
+int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp, const struct xfs_name *parent_name,
+ struct xfs_inode *child);
+int xfs_parent_removename(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
+ struct xfs_inode *dp, const struct xfs_name *parent_name,
+ struct xfs_inode *child);
+int xfs_parent_replacename(struct xfs_trans *tp,
+ struct xfs_parent_args *ppargs,
+ struct xfs_inode *old_dp, const struct xfs_name *old_name,
+ struct xfs_inode *new_dp, const struct xfs_name *new_name,
+ struct xfs_inode *child);
+
+int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags,
+ const unsigned char *name, unsigned int namelen,
+ const void *value, unsigned int valuelen,
+ xfs_ino_t *parent_ino, uint32_t *parent_gen);
+
+/* Repair functions */
+int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+int xfs_parent_set(struct xfs_inode *ip, xfs_ino_t owner,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+int xfs_parent_unset(struct xfs_inode *ip, xfs_ino_t owner,
+ const struct xfs_name *name, struct xfs_parent_rec *pptr,
+ struct xfs_da_args *scratch);
+
+#endif /* __XFS_PARENT_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 6709a7f8bad5..511c912d515c 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -23,6 +23,7 @@
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
+#include "xfs_health.h"
struct kmem_cache *xfs_refcount_intent_cache;
@@ -156,6 +157,7 @@ xfs_refcount_complain_bad_rec(
xfs_warn(mp,
"Start block 0x%x, block count 0x%x, references 0x%x",
irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -238,6 +240,7 @@ xfs_refcount_insert(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -268,12 +271,14 @@ xfs_refcount_delete(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
error = xfs_btree_delete(cur, i);
if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -398,6 +403,7 @@ xfs_refcount_split_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -425,6 +431,7 @@ xfs_refcount_split_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -470,6 +477,7 @@ xfs_refcount_merge_center_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -478,6 +486,7 @@ xfs_refcount_merge_center_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -487,6 +496,7 @@ xfs_refcount_merge_center_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -498,6 +508,7 @@ xfs_refcount_merge_center_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -542,6 +553,7 @@ xfs_refcount_merge_left_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -550,6 +562,7 @@ xfs_refcount_merge_left_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -561,6 +574,7 @@ xfs_refcount_merge_left_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -608,6 +622,7 @@ xfs_refcount_merge_right_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -616,6 +631,7 @@ xfs_refcount_merge_right_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -627,6 +643,7 @@ xfs_refcount_merge_right_extent(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -674,6 +691,7 @@ xfs_refcount_find_left_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -693,6 +711,7 @@ xfs_refcount_find_left_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -767,6 +786,7 @@ xfs_refcount_find_right_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -786,6 +806,7 @@ xfs_refcount_find_right_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1056,7 +1077,7 @@ xfs_refcount_still_have_space(
* to handle each of the shape changes to the refcount btree.
*/
overhead = xfs_allocfree_block_count(cur->bc_mp,
- cur->bc_ag.refc.shape_changes);
+ cur->bc_refc.shape_changes);
overhead += cur->bc_mp->m_refc_maxlevels;
overhead *= cur->bc_mp->m_sb.sb_blocksize;
@@ -1064,17 +1085,17 @@ xfs_refcount_still_have_space(
* Only allow 2 refcount extent updates per transaction if the
* refcount continue update "error" has been injected.
*/
- if (cur->bc_ag.refc.nr_ops > 2 &&
+ if (cur->bc_refc.nr_ops > 2 &&
XFS_TEST_ERROR(false, cur->bc_mp,
XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
return false;
- if (cur->bc_ag.refc.nr_ops == 0)
+ if (cur->bc_refc.nr_ops == 0)
return true;
else if (overhead > cur->bc_tp->t_log_res)
return false;
- return cur->bc_tp->t_log_res - overhead >
- cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
+ return cur->bc_tp->t_log_res - overhead >
+ cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}
/*
@@ -1134,7 +1155,7 @@ xfs_refcount_adjust_extents(
* Either cover the hole (increment) or
* delete the range (decrement).
*/
- cur->bc_ag.refc.nr_ops++;
+ cur->bc_refc.nr_ops++;
if (tmp.rc_refcount) {
error = xfs_refcount_insert(cur, &tmp,
&found_tmp);
@@ -1142,6 +1163,7 @@ xfs_refcount_adjust_extents(
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp,
found_tmp != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1180,6 +1202,7 @@ xfs_refcount_adjust_extents(
*/
if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1193,7 +1216,7 @@ xfs_refcount_adjust_extents(
ext.rc_refcount += adj;
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &ext);
- cur->bc_ag.refc.nr_ops++;
+ cur->bc_refc.nr_ops++;
if (ext.rc_refcount > 1) {
error = xfs_refcount_update(cur, &ext);
if (error)
@@ -1203,6 +1226,7 @@ xfs_refcount_adjust_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1281,7 +1305,7 @@ xfs_refcount_adjust(
if (shape_changed)
shape_changes++;
if (shape_changes)
- cur->bc_ag.refc.shape_changes++;
+ cur->bc_refc.shape_changes++;
/* Now that we've taken care of the ends, adjust the middle extents */
error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
@@ -1327,8 +1351,10 @@ xfs_refcount_continue_op(
struct xfs_perag *pag = cur->bc_ag.pag;
if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
- ri->ri_blockcount)))
+ ri->ri_blockcount))) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
@@ -1374,8 +1400,8 @@ xfs_refcount_finish_one(
*/
rcur = *pcur;
if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
- nr_ops = rcur->bc_ag.refc.nr_ops;
- shape_changes = rcur->bc_ag.refc.shape_changes;
+ nr_ops = rcur->bc_refc.nr_ops;
+ shape_changes = rcur->bc_refc.shape_changes;
xfs_refcount_finish_one_cleanup(tp, rcur, 0);
rcur = NULL;
*pcur = NULL;
@@ -1387,8 +1413,8 @@ xfs_refcount_finish_one(
return error;
rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
- rcur->bc_ag.refc.nr_ops = nr_ops;
- rcur->bc_ag.refc.shape_changes = shape_changes;
+ rcur->bc_refc.nr_ops = nr_ops;
+ rcur->bc_refc.shape_changes = shape_changes;
}
*pcur = rcur;
@@ -1449,7 +1475,7 @@ __xfs_refcount_add(
blockcount);
ri = kmem_cache_alloc(xfs_refcount_intent_cache,
- GFP_NOFS | __GFP_NOFAIL);
+ GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
ri->ri_startblock = startblock;
@@ -1535,6 +1561,7 @@ xfs_refcount_find_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1552,6 +1579,7 @@ xfs_refcount_find_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1585,6 +1613,7 @@ xfs_refcount_find_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1682,6 +1711,7 @@ xfs_refcount_adjust_cow_extents(
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1697,6 +1727,7 @@ xfs_refcount_adjust_cow_extents(
/* Adding a CoW reservation, there should be nothing here. */
if (XFS_IS_CORRUPT(cur->bc_mp,
agbno + aglen > ext.rc_startblock)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1714,6 +1745,7 @@ xfs_refcount_adjust_cow_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1721,14 +1753,17 @@ xfs_refcount_adjust_cow_extents(
case XFS_REFCOUNT_ADJUST_COW_FREE:
/* Removing a CoW reservation, there should be one extent. */
if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1740,6 +1775,7 @@ xfs_refcount_adjust_cow_extents(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1889,8 +1925,10 @@ xfs_refcount_recover_extent(
struct xfs_refcount_recovery *rr;
if (XFS_IS_CORRUPT(cur->bc_mp,
- be32_to_cpu(rec->refc.rc_refcount) != 1))
+ be32_to_cpu(rec->refc.rc_refcount) != 1)) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
rr = kmalloc(sizeof(struct xfs_refcount_recovery),
GFP_KERNEL | __GFP_NOFAIL);
@@ -1900,6 +1938,7 @@ xfs_refcount_recover_extent(
if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL ||
XFS_IS_CORRUPT(cur->bc_mp,
rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
+ xfs_btree_mark_sick(cur);
kfree(rr);
return -EFSCORRUPTED;
}
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 0d80bd99147c..ca59f6c89f3e 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -16,6 +16,7 @@
#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
+#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
@@ -77,8 +78,6 @@ xfs_refcountbt_alloc_block(
xfs_refc_block(args.mp)));
if (error)
goto out_error;
- trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- args.agbno, 1);
if (args.fsbno == NULLFSBLOCK) {
*stat = 0;
return 0;
@@ -107,8 +106,6 @@ xfs_refcountbt_free_block(
struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
- trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
- XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
@@ -220,7 +217,7 @@ xfs_refcountbt_verify(
if (!xfs_has_reflink(mp))
return __this_address;
- fa = xfs_btree_sblock_v5hdr_verify(bp);
+ fa = xfs_btree_agblock_v5hdr_verify(bp);
if (fa)
return fa;
@@ -242,7 +239,7 @@ xfs_refcountbt_verify(
} else if (level >= mp->m_refc_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]);
+ return xfs_btree_agblock_verify(bp, mp->m_refc_mxr[level != 0]);
}
STATIC void
@@ -251,7 +248,7 @@ xfs_refcountbt_read_verify(
{
xfs_failaddr_t fa;
- if (!xfs_btree_sblock_verify_crc(bp))
+ if (!xfs_btree_agblock_verify_crc(bp))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_refcountbt_verify(bp);
@@ -275,7 +272,7 @@ xfs_refcountbt_write_verify(
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
- xfs_btree_sblock_calc_crc(bp);
+ xfs_btree_agblock_calc_crc(bp);
}
@@ -321,9 +318,17 @@ xfs_refcountbt_keys_contiguous(
be32_to_cpu(key2->refc.rc_startblock));
}
-static const struct xfs_btree_ops xfs_refcountbt_ops = {
+const struct xfs_btree_ops xfs_refcountbt_ops = {
+ .name = "refcount",
+ .type = XFS_BTREE_TYPE_AG,
+
.rec_len = sizeof(struct xfs_refcount_rec),
.key_len = sizeof(struct xfs_refcount_key),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
+
+ .lru_refs = XFS_REFC_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2),
+ .sick_mask = XFS_SICK_AG_REFCNTBT,
.dup_cursor = xfs_refcountbt_dup_cursor,
.set_root = xfs_refcountbt_set_root,
@@ -344,59 +349,32 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = {
};
/*
- * Initialize a new refcount btree cursor.
+ * Create a new refcount btree cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
*/
-static struct xfs_btree_cur *
-xfs_refcountbt_init_common(
+struct xfs_btree_cur *
+xfs_refcountbt_init_cursor(
struct xfs_mount *mp,
struct xfs_trans *tp,
+ struct xfs_buf *agbp,
struct xfs_perag *pag)
{
struct xfs_btree_cur *cur;
ASSERT(pag->pag_agno < mp->m_sb.sb_agcount);
- cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_REFC,
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops,
mp->m_refc_maxlevels, xfs_refcountbt_cur_cache);
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2);
-
- cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
-
cur->bc_ag.pag = xfs_perag_hold(pag);
- cur->bc_ag.refc.nr_ops = 0;
- cur->bc_ag.refc.shape_changes = 0;
- cur->bc_ops = &xfs_refcountbt_ops;
- return cur;
-}
-
-/* Create a btree cursor. */
-struct xfs_btree_cur *
-xfs_refcountbt_init_cursor(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_buf *agbp,
- struct xfs_perag *pag)
-{
- struct xfs_agf *agf = agbp->b_addr;
- struct xfs_btree_cur *cur;
-
- cur = xfs_refcountbt_init_common(mp, tp, pag);
- cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
+ cur->bc_refc.nr_ops = 0;
+ cur->bc_refc.shape_changes = 0;
cur->bc_ag.agbp = agbp;
- return cur;
-}
+ if (agbp) {
+ struct xfs_agf *agf = agbp->b_addr;
-/* Create a btree cursor with a fake root for staging. */
-struct xfs_btree_cur *
-xfs_refcountbt_stage_cursor(
- struct xfs_mount *mp,
- struct xbtree_afakeroot *afake,
- struct xfs_perag *pag)
-{
- struct xfs_btree_cur *cur;
-
- cur = xfs_refcountbt_init_common(mp, NULL, pag);
- xfs_btree_stage_afakeroot(cur, afake);
+ cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
+ }
return cur;
}
@@ -421,7 +399,7 @@ xfs_refcountbt_commit_staged_btree(
xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS |
XFS_AGF_REFCOUNT_ROOT |
XFS_AGF_REFCOUNT_LEVEL);
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops);
+ xfs_btree_commit_afakeroot(cur, tp, agbp);
}
/* Calculate number of records in a refcount btree block. */
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index d66b37259bed..1e0ab25f6c68 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -48,8 +48,6 @@ struct xbtree_afakeroot;
extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_buf *agbp,
struct xfs_perag *pag);
-struct xfs_btree_cur *xfs_refcountbt_stage_cursor(struct xfs_mount *mp,
- struct xbtree_afakeroot *afake, struct xfs_perag *pag);
extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf);
extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 76bf7f48cb5a..ef16f6f9cef6 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -23,6 +23,7 @@
#include "xfs_error.h"
#include "xfs_inode.h"
#include "xfs_ag.h"
+#include "xfs_health.h"
struct kmem_cache *xfs_rmap_intent_cache;
@@ -56,8 +57,10 @@ xfs_rmap_lookup_le(
error = xfs_rmap_get_rec(cur, irec, &get_stat);
if (error)
return error;
- if (!get_stat)
+ if (!get_stat) {
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
+ }
return 0;
}
@@ -132,6 +135,7 @@ xfs_rmap_insert(
if (error)
goto done;
if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) {
+ xfs_btree_mark_sick(rcur);
error = -EFSCORRUPTED;
goto done;
}
@@ -145,6 +149,7 @@ xfs_rmap_insert(
if (error)
goto done;
if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(rcur);
error = -EFSCORRUPTED;
goto done;
}
@@ -174,6 +179,7 @@ xfs_rmap_delete(
if (error)
goto done;
if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(rcur);
error = -EFSCORRUPTED;
goto done;
}
@@ -182,6 +188,7 @@ xfs_rmap_delete(
if (error)
goto done;
if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ xfs_btree_mark_sick(rcur);
error = -EFSCORRUPTED;
goto done;
}
@@ -208,10 +215,10 @@ xfs_rmap_btrec_to_irec(
/* Simple checks for rmap records. */
xfs_failaddr_t
xfs_rmap_check_irec(
- struct xfs_btree_cur *cur,
+ struct xfs_perag *pag,
const struct xfs_rmap_irec *irec)
{
- struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_mount *mp = pag->pag_mount;
bool is_inode;
bool is_unwritten;
bool is_bmbt;
@@ -226,8 +233,8 @@ xfs_rmap_check_irec(
return __this_address;
} else {
/* check for valid extent range, including overflow */
- if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock,
- irec->rm_blockcount))
+ if (!xfs_verify_agbext(pag, irec->rm_startblock,
+ irec->rm_blockcount))
return __this_address;
}
@@ -262,6 +269,16 @@ xfs_rmap_check_irec(
return NULL;
}
+static inline xfs_failaddr_t
+xfs_rmap_check_btrec(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *irec)
+{
+ if (xfs_btree_is_mem_rmap(cur->bc_ops))
+ return xfs_rmap_check_irec(cur->bc_mem.pag, irec);
+ return xfs_rmap_check_irec(cur->bc_ag.pag, irec);
+}
+
static inline int
xfs_rmap_complain_bad_rec(
struct xfs_btree_cur *cur,
@@ -270,13 +287,18 @@ xfs_rmap_complain_bad_rec(
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_warn(mp,
- "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ if (xfs_btree_is_mem_rmap(cur->bc_ops))
+ xfs_warn(mp,
+ "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
+ else
+ xfs_warn(mp,
+ "Reverse Mapping BTree record corruption in AG %d detected at %pS!",
+ cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
irec->rm_blockcount);
+ xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@@ -299,7 +321,7 @@ xfs_rmap_get_rec(
fa = xfs_rmap_btrec_to_irec(rec, irec);
if (!fa)
- fa = xfs_rmap_check_irec(cur, irec);
+ fa = xfs_rmap_check_btrec(cur, irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, irec);
@@ -512,7 +534,7 @@ xfs_rmap_lookup_le_range(
*/
static int
xfs_rmap_free_check_owner(
- struct xfs_mount *mp,
+ struct xfs_btree_cur *cur,
uint64_t ltoff,
struct xfs_rmap_irec *rec,
xfs_filblks_t len,
@@ -520,6 +542,7 @@ xfs_rmap_free_check_owner(
uint64_t offset,
unsigned int flags)
{
+ struct xfs_mount *mp = cur->bc_mp;
int error = 0;
if (owner == XFS_RMAP_OWN_UNKNOWN)
@@ -529,12 +552,14 @@ xfs_rmap_free_check_owner(
if (XFS_IS_CORRUPT(mp,
(flags & XFS_RMAP_UNWRITTEN) !=
(rec->rm_flags & XFS_RMAP_UNWRITTEN))) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out;
}
/* Make sure the owner matches what we expect to find in the tree. */
if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out;
}
@@ -546,16 +571,19 @@ xfs_rmap_free_check_owner(
if (flags & XFS_RMAP_BMBT_BLOCK) {
if (XFS_IS_CORRUPT(mp,
!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out;
}
} else {
if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out;
}
if (XFS_IS_CORRUPT(mp,
offset + len > ltoff + rec->rm_blockcount)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out;
}
@@ -618,6 +646,7 @@ xfs_rmap_unmap(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -639,6 +668,7 @@ xfs_rmap_unmap(
if (XFS_IS_CORRUPT(mp,
bno <
ltrec.rm_startblock + ltrec.rm_blockcount)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -665,6 +695,7 @@ xfs_rmap_unmap(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -677,12 +708,13 @@ xfs_rmap_unmap(
ltrec.rm_startblock > bno ||
ltrec.rm_startblock + ltrec.rm_blockcount <
bno + len)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
/* Check owner information. */
- error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, len, owner,
+ error = xfs_rmap_free_check_owner(cur, ltoff, &ltrec, len, owner,
offset, flags);
if (error)
goto out_error;
@@ -697,6 +729,7 @@ xfs_rmap_unmap(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -788,6 +821,86 @@ out_error:
return error;
}
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of rmapbt live updates. If
+ * the compiler supports jump labels, the static branch will be replaced by a
+ * nop sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_rmap_hooks_switch);
+
+void
+xfs_rmap_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_rmap_hooks_switch);
+}
+
+void
+xfs_rmap_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_rmap_hooks_switch);
+}
+
+/* Call downstream hooks for a reverse mapping update. */
+static inline void
+xfs_rmap_update_hook(
+ struct xfs_trans *tp,
+ struct xfs_perag *pag,
+ enum xfs_rmap_intent_type op,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount,
+ bool unwritten,
+ const struct xfs_owner_info *oinfo)
+{
+ if (xfs_hooks_switched_on(&xfs_rmap_hooks_switch)) {
+ struct xfs_rmap_update_params p = {
+ .startblock = startblock,
+ .blockcount = blockcount,
+ .unwritten = unwritten,
+ .oinfo = *oinfo, /* struct copy */
+ };
+
+ if (pag)
+ xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &p);
+ }
+}
+
+/* Call the specified function during a reverse mapping update. */
+int
+xfs_rmap_hook_add(
+ struct xfs_perag *pag,
+ struct xfs_rmap_hook *hook)
+{
+ return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+}
+
+/* Stop calling the specified function during a reverse mapping update. */
+void
+xfs_rmap_hook_del(
+ struct xfs_perag *pag,
+ struct xfs_rmap_hook *hook)
+{
+ xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+}
+
+/* Configure rmap update hook functions. */
+void
+xfs_rmap_hook_setup(
+ struct xfs_rmap_hook *hook,
+ notifier_fn_t mod_fn)
+{
+ xfs_hook_setup(&hook->rmap_hook, mod_fn);
+}
+#else
+# define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while (0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Remove a reference to an extent in the rmap btree.
*/
@@ -808,7 +921,7 @@ xfs_rmap_free(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
-
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo);
error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -900,6 +1013,7 @@ xfs_rmap_map(
if (XFS_IS_CORRUPT(mp,
have_lt != 0 &&
ltrec.rm_startblock + ltrec.rm_blockcount > bno)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -917,10 +1031,12 @@ xfs_rmap_map(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -974,6 +1090,7 @@ xfs_rmap_map(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1021,6 +1138,7 @@ xfs_rmap_map(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -1055,6 +1173,7 @@ xfs_rmap_alloc(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
+ xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo);
error = xfs_rmap_map(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -1116,6 +1235,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1153,12 +1273,14 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if (XFS_IS_CORRUPT(mp,
LEFT.rm_startblock + LEFT.rm_blockcount >
bno)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1181,6 +1303,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1193,10 +1316,12 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1227,6 +1352,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1246,6 +1372,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1257,6 +1384,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1264,6 +1392,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1275,6 +1404,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1282,6 +1412,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1305,6 +1436,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1312,6 +1444,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1331,6 +1464,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1342,6 +1476,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1349,6 +1484,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1419,6 +1555,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1461,6 +1598,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1476,6 +1614,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1509,6 +1648,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1522,6 +1662,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 0)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1534,6 +1675,7 @@ xfs_rmap_convert(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1606,6 +1748,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1634,6 +1777,7 @@ xfs_rmap_convert_shared(
if (XFS_IS_CORRUPT(mp,
LEFT.rm_startblock + LEFT.rm_blockcount >
bno)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1652,10 +1796,12 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1706,6 +1852,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1732,6 +1879,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1758,6 +1906,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1781,6 +1930,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1816,6 +1966,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1861,6 +2012,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1896,6 +2048,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -1934,6 +2087,7 @@ xfs_rmap_convert_shared(
if (error)
goto done;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto done;
}
@@ -2023,6 +2177,7 @@ xfs_rmap_unmap_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2033,12 +2188,14 @@ xfs_rmap_unmap_shared(
ltrec.rm_startblock > bno ||
ltrec.rm_startblock + ltrec.rm_blockcount <
bno + len)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
/* Make sure the owner matches what we expect to find in the tree. */
if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2047,16 +2204,19 @@ xfs_rmap_unmap_shared(
if (XFS_IS_CORRUPT(mp,
(flags & XFS_RMAP_UNWRITTEN) !=
(ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
/* Check the offset. */
if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2113,6 +2273,7 @@ xfs_rmap_unmap_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2142,6 +2303,7 @@ xfs_rmap_unmap_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2221,6 +2383,7 @@ xfs_rmap_map_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2273,6 +2436,7 @@ xfs_rmap_map_shared(
if (error)
goto out_error;
if (XFS_IS_CORRUPT(mp, i != 1)) {
+ xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto out_error;
}
@@ -2335,15 +2499,12 @@ xfs_rmap_map_raw(
{
struct xfs_owner_info oinfo;
- oinfo.oi_owner = rmap->rm_owner;
- oinfo.oi_offset = rmap->rm_offset;
- oinfo.oi_flags = 0;
- if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
- oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
- if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
- oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
+ xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
+ rmap->rm_flags);
- if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
+ if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
+ XFS_RMAP_UNWRITTEN)) ||
+ XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
return xfs_rmap_map(cur, rmap->rm_startblock,
rmap->rm_blockcount,
rmap->rm_flags & XFS_RMAP_UNWRITTEN,
@@ -2373,7 +2534,7 @@ xfs_rmap_query_range_helper(
fa = xfs_rmap_btrec_to_irec(rec, &irec);
if (!fa)
- fa = xfs_rmap_check_irec(cur, &irec);
+ fa = xfs_rmap_check_btrec(cur, &irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, &irec);
@@ -2428,6 +2589,38 @@ xfs_rmap_finish_one_cleanup(
xfs_trans_brelse(tp, agbp);
}
+/* Commit an rmap operation into the ondisk tree. */
+int
+__xfs_rmap_finish_intent(
+ struct xfs_btree_cur *rcur,
+ enum xfs_rmap_intent_type op,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo,
+ bool unwritten)
+{
+ switch (op) {
+ case XFS_RMAP_ALLOC:
+ case XFS_RMAP_MAP:
+ return xfs_rmap_map(rcur, bno, len, unwritten, oinfo);
+ case XFS_RMAP_MAP_SHARED:
+ return xfs_rmap_map_shared(rcur, bno, len, unwritten, oinfo);
+ case XFS_RMAP_FREE:
+ case XFS_RMAP_UNMAP:
+ return xfs_rmap_unmap(rcur, bno, len, unwritten, oinfo);
+ case XFS_RMAP_UNMAP_SHARED:
+ return xfs_rmap_unmap_shared(rcur, bno, len, unwritten, oinfo);
+ case XFS_RMAP_CONVERT:
+ return xfs_rmap_convert(rcur, bno, len, !unwritten, oinfo);
+ case XFS_RMAP_CONVERT_SHARED:
+ return xfs_rmap_convert_shared(rcur, bno, len, !unwritten,
+ oinfo);
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+}
+
/*
* Process one of the deferred rmap operations. We pass back the
* btree cursor to maintain our lock on the rmapbt between calls.
@@ -2476,10 +2669,14 @@ xfs_rmap_finish_one(
* allocate blocks.
*/
error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp);
- if (error)
+ if (error) {
+ xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
return error;
- if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
+ }
+ if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
+ xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
return -EFSCORRUPTED;
+ }
rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
}
@@ -2490,39 +2687,14 @@ xfs_rmap_finish_one(
unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock);
- switch (ri->ri_type) {
- case XFS_RMAP_ALLOC:
- case XFS_RMAP_MAP:
- error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
- break;
- case XFS_RMAP_MAP_SHARED:
- error = xfs_rmap_map_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
- break;
- case XFS_RMAP_FREE:
- case XFS_RMAP_UNMAP:
- error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount,
- unwritten, &oinfo);
- break;
- case XFS_RMAP_UNMAP_SHARED:
- error = xfs_rmap_unmap_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, unwritten, &oinfo);
- break;
- case XFS_RMAP_CONVERT:
- error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount,
- !unwritten, &oinfo);
- break;
- case XFS_RMAP_CONVERT_SHARED:
- error = xfs_rmap_convert_shared(rcur, bno,
- ri->ri_bmap.br_blockcount, !unwritten, &oinfo);
- break;
- default:
- ASSERT(0);
- error = -EFSCORRUPTED;
- }
+ error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, &oinfo, unwritten);
+ if (error)
+ return error;
- return error;
+ xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno,
+ ri->ri_bmap.br_blockcount, unwritten, &oinfo);
+ return 0;
}
/*
@@ -2559,7 +2731,7 @@ __xfs_rmap_add(
bmap->br_blockcount,
bmap->br_state);
- ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
+ ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
ri->ri_owner = owner;
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 3c98d9d50afb..9d01fe689497 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -186,6 +186,10 @@ void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
struct xfs_btree_cur *rcur, int error);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
+int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur,
+ enum xfs_rmap_intent_type op, xfs_agblock_t bno,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo,
+ bool unwritten);
int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno,
uint64_t owner, uint64_t offset, unsigned int flags,
@@ -195,7 +199,7 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a,
union xfs_btree_rec;
xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
-xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur,
+xfs_failaddr_t xfs_rmap_check_irec(struct xfs_perag *pag,
const struct xfs_rmap_irec *irec);
int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
@@ -235,4 +239,29 @@ extern struct kmem_cache *xfs_rmap_intent_cache;
int __init xfs_rmap_intent_init_cache(void);
void xfs_rmap_intent_destroy_cache(void);
+/*
+ * Parameters for tracking reverse mapping changes. The hook function arg
+ * parameter is enum xfs_rmap_intent_type, and the rest is below.
+ */
+struct xfs_rmap_update_params {
+ xfs_agblock_t startblock;
+ xfs_extlen_t blockcount;
+ struct xfs_owner_info oinfo;
+ bool unwritten;
+};
+
+#ifdef CONFIG_XFS_LIVE_HOOKS
+
+struct xfs_rmap_hook {
+ struct xfs_hook rmap_hook;
+};
+
+void xfs_rmap_hook_disable(void);
+void xfs_rmap_hook_enable(void);
+
+int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn);
+#endif
+
#endif /* __XFS_RMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 6c81b20e97d2..9e759efa81cc 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -16,11 +16,14 @@
#include "xfs_btree_staging.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
static struct kmem_cache *xfs_rmapbt_cur_cache;
@@ -65,13 +68,12 @@ xfs_rmapbt_set_root(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- int btnum = cur->bc_btnum;
ASSERT(ptr->s != 0);
- agf->agf_roots[btnum] = ptr->s;
- be32_add_cpu(&agf->agf_levels[btnum], inc);
- cur->bc_ag.pag->pagf_levels[btnum] += inc;
+ agf->agf_rmap_root = ptr->s;
+ be32_add_cpu(&agf->agf_rmap_level, inc);
+ cur->bc_ag.pag->pagf_rmap_level += inc;
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -94,8 +96,6 @@ xfs_rmapbt_alloc_block(
&bno, 1);
if (error)
return error;
-
- trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1);
if (bno == NULLAGBLOCK) {
*stat = 0;
return 0;
@@ -125,8 +125,6 @@ xfs_rmapbt_free_block(
int error;
bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
- trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno,
- bno, 1);
be32_add_cpu(&agf->agf_rmap_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
@@ -226,7 +224,7 @@ xfs_rmapbt_init_ptr_from_cur(
ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
- ptr->s = agf->agf_roots[cur->bc_btnum];
+ ptr->s = agf->agf_rmap_root;
}
/*
@@ -340,18 +338,29 @@ xfs_rmapbt_verify(
if (!xfs_has_rmapbt(mp))
return __this_address;
- fa = xfs_btree_sblock_v5hdr_verify(bp);
+ fa = xfs_btree_agblock_v5hdr_verify(bp);
if (fa)
return fa;
level = be16_to_cpu(block->bb_level);
if (pag && xfs_perag_initialised_agf(pag)) {
- if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi])
+ unsigned int maxlevel = pag->pagf_rmap_level;
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ /*
+ * Online repair could be rewriting the free space btrees, so
+ * we'll validate against the larger of either tree while this
+ * is going on.
+ */
+ maxlevel = max_t(unsigned int, maxlevel,
+ pag->pagf_repair_rmap_level);
+#endif
+ if (level >= maxlevel)
return __this_address;
} else if (level >= mp->m_rmap_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
+ return xfs_btree_agblock_verify(bp, mp->m_rmap_mxr[level != 0]);
}
static void
@@ -360,7 +369,7 @@ xfs_rmapbt_read_verify(
{
xfs_failaddr_t fa;
- if (!xfs_btree_sblock_verify_crc(bp))
+ if (!xfs_btree_agblock_verify_crc(bp))
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
else {
fa = xfs_rmapbt_verify(bp);
@@ -384,7 +393,7 @@ xfs_rmapbt_write_verify(
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
return;
}
- xfs_btree_sblock_calc_crc(bp);
+ xfs_btree_agblock_calc_crc(bp);
}
@@ -476,9 +485,19 @@ xfs_rmapbt_keys_contiguous(
be32_to_cpu(key2->rmap.rm_startblock));
}
-static const struct xfs_btree_ops xfs_rmapbt_ops = {
+const struct xfs_btree_ops xfs_rmapbt_ops = {
+ .name = "rmap",
+ .type = XFS_BTREE_TYPE_AG,
+ .geom_flags = XFS_BTGEO_OVERLAPPING,
+
.rec_len = sizeof(struct xfs_rmap_rec),
+ /* Overlapping btree; 2 keys per pointer. */
.key_len = 2 * sizeof(struct xfs_rmap_key),
+ .ptr_len = XFS_BTREE_SHORT_PTR_LEN,
+
+ .lru_refs = XFS_RMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_rmap_2),
+ .sick_mask = XFS_SICK_AG_RMAPBT,
.dup_cursor = xfs_rmapbt_dup_cursor,
.set_root = xfs_rmapbt_set_root,
@@ -498,55 +517,176 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = {
.keys_contiguous = xfs_rmapbt_keys_contiguous,
};
-static struct xfs_btree_cur *
-xfs_rmapbt_init_common(
+/*
+ * Create a new reverse mapping btree cursor.
+ *
+ * For staging cursors tp and agbp are NULL.
+ */
+struct xfs_btree_cur *
+xfs_rmapbt_init_cursor(
struct xfs_mount *mp,
struct xfs_trans *tp,
+ struct xfs_buf *agbp,
struct xfs_perag *pag)
{
struct xfs_btree_cur *cur;
- /* Overlapping btree; 2 keys per pointer. */
- cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_ops,
mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
- cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING;
- cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
- cur->bc_ops = &xfs_rmapbt_ops;
-
cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_ag.agbp = agbp;
+ if (agbp) {
+ struct xfs_agf *agf = agbp->b_addr;
+
+ cur->bc_nlevels = be32_to_cpu(agf->agf_rmap_level);
+ }
return cur;
}
-/* Create a new reverse mapping btree cursor. */
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+static inline unsigned int
+xfs_rmapbt_mem_block_maxrecs(
+ unsigned int blocklen,
+ bool leaf)
+{
+ if (leaf)
+ return blocklen / sizeof(struct xfs_rmap_rec);
+ return blocklen /
+ (2 * sizeof(struct xfs_rmap_key) + sizeof(__be64));
+}
+
+/*
+ * Validate an in-memory rmap btree block. Callers are allowed to generate an
+ * in-memory btree even if the ondisk feature is not enabled.
+ */
+static xfs_failaddr_t
+xfs_rmapbt_mem_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_failaddr_t fa;
+ unsigned int level;
+ unsigned int maxrecs;
+
+ if (!xfs_verify_magic(bp, block->bb_magic))
+ return __this_address;
+
+ fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+ if (fa)
+ return fa;
+
+ level = be16_to_cpu(block->bb_level);
+ if (level >= xfs_rmapbt_maxlevels_ondisk())
+ return __this_address;
+
+ maxrecs = xfs_rmapbt_mem_block_maxrecs(
+ XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN, level == 0);
+ return xfs_btree_memblock_verify(bp, maxrecs);
+}
+
+static void
+xfs_rmapbt_mem_rw_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa = xfs_rmapbt_mem_verify(bp);
+
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+/* skip crc checks on in-memory btrees to save time */
+static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
+ .name = "xfs_rmapbt_mem",
+ .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
+ .verify_read = xfs_rmapbt_mem_rw_verify,
+ .verify_write = xfs_rmapbt_mem_rw_verify,
+ .verify_struct = xfs_rmapbt_mem_verify,
+};
+
+const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
+ .name = "mem_rmap",
+ .type = XFS_BTREE_TYPE_MEM,
+ .geom_flags = XFS_BTGEO_OVERLAPPING,
+
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ /* Overlapping btree; 2 keys per pointer. */
+ .key_len = 2 * sizeof(struct xfs_rmap_key),
+ .ptr_len = XFS_BTREE_LONG_PTR_LEN,
+
+ .lru_refs = XFS_RMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_rmap_mem_2),
+
+ .dup_cursor = xfbtree_dup_cursor,
+ .set_root = xfbtree_set_root,
+ .alloc_block = xfbtree_alloc_block,
+ .free_block = xfbtree_free_block,
+ .get_minrecs = xfbtree_get_minrecs,
+ .get_maxrecs = xfbtree_get_maxrecs,
+ .init_key_from_rec = xfs_rmapbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfbtree_init_ptr_from_cur,
+ .key_diff = xfs_rmapbt_key_diff,
+ .buf_ops = &xfs_rmapbt_mem_buf_ops,
+ .diff_two_keys = xfs_rmapbt_diff_two_keys,
+ .keys_inorder = xfs_rmapbt_keys_inorder,
+ .recs_inorder = xfs_rmapbt_recs_inorder,
+ .keys_contiguous = xfs_rmapbt_keys_contiguous,
+};
+
+/* Create a cursor for an in-memory btree. */
struct xfs_btree_cur *
-xfs_rmapbt_init_cursor(
- struct xfs_mount *mp,
+xfs_rmapbt_mem_cursor(
+ struct xfs_perag *pag,
struct xfs_trans *tp,
- struct xfs_buf *agbp,
- struct xfs_perag *pag)
+ struct xfbtree *xfbt)
{
- struct xfs_agf *agf = agbp->b_addr;
struct xfs_btree_cur *cur;
+ struct xfs_mount *mp = pag->pag_mount;
- cur = xfs_rmapbt_init_common(mp, tp, pag);
- cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
- cur->bc_ag.agbp = agbp;
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops,
+ xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);
+ cur->bc_mem.xfbtree = xfbt;
+ cur->bc_nlevels = xfbt->nlevels;
+
+ cur->bc_mem.pag = xfs_perag_hold(pag);
return cur;
}
-/* Create a new reverse mapping btree cursor with a fake root for staging. */
-struct xfs_btree_cur *
-xfs_rmapbt_stage_cursor(
+/* Create an in-memory rmap btree. */
+int
+xfs_rmapbt_mem_init(
struct xfs_mount *mp,
- struct xbtree_afakeroot *afake,
- struct xfs_perag *pag)
+ struct xfbtree *xfbt,
+ struct xfs_buftarg *btp,
+ xfs_agnumber_t agno)
{
- struct xfs_btree_cur *cur;
+ xfbt->owner = agno;
+ return xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops);
+}
- cur = xfs_rmapbt_init_common(mp, NULL, pag);
- xfs_btree_stage_afakeroot(cur, afake);
- return cur;
+/* Compute the max possible height for reverse mapping btrees in memory. */
+static unsigned int
+xfs_rmapbt_mem_maxlevels(void)
+{
+ unsigned int minrecs[2];
+ unsigned int blocklen;
+
+ blocklen = XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
+
+ minrecs[0] = xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2;
+ minrecs[1] = xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2;
+
+ /*
+ * How tall can an in-memory rmap btree become if we filled the entire
+ * AG with rmap records?
+ */
+ return xfs_btree_compute_maxlevels(minrecs,
+ XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
}
+#else
+# define xfs_rmapbt_mem_maxlevels() (0)
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
/*
* Install a new reverse mapping btree root. Caller is responsible for
@@ -563,12 +703,12 @@ xfs_rmapbt_commit_staged_btree(
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
- agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
- agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
+ agf->agf_rmap_root = cpu_to_be32(afake->af_root);
+ agf->agf_rmap_level = cpu_to_be32(afake->af_levels);
agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
XFS_AGF_RMAP_BLOCKS);
- xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
+ xfs_btree_commit_afakeroot(cur, tp, agbp);
}
/* Calculate number of records in a reverse mapping btree block. */
@@ -618,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void)
* like if it consumes almost all the blocks in the AG due to maximal
* sharing factor.
*/
- return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
+ return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
+ xfs_rmapbt_mem_maxlevels());
}
/* Compute the maximum height of an rmap btree. */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 3244715dd111..eb90d89e8086 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -10,6 +10,7 @@ struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
struct xbtree_afakeroot;
+struct xfbtree;
/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
@@ -44,8 +45,6 @@ struct xbtree_afakeroot;
struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_perag *pag);
-struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp,
- struct xbtree_afakeroot *afake, struct xfs_perag *pag);
void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, struct xfs_buf *agbp);
int xfs_rmapbt_maxrecs(int blocklen, int leaf);
@@ -64,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void);
int __init xfs_rmapbt_init_cur_cache(void);
void xfs_rmapbt_destroy_cur_cache(void);
+struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag,
+ struct xfs_trans *tp, struct xfbtree *xfbtree);
+int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
+ struct xfs_buftarg *btp, xfs_agnumber_t agno);
+
#endif /* __XFS_RMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index e31663cb7b43..386b672c5058 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -17,6 +17,7 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rtbitmap.h"
+#include "xfs_health.h"
/*
* Realtime allocator bitmap functions shared with userspace.
@@ -115,13 +116,19 @@ xfs_rtbuf_get(
if (error)
return error;
- if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map)))
+ if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) {
+ xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
+ XFS_SICK_RT_BITMAP);
return -EFSCORRUPTED;
+ }
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
+ XFS_SICK_RT_BITMAP);
if (error)
return error;
@@ -934,7 +941,7 @@ xfs_rtfree_extent(
struct timespec64 atime;
ASSERT(mp->m_rbmip->i_itemp != NULL);
- ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL);
error = xfs_rtcheck_alloc_range(&args, start, len);
if (error)
@@ -1161,3 +1168,60 @@ xfs_rtsummary_wordcount(
blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks);
return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG;
}
+
+/*
+ * Lock both realtime free space metadata inodes for a freespace update. If a
+ * transaction is given, the inodes will be joined to the transaction and the
+ * ILOCKs will be released on transaction commit.
+ */
+void
+xfs_rtbitmap_lock(
+ struct xfs_trans *tp,
+ struct xfs_mount *mp)
+{
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
+ if (tp)
+ xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
+ if (tp)
+ xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+}
+
+/* Unlock both realtime free space metadata inodes after a freespace update. */
+void
+xfs_rtbitmap_unlock(
+ struct xfs_mount *mp)
+{
+ xfs_iunlock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
+}
+
+/*
+ * Lock the realtime free space metadata inodes for a freespace scan. Callers
+ * must walk metadata blocks in order of increasing file offset.
+ */
+void
+xfs_rtbitmap_lock_shared(
+ struct xfs_mount *mp,
+ unsigned int rbmlock_flags)
+{
+ if (rbmlock_flags & XFS_RBMLOCK_BITMAP)
+ xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+
+ if (rbmlock_flags & XFS_RBMLOCK_SUMMARY)
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM);
+}
+
+/* Unlock the realtime free space metadata inodes after a freespace scan. */
+void
+xfs_rtbitmap_unlock_shared(
+ struct xfs_mount *mp,
+ unsigned int rbmlock_flags)
+{
+ if (rbmlock_flags & XFS_RBMLOCK_SUMMARY)
+ xfs_iunlock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM);
+
+ if (rbmlock_flags & XFS_RBMLOCK_BITMAP)
+ xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 152a66750af5..6186585f2c37 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -360,6 +360,19 @@ xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp,
unsigned int rsumlevels, xfs_extlen_t rbmblocks);
unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
unsigned int rsumlevels, xfs_extlen_t rbmblocks);
+
+void xfs_rtbitmap_lock(struct xfs_trans *tp, struct xfs_mount *mp);
+void xfs_rtbitmap_unlock(struct xfs_mount *mp);
+
+/* Lock the rt bitmap inode in shared mode */
+#define XFS_RBMLOCK_BITMAP (1U << 0)
+/* Lock the rt summary inode in shared mode */
+#define XFS_RBMLOCK_SUMMARY (1U << 1)
+
+void xfs_rtbitmap_lock_shared(struct xfs_mount *mp,
+ unsigned int rbmlock_flags);
+void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp,
+ unsigned int rbmlock_flags);
#else /* CONFIG_XFS_RT */
# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
@@ -378,6 +391,10 @@ xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
# define xfs_rtbitmap_wordcount(mp, r) (0)
# define xfs_rtsummary_blockcount(mp, l, b) (0)
# define xfs_rtsummary_wordcount(mp, l, b) (0)
+# define xfs_rtbitmap_lock(tp, mp) do { } while (0)
+# define xfs_rtbitmap_unlock(mp) do { } while (0)
+# define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0)
+# define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0)
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTBITMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 5bb6e2bd6dee..09e4bf949bf8 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -26,6 +26,7 @@
#include "xfs_health.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
+#include "xfs_exchrange.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -175,6 +176,10 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_NEEDSREPAIR;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64)
features |= XFS_FEAT_NREXT64;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)
+ features |= XFS_FEAT_EXCHANGE_RANGE;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT)
+ features |= XFS_FEAT_PARENT;
return features;
}
@@ -530,7 +535,8 @@ xfs_validate_sb_common(
}
if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
- XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+ XFS_FSB_TO_B(mp, sbp->sb_width), 0,
+ xfs_buf_daddr(bp) == XFS_SB_DADDR, false))
return -EFSCORRUPTED;
/*
@@ -1250,6 +1256,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
if (xfs_has_inobtcounts(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT;
+ if (xfs_has_parent(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT;
if (xfs_has_sector(mp)) {
geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
geo->logsectsize = sbp->sb_logsectsize;
@@ -1258,6 +1266,8 @@ xfs_fs_geometry(
}
if (xfs_has_large_extent_counts(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64;
+ if (xfs_has_exchange_range(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
geo->rtsectsize = sbp->sb_blocksize;
geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
@@ -1290,6 +1300,8 @@ xfs_sb_read_secondary(
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB);
if (error)
return error;
xfs_buf_set_ref(bp, XFS_SSB_REF);
@@ -1321,8 +1333,10 @@ xfs_sb_get_secondary(
}
/*
- * sunit, swidth, sectorsize(optional with 0) should be all in bytes,
- * so users won't be confused by values in error messages.
+ * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users
+ * won't be confused by values in error messages. This function returns false
+ * if the stripe geometry is invalid and the caller is unable to repair the
+ * stripe configuration later in the mount process.
*/
bool
xfs_validate_stripe_geometry(
@@ -1330,20 +1344,21 @@ xfs_validate_stripe_geometry(
__s64 sunit,
__s64 swidth,
int sectorsize,
+ bool may_repair,
bool silent)
{
if (swidth > INT_MAX) {
if (!silent)
xfs_notice(mp,
"stripe width (%lld) is too large", swidth);
- return false;
+ goto check_override;
}
if (sunit > swidth) {
if (!silent)
xfs_notice(mp,
"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
- return false;
+ goto check_override;
}
if (sectorsize && (int)sunit % sectorsize) {
@@ -1351,21 +1366,21 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe unit (%lld) must be a multiple of the sector size (%d)",
sunit, sectorsize);
- return false;
+ goto check_override;
}
if (sunit && !swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe unit (%lld) and stripe width of 0", sunit);
- return false;
+ goto check_override;
}
if (!sunit && swidth) {
if (!silent)
xfs_notice(mp,
"invalid stripe width (%lld) and stripe unit of 0", swidth);
- return false;
+ goto check_override;
}
if (sunit && (int)swidth % (int)sunit) {
@@ -1373,9 +1388,27 @@ xfs_validate_stripe_geometry(
xfs_notice(mp,
"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
swidth, sunit);
- return false;
+ goto check_override;
}
return true;
+
+check_override:
+ if (!may_repair)
+ return false;
+ /*
+ * During mount, mp->m_dalign will not be set unless the sunit mount
+ * option was set. If it was set, ignore the bad stripe alignment values
+ * and allow the validation and overwrite later in the mount process to
+ * attempt to overwrite the bad stripe alignment values with the values
+ * supplied by mount options.
+ */
+ if (!mp->m_dalign)
+ return false;
+ if (!silent)
+ xfs_notice(mp,
+"Will try to correct with specified mount options sunit (%d) and swidth (%d)",
+ BBTOB(mp->m_dalign), BBTOB(mp->m_swidth));
+ return true;
}
/*
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 2e8e8d63d4eb..37b1ed1bc209 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp,
struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf **bpp);
-extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
- __s64 sunit, __s64 swidth, int sectorsize, bool silent);
+bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
+ __s64 sunit, __s64 swidth, int sectorsize, bool may_repair,
+ bool silent);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 4220d3584c1b..34f104ed372c 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -43,6 +43,60 @@ extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+/* btree ops */
+extern const struct xfs_btree_ops xfs_bnobt_ops;
+extern const struct xfs_btree_ops xfs_cntbt_ops;
+extern const struct xfs_btree_ops xfs_inobt_ops;
+extern const struct xfs_btree_ops xfs_finobt_ops;
+extern const struct xfs_btree_ops xfs_bmbt_ops;
+extern const struct xfs_btree_ops xfs_refcountbt_ops;
+extern const struct xfs_btree_ops xfs_rmapbt_ops;
+extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
+
+static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_bnobt_ops;
+}
+
+static inline bool xfs_btree_is_cnt(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_cntbt_ops;
+}
+
+static inline bool xfs_btree_is_bmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_bmbt_ops;
+}
+
+static inline bool xfs_btree_is_ino(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_inobt_ops;
+}
+
+static inline bool xfs_btree_is_fino(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_finobt_ops;
+}
+
+static inline bool xfs_btree_is_refcount(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_refcountbt_ops;
+}
+
+static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_rmapbt_ops;
+}
+
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_rmapbt_mem_ops;
+}
+#else
+# define xfs_btree_is_mem_rmap(...) (false)
+#endif
+
/* log size calculation functions */
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
@@ -70,7 +124,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_RES_FDBLKS (1u << 6)
/* Transaction contains an intent done log item */
#define XFS_TRANS_HAS_INTENT_DONE (1u << 7)
-
/*
* LOWMODE is used by the allocator to activate the lowspace algorithm - when
* free space is running low the extent allocator may choose to allocate an
@@ -82,7 +135,10 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
* for free space from AG 0. If the correct transaction reservations have been
* made then this algorithm will eventually find all the space it needs.
*/
-#define XFS_TRANS_LOWMODE 0x100 /* allocate in low space mode */
+#define XFS_TRANS_LOWMODE (1u << 8)
+
+/* Transaction has locked the rtbitmap and rtsum inodes */
+#define XFS_TRANS_RTBITMAP_LOCKED (1u << 9)
/*
* Field values for xfs_trans_mod_sb.
@@ -128,19 +184,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */
#define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */
-
-/*
- * Symlink decoding/encoding functions
- */
-int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
-int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
- uint32_t size, struct xfs_buf *bp);
-bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
- uint32_t size, struct xfs_buf *bp);
-void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
- struct xfs_inode *ip, struct xfs_ifork *ifp);
-xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size);
-
/* Computed inode geometry for the filesystem. */
struct xfs_ino_geometry {
/* Maximum inode count in this filesystem. */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 160aa20aa441..f228127a88ff 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -16,7 +16,10 @@
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
-
+#include "xfs_symlink_remote.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_health.h"
/*
* Each contiguous block has a header, so it is not just a simple pathlen
@@ -166,7 +169,8 @@ xfs_symlink_local_to_remote(
struct xfs_trans *tp,
struct xfs_buf *bp,
struct xfs_inode *ip,
- struct xfs_ifork *ifp)
+ struct xfs_ifork *ifp,
+ void *priv)
{
struct xfs_mount *mp = ip->i_mount;
char *buf;
@@ -227,3 +231,200 @@ xfs_symlink_shortform_verify(
return __this_address;
return NULL;
}
+
+/* Read a remote symlink target into the buffer. */
+int
+xfs_symlink_remote_read(
+ struct xfs_inode *ip,
+ char *link)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_buf *bp;
+ xfs_daddr_t d;
+ char *cur_chunk;
+ int pathlen = ip->i_disk_size;
+ int nmaps = XFS_SYMLINK_MAPS;
+ int byte_cnt;
+ int n;
+ int error = 0;
+ int fsblocks = 0;
+ int offset;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ fsblocks = xfs_symlink_blocks(mp, pathlen);
+ error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0);
+ if (error)
+ goto out;
+
+ offset = 0;
+ for (n = 0; n < nmaps; n++) {
+ d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+ byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+
+ error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
+ &bp, &xfs_symlink_buf_ops);
+ if (xfs_metadata_is_sick(error))
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
+ if (error)
+ return error;
+ byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
+ if (pathlen < byte_cnt)
+ byte_cnt = pathlen;
+
+ cur_chunk = bp->b_addr;
+ if (xfs_has_crc(mp)) {
+ if (!xfs_symlink_hdr_ok(ip->i_ino, offset,
+ byte_cnt, bp)) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
+ error = -EFSCORRUPTED;
+ xfs_alert(mp,
+"symlink header does not match required off/len/owner (0x%x/0x%x,0x%llx)",
+ offset, byte_cnt, ip->i_ino);
+ xfs_buf_relse(bp);
+ goto out;
+
+ }
+
+ cur_chunk += sizeof(struct xfs_dsymlink_hdr);
+ }
+
+ memcpy(link + offset, cur_chunk, byte_cnt);
+
+ pathlen -= byte_cnt;
+ offset += byte_cnt;
+
+ xfs_buf_relse(bp);
+ }
+ ASSERT(pathlen == 0);
+
+ link[ip->i_disk_size] = '\0';
+ error = 0;
+
+ out:
+ return error;
+}
+
+/* Write the symlink target into the inode. */
+int
+xfs_symlink_write_target(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_ino_t owner,
+ const char *target_path,
+ int pathlen,
+ xfs_fsblock_t fs_blocks,
+ uint resblks)
+{
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_mount *mp = tp->t_mountp;
+ const char *cur_chunk;
+ struct xfs_buf *bp;
+ xfs_daddr_t d;
+ int byte_cnt;
+ int nmaps;
+ int offset = 0;
+ int n;
+ int error;
+
+ /*
+ * If the symlink will fit into the inode, write it inline.
+ */
+ if (pathlen <= xfs_inode_data_fork_size(ip)) {
+ xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen);
+
+ ip->i_disk_size = pathlen;
+ ip->i_df.if_format = XFS_DINODE_FMT_LOCAL;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
+ return 0;
+ }
+
+ nmaps = XFS_SYMLINK_MAPS;
+ error = xfs_bmapi_write(tp, ip, 0, fs_blocks, XFS_BMAPI_METADATA,
+ resblks, mval, &nmaps);
+ if (error)
+ return error;
+
+ ip->i_disk_size = pathlen;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+ cur_chunk = target_path;
+ offset = 0;
+ for (n = 0; n < nmaps; n++) {
+ char *buf;
+
+ d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+ byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+ BTOBB(byte_cnt), 0, &bp);
+ if (error)
+ return error;
+ bp->b_ops = &xfs_symlink_buf_ops;
+
+ byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
+ byte_cnt = min(byte_cnt, pathlen);
+
+ buf = bp->b_addr;
+ buf += xfs_symlink_hdr_set(mp, owner, offset, byte_cnt, bp);
+
+ memcpy(buf, cur_chunk, byte_cnt);
+
+ cur_chunk += byte_cnt;
+ pathlen -= byte_cnt;
+ offset += byte_cnt;
+
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
+ xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
+ (char *)bp->b_addr);
+ }
+ ASSERT(pathlen == 0);
+ return 0;
+}
+
+/* Remove all the blocks from a symlink and invalidate buffers. */
+int
+xfs_symlink_remote_truncate(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS];
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *bp;
+ int nmaps = XFS_SYMLINK_MAPS;
+ int done = 0;
+ int i;
+ int error;
+
+ /* Read mappings and invalidate buffers. */
+ error = xfs_bmapi_read(ip, 0, XFS_MAX_FILEOFF, mval, &nmaps, 0);
+ if (error)
+ return error;
+
+ for (i = 0; i < nmaps; i++) {
+ if (!xfs_bmap_is_real_extent(&mval[i]))
+ break;
+
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
+ XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0,
+ &bp);
+ if (error)
+ return error;
+
+ xfs_trans_binval(tp, bp);
+ }
+
+ /* Unmap the remote blocks. */
+ error = xfs_bunmapi(tp, ip, 0, XFS_MAX_FILEOFF, 0, nmaps, &done);
+ if (error)
+ return error;
+ if (!done) {
+ ASSERT(done);
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.h b/fs/xfs/libxfs/xfs_symlink_remote.h
new file mode 100644
index 000000000000..c1672fe1f17b
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_symlink_remote.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_SYMLINK_REMOTE_H
+#define __XFS_SYMLINK_REMOTE_H
+
+/*
+ * Symlink decoding/encoding functions
+ */
+int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
+int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+ uint32_t size, struct xfs_buf *bp);
+bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset,
+ uint32_t size, struct xfs_buf *bp);
+void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
+ struct xfs_inode *ip, struct xfs_ifork *ifp,
+ void *priv);
+xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size);
+int xfs_symlink_remote_read(struct xfs_inode *ip, char *link);
+int xfs_symlink_write_target(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_ino_t owner, const char *target_path, int pathlen,
+ xfs_fsblock_t fs_blocks, uint resblks);
+int xfs_symlink_remote_truncate(struct xfs_trans *tp, struct xfs_inode *ip);
+
+#endif /* __XFS_SYMLINK_REMOTE_H */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 70e97ea6eee7..69fc5b981352 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -31,7 +31,7 @@ xfs_trans_ijoin(
{
struct xfs_inode_log_item *iip;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
if (ip->i_itemp == NULL)
xfs_inode_item_init(ip, ip->i_mount);
iip = ip->i_itemp;
@@ -60,7 +60,7 @@ xfs_trans_ichgtime(
struct timespec64 tv;
ASSERT(tp);
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
tv = current_time(inode);
@@ -90,7 +90,7 @@ xfs_trans_log_inode(
struct inode *inode = VFS_I(ip);
ASSERT(iip);
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
tp->t_flags |= XFS_TRANS_DIRTY;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 6cd45e8c118d..6dbe6e7251e7 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -20,6 +20,9 @@
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_rtbitmap.h"
+#include "xfs_attr_item.h"
+#include "xfs_log.h"
+#include "xfs_da_format.h"
#define _ALLOC true
#define _FREE false
@@ -422,29 +425,110 @@ xfs_calc_itruncate_reservation_minlogsize(
return xfs_calc_itruncate_reservation(mp, true);
}
+static inline unsigned int xfs_calc_pptr_link_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+static inline unsigned int xfs_calc_pptr_replace_overhead(void)
+{
+ return sizeof(struct xfs_attri_log_format) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1) +
+ xlog_calc_iovec_len(sizeof(struct xfs_parent_rec)) +
+ xlog_calc_iovec_len(MAXNAMELEN - 1);
+}
+
/*
* In renaming a files we can modify:
* the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
+ * of bmap blocks) giving (t2):
* the agf for the ags in which the blocks live: 3 * sector size
* the agfl for the ags in which the blocks live: 3 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ * If parent pointers are enabled (t3), then each transaction in the chain
+ * must be capable of setting or removing the extended attribute
+ * containing the parent information. It must also be able to handle
+ * the three xattr intent items that track the progress of the parent
+ * pointer update.
*/
STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 5) +
- xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_inode_res(mp, 5) +
+ xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+ XFS_FSB_TO_B(mp, 1));
+
+ t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ unsigned int rename_overhead, exchange_overhead;
+
+ t3 = max(resp->tr_attrsetm.tr_logres,
+ resp->tr_attrrm.tr_logres);
+
+ /*
+ * For a standard rename, the three xattr intent log items
+ * are (1) replacing the pptr for the source file; (2)
+ * removing the pptr on the dest file; and (3) adding a
+ * pptr for the whiteout file in the src dir.
+ *
+ * For an RENAME_EXCHANGE, there are two xattr intent
+ * items to replace the pptr for both src and dest
+ * files. Link counts don't change and there is no
+ * whiteout.
+ *
+ * In the worst case we can end up relogging all log
+ * intent items to allow the log tail to move ahead, so
+ * they become overhead added to each transaction in a
+ * processing chain.
+ */
+ rename_overhead = xfs_calc_pptr_replace_overhead() +
+ xfs_calc_pptr_unlink_overhead() +
+ xfs_calc_pptr_link_overhead();
+ exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();
+
+ overhead += max(rename_overhead, exchange_overhead);
+ }
+
+ return overhead + max3(t1, t2, t3);
+}
+
+static inline unsigned int
+xfs_rename_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ /* One for the rename, one more for freeing blocks */
+ unsigned int ret = XFS_RENAME_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to remove or add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += max(resp->tr_attrsetm.tr_logcount,
+ resp->tr_attrrm.tr_logcount);
+
+ return ret;
}
/*
@@ -461,6 +545,23 @@ xfs_calc_iunlink_remove_reservation(
2 * M_IGEO(mp)->inode_cluster_size;
}
+static inline unsigned int
+xfs_link_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_LINK_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* For creating a link to an inode:
* the parent directory inode: inode size
@@ -477,14 +578,23 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_remove_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_remove_reservation(mp);
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -499,6 +609,23 @@ xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
M_IGEO(mp)->inode_cluster_size;
}
+static inline unsigned int
+xfs_remove_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_REMOVE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrrm.tr_logcount;
+
+ return ret;
+}
+
/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
@@ -515,14 +642,24 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- xfs_calc_iunlink_add_reservation(mp) +
- max((xfs_calc_inode_res(mp, 2) +
- xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ overhead += xfs_calc_iunlink_add_reservation(mp);
+
+ t1 = xfs_calc_inode_res(mp, 2) +
+ xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+ t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
+ XFS_FSB_TO_B(mp, 1));
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrrm.tr_logres;
+ overhead += xfs_calc_pptr_unlink_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
/*
@@ -571,12 +708,40 @@ xfs_calc_icreate_resv_alloc(
xfs_calc_finobt_res(mp);
}
+static inline unsigned int
+xfs_icreate_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_CREATE_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
STATIC uint
-xfs_calc_icreate_reservation(xfs_mount_t *mp)
+xfs_calc_icreate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max(xfs_calc_icreate_resv_alloc(mp),
- xfs_calc_create_resv_modify(mp));
+ struct xfs_trans_resv *resp = M_RES(mp);
+ unsigned int overhead = XFS_DQUOT_LOGRES(mp);
+ unsigned int t1, t2, t3 = 0;
+
+ t1 = xfs_calc_icreate_resv_alloc(mp);
+ t2 = xfs_calc_create_resv_modify(mp);
+
+ if (xfs_has_parent(mp)) {
+ t3 = resp->tr_attrsetm.tr_logres;
+ overhead += xfs_calc_pptr_link_overhead();
+ }
+
+ return overhead + max3(t1, t2, t3);
}
STATIC uint
@@ -589,6 +754,23 @@ xfs_calc_create_tmpfile_reservation(
return res + xfs_calc_iunlink_add_reservation(mp);
}
+static inline unsigned int
+xfs_mkdir_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_MKDIR_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
+
/*
* Making a new directory is the same as creating a new file.
*/
@@ -599,6 +781,22 @@ xfs_calc_mkdir_reservation(
return xfs_calc_icreate_reservation(mp);
}
+static inline unsigned int
+xfs_symlink_log_count(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ unsigned int ret = XFS_SYMLINK_LOG_COUNT;
+
+ /*
+ * Pre-reserve enough log reservation to handle the transaction
+ * rolling needed to add one parent pointer.
+ */
+ if (xfs_has_parent(mp))
+ ret += resp->tr_attrsetm.tr_logcount;
+
+ return ret;
+}
/*
* Making a new symplink is the same as creating a new file, but
@@ -911,54 +1109,76 @@ xfs_calc_sb_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
-void
-xfs_trans_resv_calc(
+/*
+ * Namespace reservations.
+ *
+ * These get tricky when parent pointers are enabled as we have attribute
+ * modifications occurring from within these transactions. Rather than confuse
+ * each of these reservation calculations with the conditional attribute
+ * reservations, add them here in a clear and concise manner. This requires that
+ * the attribute reservations have already been calculated.
+ *
+ * Note that we only include the static attribute reservation here; the runtime
+ * reservation will have to be modified by the size of the attributes being
+ * added/removed/modified. See the comments on the attribute reservation
+ * calculations for more details.
+ */
+STATIC void
+xfs_calc_namespace_reservations(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
- int logcount_adj = 0;
-
- /*
- * The following transactions are logged in physical format and
- * require a permanent reservation on space.
- */
- resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
- resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
- resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
- resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
- resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
- resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ ASSERT(resp->tr_attrsetm.tr_logres > 0);
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
- resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
+ resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
- resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
+ resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
- resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
+ resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
- resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
+ resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
- resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
+ resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
+ resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
+ resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+}
+
+void
+xfs_trans_resv_calc(
+ struct xfs_mount *mp,
+ struct xfs_trans_resv *resp)
+{
+ int logcount_adj = 0;
+
+ /*
+ * The following transactions are logged in physical format and
+ * require a permanent reservation on space.
+ */
+ resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
+ resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
+ resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+ resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
+ resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
+ resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
resp->tr_create_tmpfile.tr_logres =
xfs_calc_create_tmpfile_reservation(mp);
resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
- resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
- resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
- resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
-
resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -988,6 +1208,8 @@ xfs_trans_resv_calc(
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ xfs_calc_namespace_reservations(mp, resp);
+
/*
* The following transactions are logged in logical format with
* a default log count.
diff --git a/fs/xfs/libxfs/xfs_trans_space.c b/fs/xfs/libxfs/xfs_trans_space.c
new file mode 100644
index 000000000000..b9dc3752f702
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_trans_space.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+
+/* Calculate the disk space required to add a parent pointer. */
+unsigned int
+xfs_parent_calc_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ /*
+ * Parent pointers are always the first attr in an attr tree, and never
+ * larger than a block
+ */
+ return XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) +
+ XFS_NEXTENTADD_SPACE_RES(mp, namelen, XFS_ATTR_FORK);
+}
+
+unsigned int
+xfs_create_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret;
+
+ ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen);
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+
+unsigned int
+xfs_mkdir_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ return xfs_create_space_res(mp, namelen);
+}
+
+unsigned int
+xfs_link_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret;
+
+ ret = XFS_DIRENTER_SPACE_RES(mp, namelen);
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+
+unsigned int
+xfs_symlink_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen,
+ unsigned int fsblocks)
+{
+ unsigned int ret;
+
+ ret = XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp, namelen) +
+ fsblocks;
+
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+
+unsigned int
+xfs_remove_space_res(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int ret = XFS_DIRREMOVE_SPACE_RES(mp);
+
+ if (xfs_has_parent(mp))
+ ret += xfs_parent_calc_space_res(mp, namelen);
+
+ return ret;
+}
+
+unsigned int
+xfs_rename_space_res(
+ struct xfs_mount *mp,
+ unsigned int src_namelen,
+ bool target_exists,
+ unsigned int target_namelen,
+ bool has_whiteout)
+{
+ unsigned int ret;
+
+ ret = XFS_DIRREMOVE_SPACE_RES(mp) +
+ XFS_DIRENTER_SPACE_RES(mp, target_namelen);
+
+ if (xfs_has_parent(mp)) {
+ if (has_whiteout)
+ ret += xfs_parent_calc_space_res(mp, src_namelen);
+ ret += 2 * xfs_parent_calc_space_res(mp, target_namelen);
+ }
+
+ if (target_exists)
+ ret += xfs_parent_calc_space_res(mp, target_namelen);
+
+ return ret;
+}
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 87b31c69a773..1155ff2d37e2 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -10,6 +10,10 @@
* Components of space reservations.
*/
+/* Worst case number of bmaps that can be held in a block. */
+#define XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp) \
+ (((mp)->m_bmap_dmxr[0]) - ((mp)->m_bmap_dmnr[0]))
+
/* Worst case number of rmaps that can be held in a block. */
#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
(((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
@@ -76,31 +80,32 @@
/* This macro is not used - see inline code in xfs_attr_set */
#define XFS_ATTRSET_SPACE_RES(mp, v) \
(XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
-#define XFS_CREATE_SPACE_RES(mp,nl) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
(2 * (mp)->m_alloc_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
-#define XFS_LINK_SPACE_RES(mp,nl) \
- XFS_DIRENTER_SPACE_RES(mp,nl)
-#define XFS_MKDIR_SPACE_RES(mp,nl) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_QM_DQALLOC_SPACE_RES(mp) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
XFS_DQUOT_CLUSTER_SIZE_FSB)
#define XFS_QM_QINOCREATE_SPACE_RES(mp) \
XFS_IALLOC_SPACE_RES(mp)
-#define XFS_REMOVE_SPACE_RES(mp) \
- XFS_DIRREMOVE_SPACE_RES(mp)
-#define XFS_RENAME_SPACE_RES(mp,nl) \
- (XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
-#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
- (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
(xfs_has_finobt(mp) ? M_IGEO(mp)->inobt_maxlevels : 0)
+unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp,
+ unsigned int namelen);
+
+unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen);
+unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen,
+ unsigned int fsblocks);
+unsigned int xfs_remove_space_res(struct xfs_mount *mp, unsigned int namelen);
+
+unsigned int xfs_rename_space_res(struct xfs_mount *mp,
+ unsigned int src_namelen, bool target_exists,
+ unsigned int target_namelen, bool has_whiteout);
#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 62e02d5380ad..76eb9e328835 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -80,11 +80,13 @@ typedef void * xfs_failaddr_t;
/*
* Inode fork identifiers.
*/
-#define XFS_DATA_FORK 0
-#define XFS_ATTR_FORK 1
-#define XFS_COW_FORK 2
+#define XFS_STAGING_FORK (-1) /* fake fork for staging a btree */
+#define XFS_DATA_FORK (0)
+#define XFS_ATTR_FORK (1)
+#define XFS_COW_FORK (2)
#define XFS_WHICHFORK_STRINGS \
+ { XFS_STAGING_FORK, "staging" }, \
{ XFS_DATA_FORK, "data" }, \
{ XFS_ATTR_FORK, "attr" }, \
{ XFS_COW_FORK, "cow" }
@@ -114,24 +116,6 @@ typedef enum {
{ XFS_LOOKUP_LEi, "le" }, \
{ XFS_LOOKUP_GEi, "ge" }
-/*
- * This enum is used in string mapping in xfs_trace.h and scrub/trace.h;
- * please keep the TRACE_DEFINE_ENUMs for it up to date.
- */
-typedef enum {
- XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi,
- XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX
-} xfs_btnum_t;
-
-#define XFS_BTNUM_STRINGS \
- { XFS_BTNUM_BNOi, "bnobt" }, \
- { XFS_BTNUM_CNTi, "cntbt" }, \
- { XFS_BTNUM_RMAPi, "rmapbt" }, \
- { XFS_BTNUM_BMAPi, "bmbt" }, \
- { XFS_BTNUM_INOi, "inobt" }, \
- { XFS_BTNUM_FINOi, "finobt" }, \
- { XFS_BTNUM_REFCi, "refcbt" }
-
struct xfs_name {
const unsigned char *name;
int len;