aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r--fs/xfs/xfs_buf.c45
1 files changed, 44 insertions, 1 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e839907e8492..e36124546d0d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone;
#define xb_to_gfp(flags) \
((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
+/*
+ * Locking orders
+ *
+ * xfs_buf_ioacct_inc:
+ * xfs_buf_ioacct_dec:
+ * b_sema (caller holds)
+ * b_lock
+ *
+ * xfs_buf_stale:
+ * b_sema (caller holds)
+ * b_lock
+ * lru_lock
+ *
+ * xfs_buf_rele:
+ * b_lock
+ * pag_buf_lock
+ * lru_lock
+ *
+ * xfs_buftarg_wait_rele
+ * lru_lock
+ * b_lock (trylock due to inversion)
+ *
+ * xfs_buftarg_isolate
+ * lru_lock
+ * b_lock (trylock due to inversion)
+ */
static inline int
xfs_buf_is_vmapped(
@@ -1006,8 +1032,18 @@ xfs_buf_rele(
ASSERT(atomic_read(&bp->b_hold) > 0);
- release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+ /*
+ * We grab the b_lock here first to serialise racing xfs_buf_rele()
+ * calls. The pag_buf_lock being taken on the last reference only
+ * serialises against racing lookups in xfs_buf_find(). IOWs, the second
+ * to last reference we drop here is not serialised against the last
+ * reference until we take bp->b_lock. Hence if we don't grab b_lock
+ * first, the last "release" reference can win the race to the lock and
+ * free the buffer before the second-to-last reference is processed,
+ * leading to a use-after-free scenario.
+ */
spin_lock(&bp->b_lock);
+ release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
if (!release) {
/*
* Drop the in-flight state if the buffer is already on the LRU
@@ -1989,6 +2025,13 @@ xfs_buf_delwri_submit_buffers(
* is only safely useable for callers that can track I/O completion by higher
* level means, e.g. AIL pushing as the @buffer_list is consumed in this
* function.
+ *
+ * Note: this function will skip buffers it would block on, and in doing so
+ * leaves them on @buffer_list so they can be retried on a later pass. As such,
+ * it is up to the caller to ensure that the buffer list is fully submitted or
+ * cancelled appropriately when they are finished with the list. Failure to
+ * cancel or resubmit the list until it is empty will result in leaked buffers
+ * at unmount time.
*/
int
xfs_buf_delwri_submit_nowait(