Diffstat (limited to 'fs/xfs')
-rw-r--r--   fs/xfs/xfs_buf.c        | 45
-rw-r--r--   fs/xfs/xfs_trans_ail.c  | 28
2 files changed, 66 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e839907e8492..e36124546d0d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone;
 #define xb_to_gfp(flags) \
 	((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
 
+/*
+ * Locking orders
+ *
+ * xfs_buf_ioacct_inc:
+ * xfs_buf_ioacct_dec:
+ *	b_sema (caller holds)
+ *	  b_lock
+ *
+ * xfs_buf_stale:
+ *	b_sema (caller holds)
+ *	  b_lock
+ *	    lru_lock
+ *
+ * xfs_buf_rele:
+ *	b_lock
+ *	  pag_buf_lock
+ *	    lru_lock
+ *
+ * xfs_buftarg_wait_rele
+ *	lru_lock
+ *	  b_lock (trylock due to inversion)
+ *
+ * xfs_buftarg_isolate
+ *	lru_lock
+ *	  b_lock (trylock due to inversion)
+ */
 
 static inline int
 xfs_buf_is_vmapped(
@@ -1006,8 +1032,18 @@ xfs_buf_rele(
 
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 
-	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+	/*
+	 * We grab the b_lock here first to serialise racing xfs_buf_rele()
+	 * calls. The pag_buf_lock being taken on the last reference only
+	 * serialises against racing lookups in xfs_buf_find(). IOWs, the second
+	 * to last reference we drop here is not serialised against the last
+	 * reference until we take bp->b_lock. Hence if we don't grab b_lock
+	 * first, the last "release" reference can win the race to the lock and
+	 * free the buffer before the second-to-last reference is processed,
+	 * leading to a use-after-free scenario.
+	 */
 	spin_lock(&bp->b_lock);
+	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
 	if (!release) {
 		/*
 		 * Drop the in-flight state if the buffer is already on the LRU
@@ -1989,6 +2025,13 @@ xfs_buf_delwri_submit_buffers(
  * is only safely useable for callers that can track I/O completion by higher
  * level means, e.g. AIL pushing as the @buffer_list is consumed in this
  * function.
+ *
+ * Note: this function will skip buffers it would block on, and in doing so
+ * leaves them on @buffer_list so they can be retried on a later pass. As such,
+ * it is up to the caller to ensure that the buffer list is fully submitted or
+ * cancelled appropriately when they are finished with the list. Failure to
+ * cancel or resubmit the list until it is empty will result in leaked buffers
+ * at unmount time.
  */
 int
 xfs_buf_delwri_submit_nowait(
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 55326f971cb3..d3a4e89bf4a0 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -531,17 +531,33 @@ xfsaild(
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		/*
-		 * Check kthread_should_stop() after we set the task state
-		 * to guarantee that we either see the stop bit and exit or
-		 * the task state is reset to runnable such that it's not
-		 * scheduled out indefinitely and detects the stop bit at
-		 * next iteration.
-		 *
+		 * Check kthread_should_stop() after we set the task state to
+		 * guarantee that we either see the stop bit and exit or the
+		 * task state is reset to runnable such that it's not scheduled
+		 * out indefinitely and detects the stop bit at next iteration.
 		 * A memory barrier is included in above task state set to
 		 * serialize again kthread_stop().
 		 */
 		if (kthread_should_stop()) {
 			__set_current_state(TASK_RUNNING);
+
+			/*
+			 * The caller forces out the AIL before stopping the
+			 * thread in the common case, which means the delwri
+			 * queue is drained. In the shutdown case, the queue may
+			 * still hold relogged buffers that haven't been
+			 * submitted because they were pinned since added to the
+			 * queue.
+			 *
+			 * Log I/O error processing stales the underlying buffer
+			 * and clears the delwri state, expecting the buf to be
+			 * removed on the next submission attempt. That won't
+			 * happen if we're shutting down, so this is the last
+			 * opportunity to release such buffers from the queue.
+			 */
+			ASSERT(list_empty(&ailp->ail_buf_list) ||
+			       XFS_FORCED_SHUTDOWN(ailp->ail_mount));
+			xfs_buf_delwri_cancel(&ailp->ail_buf_list);
 			break;
 		}
 
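The "trylock due to inversion" entries in the locking-order table above deserve a concrete illustration. The sketch below is patterned on the list_lru isolate callbacks the table names (xfs_buftarg_isolate and xfs_buftarg_wait_rele), but it is not code from this patch: the function name example_isolate is hypothetical and the dispose-list handling is only a plausible shape for such a walker. The point is the trylock itself. The walker is called with lru_lock already held, which inverts the b_lock -> lru_lock order used by xfs_buf_rele(), so blocking on b_lock here could deadlock.

static enum lru_status
example_isolate(
	struct list_head	*item,
	struct list_lru_one	*lru,
	spinlock_t		*lru_lock,
	void			*arg)
{
	struct list_head	*dispose = arg;
	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);

	/*
	 * lru_lock is held on entry, inverting the b_lock -> lru_lock
	 * order used by xfs_buf_rele(). Blocking on b_lock could
	 * therefore deadlock; trylock and skip the buffer instead,
	 * leaving it for a later pass of the walker.
	 */
	if (!spin_trylock(&bp->b_lock))
		return LRU_SKIP;

	/* buffer state is now stable; move it to the dispose list */
	list_lru_isolate_move(lru, item, dispose);
	spin_unlock(&bp->b_lock);
	return LRU_REMOVED;
}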
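The xfs_buf_rele() hunk is subtle because the fix is purely a reordering: take b_lock before the atomic_dec_and_lock() rather than after. As a rough userspace analogue, the standalone program below models the same ordering with pthreads and C11 atomics. Every name in it (fake_buf, fake_rele, cache_lock, rele_thread) is hypothetical scaffolding, and atomic_dec_and_lock() is only approximated, so treat it as a sketch of the race, not as XFS code.

/*
 * Minimal userspace analogue of the xfs_buf_rele() ordering fix.
 * All names are hypothetical; this models only the lock-before-
 * decrement ordering, not real XFS behaviour.
 * Build: cc -std=c11 -pthread rele_sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* stands in for pag->pag_buf_lock, which serialises cache lookups */
static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

struct fake_buf {
	atomic_int	hold;	/* models bp->b_hold */
	pthread_mutex_t	lock;	/* models bp->b_lock */
};

static void fake_rele(struct fake_buf *bp)
{
	bool release;

	/*
	 * Take the per-buffer lock BEFORE dropping our reference. If we
	 * decremented first, a racing fake_rele() dropping the final
	 * reference could win the race to bp->lock and free the buffer
	 * while this thread is still about to dereference it below.
	 */
	pthread_mutex_lock(&bp->lock);

	/* approximates atomic_dec_and_lock(): lock the cache on 1 -> 0 */
	release = (atomic_fetch_sub(&bp->hold, 1) == 1);
	if (!release) {
		/* not the last reference: touch per-buffer state only */
		pthread_mutex_unlock(&bp->lock);
		return;
	}

	/* last reference: unhash from the cache, then free */
	pthread_mutex_lock(&cache_lock);
	/* ... remove bp from the lookup structure here ... */
	pthread_mutex_unlock(&cache_lock);
	pthread_mutex_unlock(&bp->lock);
	pthread_mutex_destroy(&bp->lock);
	free(bp);
}

static void *rele_thread(void *arg)
{
	fake_rele(arg);
	return NULL;
}

int main(void)
{
	struct fake_buf *bp = malloc(sizeof(*bp));
	pthread_t a, b;

	atomic_init(&bp->hold, 2);	/* two outstanding references */
	pthread_mutex_init(&bp->lock, NULL);

	pthread_create(&a, NULL, rele_thread, bp);
	pthread_create(&b, NULL, rele_thread, bp);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("both references dropped safely");
	return 0;
}

Moving the pthread_mutex_lock() below the decrement reintroduces the bug the patch comment describes: the thread dropping the final reference can acquire the per-buffer lock first and free the buffer while the second-to-last reference is still being processed.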
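The new comment on xfs_buf_delwri_submit_nowait() states a caller contract rather than a behaviour change, so a conforming caller is worth sketching. The loop below is hypothetical (a real caller such as the AIL push worker sleeps between passes rather than spinning) and reuses only identifiers visible in the diff above.

/*
 * Hypothetical caller honouring the xfs_buf_delwri_submit_nowait()
 * contract: buffers the submission skipped stay on the list, so keep
 * resubmitting until it drains, and cancel the list explicitly once
 * submission can no longer make progress.
 */
static void push_ail_buffers(struct xfs_ail *ailp)
{
	while (!list_empty(&ailp->ail_buf_list)) {
		if (XFS_FORCED_SHUTDOWN(ailp->ail_mount)) {
			/*
			 * Stale buffers will never be removed by a
			 * further submission attempt now; release them
			 * here or they are leaked at unmount time.
			 */
			xfs_buf_delwri_cancel(&ailp->ail_buf_list);
			break;
		}
		/* may skip pinned buffers, leaving them on the list */
		xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list);
		/* a real caller would sleep or force the log here */
	}
}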