about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- drivers/md/dm.c 134
1 files changed, 51 insertions, 83 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 530c0fe14229..a7724ba45b43 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -263,7 +263,6 @@ out_uevent_exit:
static void local_exit(void)
{
- flush_scheduled_work();
destroy_workqueue(deferred_remove_workqueue);
unregister_blkdev(_major, _name);
@@ -676,19 +675,20 @@ static void start_io_acct(struct dm_io *io)
false, 0, &io->stats_aux);
}
-static void end_io_acct(struct dm_io *io)
+static void end_io_acct(struct mapped_device *md, struct bio *bio,
+ unsigned long start_time, struct dm_stats_aux *stats_aux)
{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
- unsigned long duration = jiffies - io->start_time;
-
- generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
- io->start_time);
+ unsigned long duration = jiffies - start_time;
if (unlikely(dm_stats_used(&md->stats)))
dm_stats_account_io(&md->stats, bio_data_dir(bio),
bio->bi_iter.bi_sector, bio_sectors(bio),
- true, duration, &io->stats_aux);
+ true, duration, stats_aux);
+
+ smp_wmb();
+
+ generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
+ start_time);
/* nudge anyone waiting on suspend queue */
if (unlikely(wq_has_sleeper(&md->wait)))
@@ -909,6 +909,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
blk_status_t io_error;
struct bio *bio;
struct mapped_device *md = io->md;
+ unsigned long start_time = 0;
+ struct dm_stats_aux stats_aux;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
@@ -935,8 +937,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
io_error = io->status;
bio = io->orig_bio;
- end_io_acct(io);
+ start_time = io->start_time;
+ stats_aux = io->stats_aux;
free_io(md, io);
+ end_io_acct(md, bio, start_time, &stats_aux);
if (io_error == BLK_STS_DM_REQUEUE)
return;
@@ -995,7 +999,7 @@ static void clone_endio(struct bio *bio)
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
- if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) {
+ if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_DISCARD &&
!bio->bi_disk->queue->limits.max_discard_sectors)
disable_discard(md);
@@ -1320,7 +1324,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
sector = clone->bi_iter.bi_sector;
if (unlikely(swap_bios_limit(ti, clone))) {
- struct mapped_device *md = io->md;
int latch = get_swap_bios();
if (unlikely(latch != md->swap_bios))
__set_swap_bios_limit(md, latch);
@@ -1335,24 +1338,17 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
/* the bio has been remapped so dispatch it */
trace_block_bio_remap(clone->bi_disk->queue, clone,
bio_dev(io->orig_bio), sector);
- if (md->type == DM_TYPE_NVME_BIO_BASED)
- ret = direct_make_request(clone);
- else
- ret = generic_make_request(clone);
+ ret = generic_make_request(clone);
break;
case DM_MAPIO_KILL:
- if (unlikely(swap_bios_limit(ti, clone))) {
- struct mapped_device *md = io->md;
+ if (unlikely(swap_bios_limit(ti, clone)))
up(&md->swap_bios_semaphore);
- }
free_tio(tio);
dec_pending(io, BLK_STS_IOERR);
break;
case DM_MAPIO_REQUEUE:
- if (unlikely(swap_bios_limit(ti, clone))) {
- struct mapped_device *md = io->md;
+ if (unlikely(swap_bios_limit(ti, clone)))
up(&md->swap_bios_semaphore);
- }
free_tio(tio);
dec_pending(io, BLK_STS_DM_REQUEUE);
break;
@@ -1727,51 +1723,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
return ret;
}
-/*
- * Optimized variant of __split_and_process_bio that leverages the
- * fact that targets that use it do _not_ have a need to split bios.
- */
-static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
- struct bio *bio, struct dm_target *ti)
-{
- struct clone_info ci;
- blk_qc_t ret = BLK_QC_T_NONE;
- int error = 0;
-
- init_clone_info(&ci, md, map, bio);
-
- if (bio->bi_opf & REQ_PREFLUSH) {
- struct bio flush_bio;
-
- /*
- * Use an on-stack bio for this, it's safe since we don't
- * need to reference it after submit. It's just used as
- * the basis for the clone(s).
- */
- bio_init(&flush_bio, NULL, 0);
- flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
- ci.bio = &flush_bio;
- ci.sector_count = 0;
- error = __send_empty_flush(&ci);
- bio_uninit(ci.bio);
- /* dec_pending submits any data associated with flush */
- } else {
- struct dm_target_io *tio;
-
- ci.bio = bio;
- ci.sector_count = bio_sectors(bio);
- if (__process_abnormal_io(&ci, ti, &error))
- goto out;
-
- tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
- ret = __clone_and_map_simple_bio(&ci, tio, NULL);
- }
-out:
- /* drop the extra reference count */
- dec_pending(ci.io, errno_to_blk_status(error));
- return ret;
-}
-
static blk_qc_t dm_process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio)
{
@@ -1802,8 +1753,6 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
/* regular IO is split by __split_and_process_bio */
}
- if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
- return __process_bio(md, map, bio, ti);
return __split_and_process_bio(md, map, bio);
}
@@ -2045,7 +1994,9 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->bdev)
goto bad;
- dm_stats_init(&md->stats);
+ r = dm_stats_init(&md->stats);
+ if (r < 0)
+ goto bad;
/* Populate the mapping, nobody knows we exist yet */
spin_lock(&_minor_lock);
@@ -2195,12 +2146,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (request_based)
dm_stop_queue(q);
- if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
+ if (request_based) {
/*
- * Leverage the fact that request-based DM targets and
- * NVMe bio based targets are immutable singletons
- * - used to optimize both dm_request_fn and dm_mq_queue_rq;
- * and __process_bio.
+ * Leverage the fact that request-based DM targets are
+ * immutable singletons - used to optimize dm_mq_queue_rq.
*/
md->immutable_target = dm_table_get_immutable_target(t);
}
@@ -2329,7 +2278,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
break;
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
- case DM_TYPE_NVME_BIO_BASED:
dm_init_congested_fn(md);
break;
case DM_TYPE_NONE:
@@ -2491,6 +2439,8 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state)
}
finish_wait(&md->wait, &wait);
+ smp_rmb();
+
return r;
}
@@ -2859,6 +2809,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
static void __dm_internal_resume(struct mapped_device *md)
{
+ int r;
+ struct dm_table *map;
+
BUG_ON(!md->internal_suspend_count);
if (--md->internal_suspend_count)
@@ -2867,12 +2820,23 @@ static void __dm_internal_resume(struct mapped_device *md)
if (dm_suspended_md(md))
goto done; /* resume from nested suspend */
- /*
- * NOTE: existing callers don't need to call dm_table_resume_targets
- * (which may fail -- so best to avoid it for now by passing NULL map)
- */
- (void) __dm_resume(md, NULL);
-
+ map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
+ r = __dm_resume(md, map);
+ if (r) {
+ /*
+ * If a preresume method of some target failed, we are in a
+ * tricky situation. We can't return an error to the caller. We
+ * can't fake success because then the "resume" and
+ * "postsuspend" methods would not be paired correctly, and it
+ * would break various targets, for example it would cause list
+ * corruption in the "origin" target.
+ *
+ * So, we fake normal suspend here, to make sure that the
+ * "resume" and "postsuspend" methods will be paired correctly.
+ */
+ DMERR("Preresume method failed: %d", r);
+ set_bit(DMF_SUSPENDED, &md->flags);
+ }
done:
clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
smp_mb__after_atomic();
@@ -3063,7 +3027,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
switch (type) {
case DM_TYPE_BIO_BASED:
case DM_TYPE_DAX_BIO_BASED:
- case DM_TYPE_NVME_BIO_BASED:
pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
@@ -3132,6 +3095,11 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
goto out;
ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
ret = -EINVAL;
if (!ti->type->iterate_devices)
goto out;