diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 134 |
1 files changed, 51 insertions, 83 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 530c0fe14229..a7724ba45b43 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -263,7 +263,6 @@ out_uevent_exit: static void local_exit(void) { - flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); unregister_blkdev(_major, _name); @@ -676,19 +675,20 @@ static void start_io_acct(struct dm_io *io) false, 0, &io->stats_aux); } -static void end_io_acct(struct dm_io *io) +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned long duration = jiffies - io->start_time; - - generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, - io->start_time); + unsigned long duration = jiffies - start_time; if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, &io->stats_aux); + true, duration, stats_aux); + + smp_wmb(); + + generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, + start_time); /* nudge anyone waiting on suspend queue */ if (unlikely(wq_has_sleeper(&md->wait))) @@ -909,6 +909,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; + unsigned long start_time = 0; + struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -935,8 +937,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; bio = io->orig_bio; - end_io_acct(io); + start_time = io->start_time; + stats_aux = io->stats_aux; free_io(md, io); + end_io_acct(md, bio, start_time, &stats_aux); if (io_error == BLK_STS_DM_REQUEUE) return; @@ -995,7 +999,7 @@ static void clone_endio(struct bio *bio) struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; - if (unlikely(error == BLK_STS_TARGET) && md->type != DM_TYPE_NVME_BIO_BASED) { + if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_DISCARD && !bio->bi_disk->queue->limits.max_discard_sectors) disable_discard(md); @@ -1320,7 +1324,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) sector = clone->bi_iter.bi_sector; if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; int latch = get_swap_bios(); if (unlikely(latch != md->swap_bios)) __set_swap_bios_limit(md, latch); @@ -1335,24 +1338,17 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) /* the bio has been remapped so dispatch it */ trace_block_bio_remap(clone->bi_disk->queue, clone, bio_dev(io->orig_bio), sector); - if (md->type == DM_TYPE_NVME_BIO_BASED) - ret = direct_make_request(clone); - else - ret = generic_make_request(clone); + ret = generic_make_request(clone); break; case DM_MAPIO_KILL: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; + if (unlikely(swap_bios_limit(ti, clone))) up(&md->swap_bios_semaphore); - } free_tio(tio); dec_pending(io, BLK_STS_IOERR); break; case DM_MAPIO_REQUEUE: - if (unlikely(swap_bios_limit(ti, clone))) { - struct mapped_device *md = io->md; + if (unlikely(swap_bios_limit(ti, clone))) up(&md->swap_bios_semaphore); - } free_tio(tio); dec_pending(io, BLK_STS_DM_REQUEUE); break; @@ -1727,51 +1723,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, return ret; } -/* - * Optimized variant of __split_and_process_bio that leverages the - * fact that targets that use it do _not_ have a need to split bios. - */ -static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map, - struct bio *bio, struct dm_target *ti) -{ - struct clone_info ci; - blk_qc_t ret = BLK_QC_T_NONE; - int error = 0; - - init_clone_info(&ci, md, map, bio); - - if (bio->bi_opf & REQ_PREFLUSH) { - struct bio flush_bio; - - /* - * Use an on-stack bio for this, it's safe since we don't - * need to reference it after submit. It's just used as - * the basis for the clone(s). - */ - bio_init(&flush_bio, NULL, 0); - flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; - ci.bio = &flush_bio; - ci.sector_count = 0; - error = __send_empty_flush(&ci); - bio_uninit(ci.bio); - /* dec_pending submits any data associated with flush */ - } else { - struct dm_target_io *tio; - - ci.bio = bio; - ci.sector_count = bio_sectors(bio); - if (__process_abnormal_io(&ci, ti, &error)) - goto out; - - tio = alloc_tio(&ci, ti, 0, GFP_NOIO); - ret = __clone_and_map_simple_bio(&ci, tio, NULL); - } -out: - /* drop the extra reference count */ - dec_pending(ci.io, errno_to_blk_status(error)); - return ret; -} - static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { @@ -1802,8 +1753,6 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, /* regular IO is split by __split_and_process_bio */ } - if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) - return __process_bio(md, map, bio, ti); return __split_and_process_bio(md, map, bio); } @@ -2045,7 +1994,9 @@ static struct mapped_device *alloc_dev(int minor) if (!md->bdev) goto bad; - dm_stats_init(&md->stats); + r = dm_stats_init(&md->stats); + if (r < 0) + goto bad; /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); @@ -2195,12 +2146,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (request_based) dm_stop_queue(q); - if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) { + if (request_based) { /* - * Leverage the fact that request-based DM targets and - * NVMe bio based targets are immutable singletons - * - used to optimize both dm_request_fn and dm_mq_queue_rq; - * and __process_bio. + * Leverage the fact that request-based DM targets are + * immutable singletons - used to optimize dm_mq_queue_rq. */ md->immutable_target = dm_table_get_immutable_target(t); } @@ -2329,7 +2278,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - case DM_TYPE_NVME_BIO_BASED: dm_init_congested_fn(md); break; case DM_TYPE_NONE: @@ -2491,6 +2439,8 @@ static int dm_wait_for_completion(struct mapped_device *md, long task_state) } finish_wait(&md->wait, &wait); + smp_rmb(); + return r; } @@ -2859,6 +2809,9 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla static void __dm_internal_resume(struct mapped_device *md) { + int r; + struct dm_table *map; + BUG_ON(!md->internal_suspend_count); if (--md->internal_suspend_count) @@ -2867,12 +2820,23 @@ static void __dm_internal_resume(struct mapped_device *md) if (dm_suspended_md(md)) goto done; /* resume from nested suspend */ - /* - * NOTE: existing callers don't need to call dm_table_resume_targets - * (which may fail -- so best to avoid it for now by passing NULL map) - */ - (void) __dm_resume(md, NULL); - + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + r = __dm_resume(md, map); + if (r) { + /* + * If a preresume method of some target failed, we are in a + * tricky situation. We can't return an error to the caller. We + * can't fake success because then the "resume" and + * "postsuspend" methods would not be paired correctly, and it + * would break various targets, for example it would cause list + * corruption in the "origin" target. + * + * So, we fake normal suspend here, to make sure that the + * "resume" and "postsuspend" methods will be paired correctly. + */ + DMERR("Preresume method failed: %d", r); + set_bit(DMF_SUSPENDED, &md->flags); + } done: clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); smp_mb__after_atomic(); @@ -3063,7 +3027,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu switch (type) { case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: - case DM_TYPE_NVME_BIO_BASED: pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); io_front_pad = roundup(front_pad, __alignof__(struct dm_io)) + offsetof(struct dm_io, tio); @@ -3132,6 +3095,11 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, goto out; ti = dm_table_get_target(table, 0); + if (dm_suspended_md(md)) { + ret = -EAGAIN; + goto out; + } + ret = -EINVAL; if (!ti->type->iterate_devices) goto out; |