diff options
Diffstat (limited to 'drivers/md/dm-snap.c')
-rw-r--r-- | drivers/md/dm-snap.c | 286 |
1 files changed, 250 insertions, 36 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3107f2b1988b..4fb1a40e68a0 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1,6 +1,4 @@ /* - * dm-snapshot.c - * * Copyright (C) 2001-2002 Sistina Software (UK) Limited. * * This file is released under the GPL. @@ -20,7 +18,6 @@ #include <linux/vmalloc.h> #include <linux/log2.h> #include <linux/dm-kcopyd.h> -#include <linux/semaphore.h> #include "dm.h" @@ -109,8 +106,8 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; - /* Maximum number of in-flight COW jobs. */ - struct semaphore cow_count; + unsigned in_progress; + struct wait_queue_head in_progress_wait; struct dm_kcopyd_client *kcopyd_client; @@ -134,7 +131,10 @@ struct dm_snapshot { * - I/O error while merging * => stop merging; set merge_failed; process I/O normally. */ - int merge_failed; + bool merge_failed:1; + + bool discard_zeroes_cow:1; + bool discard_passdown_origin:1; /* * Incoming bios that overlap with chunks being merged must wait @@ -161,8 +161,8 @@ struct dm_snapshot { */ #define DEFAULT_COW_THRESHOLD 2048 -static int cow_threshold = DEFAULT_COW_THRESHOLD; -module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +static unsigned cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, @@ -1173,12 +1173,64 @@ static void stop_merge(struct dm_snapshot *s) clear_bit(SHUTDOWN_MERGE, &s->state_bits); } +static int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s, + struct dm_target *ti) +{ + int r; + unsigned argc; + const char *arg_name; + + static const struct dm_arg _args[] = { + {0, 2, "Invalid number of feature arguments"}, + }; + + /* + * No feature arguments supplied. + */ + if (!as->argc) + return 0; + + r = dm_read_arg_group(_args, as, &argc, &ti->error); + if (r) + return -EINVAL; + + while (argc && !r) { + arg_name = dm_shift_arg(as); + argc--; + + if (!strcasecmp(arg_name, "discard_zeroes_cow")) + s->discard_zeroes_cow = true; + + else if (!strcasecmp(arg_name, "discard_passdown_origin")) + s->discard_passdown_origin = true; + + else { + ti->error = "Unrecognised feature requested"; + r = -EINVAL; + break; + } + } + + if (!s->discard_zeroes_cow && s->discard_passdown_origin) { + /* + * TODO: really these are disjoint.. but ti->num_discard_bios + * and dm_bio_get_target_bio_nr() require rigid constraints. + */ + ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow"; + r = -EINVAL; + } + + return r; +} + /* - * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> + * Construct a snapshot mapping: + * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*] */ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dm_snapshot *s; + struct dm_arg_set as; int i; int r = -EINVAL; char *origin_path, *cow_path; @@ -1186,8 +1238,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; - if (argc != 4) { - ti->error = "requires exactly 4 arguments"; + if (argc < 4) { + ti->error = "requires 4 or more arguments"; r = -EINVAL; goto bad; } @@ -1204,6 +1256,13 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + as.argc = argc; + as.argv = argv; + dm_consume_args(&as, 4); + r = parse_snapshot_features(&as, s, ti); + if (r) + goto bad_features; + origin_path = argv[0]; argv++; argc--; @@ -1267,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + init_waitqueue_head(&s->in_progress_wait); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { @@ -1289,6 +1348,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_bios = num_flush_bios; + if (s->discard_zeroes_cow) + ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1); ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1336,29 +1397,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) bad_read_metadata: unregister_snapshot(s); - bad_load_and_register: mempool_exit(&s->pending_pool); - bad_pending_pool: dm_kcopyd_client_destroy(s->kcopyd_client); - bad_kcopyd: dm_exception_table_exit(&s->pending, pending_cache); dm_exception_table_exit(&s->complete, exception_cache); - bad_hash_tables: dm_exception_store_destroy(s->store); - bad_store: dm_put_device(ti, s->cow); - bad_cow: dm_put_device(ti, s->origin); - bad_origin: +bad_features: kfree(s); - bad: return r; } @@ -1454,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. + */ + DECLARE_WAITQUEUE(wait, current); + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; +} + /* * Flush a list of buffers. */ @@ -1472,7 +1573,7 @@ static void flush_bios(struct bio *bio) } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1485,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) generic_make_request(bio); bio = n; @@ -1677,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) rb_link_node(&pe->out_of_order_node, parent, p); rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); } - up(&s->cow_count); + account_end_copy(s); } /* @@ -1701,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - down(&s->cow_count); + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1721,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - down(&s->cow_count); + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); @@ -1806,6 +1907,37 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, (bio->bi_iter.bi_sector & s->store->chunk_mask); } +static void zero_callback(int read_err, unsigned long write_err, void *context) +{ + struct bio *bio = context; + struct dm_snapshot *s = bio->bi_private; + + account_end_copy(s); + bio->bi_status = write_err ? BLK_STS_IOERR : 0; + bio_endio(bio); +} + +static void zero_exception(struct dm_snapshot *s, struct dm_exception *e, + struct bio *bio, chunk_t chunk) +{ + struct dm_io_region dest; + + dest.bdev = s->cow->bdev; + dest.sector = bio->bi_iter.bi_sector; + dest.count = s->store->chunk_size; + + account_start_copy(s); + WARN_ON_ONCE(bio->bi_private); + bio->bi_private = s; + dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio); +} + +static bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio) +{ + return bio->bi_iter.bi_size == + (s->store->chunk_size << SECTOR_SHIFT); +} + static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; @@ -1830,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + down_read(&s->lock); dm_exception_table_lock(&lock); @@ -1839,10 +1976,43 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) goto out_unlock; } + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) { + /* + * passdown discard to origin (without triggering + * snapshot exceptions via do_origin; doing so would + * defeat the goal of freeing space in origin that is + * implied by the "discard_passdown_origin" feature) + */ + bio_set_dev(bio, s->origin->bdev); + track_chunk(s, bio, chunk); + goto out_unlock; + } + /* discard to snapshot (target_bio_nr == 0) zeroes exceptions */ + } + /* If the block is already remapped - use that, else remap it */ e = dm_lookup_exception(&s->complete, chunk); if (e) { remap_exception(s, e, bio, chunk); + if (unlikely(bio_op(bio) == REQ_OP_DISCARD) && + io_overlaps_chunk(s, bio)) { + dm_exception_table_unlock(&lock); + up_read(&s->lock); + zero_exception(s, e, bio, chunk); + r = DM_MAPIO_SUBMITTED; /* discard is not issued */ + goto out; + } + goto out_unlock; + } + + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + /* + * If no exception exists, complete discard immediately + * otherwise it'll trigger copy-out. + */ + bio_endio(bio); + r = DM_MAPIO_SUBMITTED; goto out_unlock; } @@ -1890,9 +2060,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) r = DM_MAPIO_SUBMITTED; - if (!pe->started && - bio->bi_iter.bi_size == - (s->store->chunk_size << SECTOR_SHIFT)) { + if (!pe->started && io_overlaps_chunk(s, bio)) { pe->started = 1; dm_exception_table_unlock(&lock); @@ -1955,6 +2123,12 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) { + /* Once merging, discards no longer effect change */ + bio_endio(bio); + return DM_MAPIO_SUBMITTED; + } + chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); down_write(&s->lock); @@ -1989,7 +2163,7 @@ redirect_to_origin: if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); - return do_origin(s->origin, bio); + return do_origin(s->origin, bio, false); } out_unlock: @@ -2138,6 +2312,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, { unsigned sz = 0; struct dm_snapshot *snap = ti->private; + unsigned num_features; switch (type) { case STATUSTYPE_INFO: @@ -2178,8 +2353,16 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, * make sense. */ DMEMIT("%s %s", snap->origin->name, snap->cow->name); - snap->store->type->status(snap->store, type, result + sz, - maxlen - sz); + sz += snap->store->type->status(snap->store, type, result + sz, + maxlen - sz); + num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin; + if (num_features) { + DMEMIT(" %u", num_features); + if (snap->discard_zeroes_cow) + DMEMIT(" discard_zeroes_cow"); + if (snap->discard_passdown_origin) + DMEMIT(" discard_passdown_origin"); + } break; } } @@ -2198,6 +2381,26 @@ static int snapshot_iterate_devices(struct dm_target *ti, return r; } +static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct dm_snapshot *snap = ti->private; + + if (snap->discard_zeroes_cow) { + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + + down_read(&_origins_lock); + + (void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL); + if (snap_src && snap_dest) + snap = snap_src; + + /* All discards are split on chunk_size boundary */ + limits->discard_granularity = snap->store->chunk_size; + limits->max_discard_sectors = snap->store->chunk_size; + + up_read(&_origins_lock); + } +} /*----------------------------------------------------------------- * Origin methods @@ -2335,15 +2538,24 @@ next_snapshot: /* * Called on a write from the origin driver. */ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2456,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); + return do_origin(o->dev, bio, true); } /* @@ -2522,7 +2734,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2532,11 +2744,12 @@ static struct target_type snapshot_target = { .resume = snapshot_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2547,6 +2760,7 @@ static struct target_type merge_target = { .resume = snapshot_merge_resume, .status = snapshot_status, .iterate_devices = snapshot_iterate_devices, + .io_hints = snapshot_io_hints, }; static int __init dm_snapshot_init(void) |