Diffstat (limited to 'drivers/md')
-rw-r--r--   drivers/md/dm-cache-metadata.c |   9
-rw-r--r--   drivers/md/dm-crypt.c          |   9
-rw-r--r--   drivers/md/dm-delay.c          |   3
-rw-r--r--   drivers/md/dm-integrity.c      |   4
-rw-r--r--   drivers/md/dm-mpath.c          |   2
-rw-r--r--   drivers/md/dm-zoned-metadata.c |   5
-rw-r--r--   drivers/md/md.c                | 180
-rw-r--r--   drivers/md/md.h                |  25
-rw-r--r--   drivers/md/raid5.c             |  29
9 files changed, 133 insertions(+), 133 deletions(-)
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 6fc93834da44..151aa95775be 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1167,11 +1167,18 @@ static int __load_discards(struct dm_cache_metadata *cmd,
if (r)
return r;
- for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
+ for (b = 0; ; b++) {
r = fn(context, cmd->discard_block_size, to_dblock(b),
dm_bitset_cursor_get_value(&c));
if (r)
break;
+
+ if (b >= (from_dblock(cmd->discard_nr_blocks) - 1))
+ break;
+
+ r = dm_bitset_cursor_next(&c);
+ if (r)
+ break;
}
dm_bitset_cursor_end(&c);
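
The new loop visits the bit under the cursor first and only calls dm_bitset_cursor_next() when more discard blocks remain, so the cursor is never advanced past the final bit (where stepping forward returns an error). A minimal userspace sketch of the same process-then-advance pattern, using an illustrative array-backed cursor rather than the dm_bitset API:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative cursor over a fixed array; next() fails past the end,
     * mirroring dm_bitset_cursor_next() erroring out on the last bit. */
    struct cursor {
        const bool *bits;
        unsigned idx, count;
    };

    static int cursor_next(struct cursor *c)
    {
        if (c->idx + 1 >= c->count)
            return -1;          /* advancing past the end is an error */
        c->idx++;
        return 0;
    }

    static int visit(unsigned block, bool discarded)
    {
        printf("block %u: %s\n", block, discarded ? "discarded" : "clean");
        return 0;
    }

    int main(void)
    {
        const bool bits[] = { true, false, true };
        struct cursor c = { bits, 0, 3 };
        int r;

        /* Process first, then decide whether to advance: the last entry
         * is visited without ever calling cursor_next() on it. */
        for (unsigned b = 0; ; b++) {
            r = visit(b, c.bits[c.idx]);
            if (r)
                break;
            if (b >= c.count - 1)
                break;
            r = cursor_next(&c);
            if (r)
                break;
        }
        return r ? 1 : 0;
    }
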
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index dd538e6b2748..df39b07de800 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -949,6 +949,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
{
#ifdef CONFIG_BLK_DEV_INTEGRITY
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
+ struct mapped_device *md = dm_table_get_md(ti->table);
/* From now we require underlying device with our integrity profile */
if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
@@ -968,7 +969,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
if (crypt_integrity_aead(cc)) {
cc->integrity_tag_size = cc->on_disk_tag_size - cc->integrity_iv_size;
- DMINFO("Integrity AEAD, tag size %u, IV size %u.",
+ DMDEBUG("%s: Integrity AEAD, tag size %u, IV size %u.", dm_device_name(md),
cc->integrity_tag_size, cc->integrity_iv_size);
if (crypto_aead_setauthsize(any_tfm_aead(cc), cc->integrity_tag_size)) {
@@ -976,7 +977,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
return -EINVAL;
}
} else if (cc->integrity_iv_size)
- DMINFO("Additional per-sector space %u bytes for IV.",
+ DMDEBUG("%s: Additional per-sector space %u bytes for IV.", dm_device_name(md),
cc->integrity_iv_size);
if ((cc->integrity_tag_size + cc->integrity_iv_size) != bi->tag_size) {
@@ -1890,7 +1891,7 @@ static int crypt_alloc_tfms_skcipher(struct crypt_config *cc, char *ciphermode)
* algorithm implementation is used. Help people debug performance
* problems by logging the ->cra_driver_name.
*/
- DMINFO("%s using implementation \"%s\"", ciphermode,
+ DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode,
crypto_skcipher_alg(any_tfm(cc))->base.cra_driver_name);
return 0;
}
@@ -1910,7 +1911,7 @@ static int crypt_alloc_tfms_aead(struct crypt_config *cc, char *ciphermode)
return err;
}
- DMINFO("%s using implementation \"%s\"", ciphermode,
+ DMDEBUG_LIMIT("%s using implementation \"%s\"", ciphermode,
crypto_aead_alg(any_tfm_aead(cc))->base.cra_driver_name);
return 0;
}
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index fddffe251bf6..f496213f8b67 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -121,7 +121,8 @@ static void delay_dtr(struct dm_target *ti)
{
struct delay_c *dc = ti->private;
- destroy_workqueue(dc->kdelayd_wq);
+ if (dc->kdelayd_wq)
+ destroy_workqueue(dc->kdelayd_wq);
if (dc->read.dev)
dm_put_device(ti, dc->read.dev);
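
delay_dtr() can be reached with a partially constructed target when the constructor fails before kdelayd_wq is allocated, and destroy_workqueue(), unlike kfree(), must not be passed NULL, hence the explicit guard. An illustrative sketch of the same NULL-tolerant teardown (the resources here are placeholders, not dm-delay's):

    #include <stdio.h>
    #include <stdlib.h>

    struct ctx {
        FILE *log;      /* may still be NULL if construction failed early */
        char *buf;
    };

    static void ctx_destroy(struct ctx *c)
    {
        /* fclose(), like destroy_workqueue(), must not be given NULL, so
         * the guard lets this run safely against a half-built context. */
        if (c->log)
            fclose(c->log);
        free(c->buf);   /* free() tolerates NULL on its own */
        free(c);
    }

    int main(void)
    {
        struct ctx *c = calloc(1, sizeof(*c));
        if (!c)
            return 1;
        /* Simulate a constructor that bailed out before opening the log. */
        ctx_destroy(c);
        return 0;
    }
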
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index f535fd8ac82d..a4fe187d50d0 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2568,7 +2568,7 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
if (last_sector < ic->start || last_sector >= ic->meta_device_sectors)
return -EINVAL;
} else {
- __u64 meta_size = ic->provided_data_sectors * ic->tag_size;
+ __u64 meta_size = (ic->provided_data_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
meta_size = (meta_size + ((1U << (ic->log2_buffer_sectors + SECTOR_SHIFT)) - 1))
>> (ic->log2_buffer_sectors + SECTOR_SHIFT);
meta_size <<= ic->log2_buffer_sectors;
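
The corrected formula reflects that dm-integrity stores one tag per data block rather than per 512-byte sector, so provided_data_sectors is scaled down by log2_sectors_per_block before being multiplied by tag_size. A small worked example with illustrative numbers (not values taken from the driver):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t provided_data_sectors = 1ULL << 20;   /* 512 MiB of data */
        unsigned log2_sectors_per_block = 3;           /* 4 KiB blocks = 8 sectors */
        unsigned tag_size = 4;                         /* metadata bytes per block */

        /* Old formula: one tag per sector, 8x too large for 4 KiB blocks. */
        uint64_t old_meta = provided_data_sectors * tag_size;

        /* Fixed formula: one tag per block. */
        uint64_t new_meta =
            (provided_data_sectors >> log2_sectors_per_block) * tag_size;

        printf("old: %llu bytes, fixed: %llu bytes\n",
               (unsigned long long)old_meta, (unsigned long long)new_meta);
        return 0;
    }
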
@@ -3439,7 +3439,7 @@ try_smaller_buffer:
DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
DEBUG_print(" journal_entries %u\n", ic->journal_entries);
DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
- DEBUG_print(" device_sectors 0x%llx\n", (unsigned long long)ic->device_sectors);
+ DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors);
DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2ee5e357a0a7..cc5173dfd466 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -882,6 +882,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
if (attached_handler_name || m->hw_handler_name) {
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);
+ kfree(attached_handler_name);
if (r) {
dm_put_device(ti, p->path.dev);
goto bad;
@@ -896,7 +897,6 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
return p;
bad:
- kfree(attached_handler_name);
free_pgpath(p);
return ERR_PTR(r);
}
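
Freeing attached_handler_name right after setup_scsi_dh() has consumed it covers the success path as well; previously only the error path reached the kfree(), leaking the string whenever the path was set up successfully. A minimal sketch of the same free-the-temporary-unconditionally pattern (the helper and names are illustrative):

    #include <stdlib.h>
    #include <string.h>

    /* Placeholder for a helper that only reads the name during the call. */
    static int setup_handler(const char *name)
    {
        return (name && name[0]) ? 0 : -1;
    }

    static int add_path(const char *discovered)
    {
        char *name = discovered ? strdup(discovered) : NULL;
        int r = setup_handler(name);

        /* The helper has consumed the string; free it unconditionally so
         * neither the success path nor the error path can leak it. */
        free(name);

        if (r)
            return r;
        return 0;
    }

    int main(void)
    {
        return add_path("example-handler") ? 1 : 0;
    }
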
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index fa68336560c3..d8334cd45d7c 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1169,6 +1169,9 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
goto out;
}
+ if (!nr_blkz)
+ break;
+
/* Process report */
for (i = 0; i < nr_blkz; i++) {
ret = dmz_init_zone(zmd, zone, &blkz[i]);
@@ -1204,6 +1207,8 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
/* Get zone information from disk */
ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
&blkz, &nr_blkz, GFP_NOIO);
+ if (!nr_blkz)
+ ret = -EIO;
if (ret) {
dmz_dev_err(zmd->dev, "Get zone %u report failed",
dmz_id(zmd, zone));
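
Both dm-zoned hunks defend against blkdev_report_zones() returning success while reporting zero zones: the init loop stops instead of spinning forever, and the single-zone refresh treats an empty report as an I/O error. A small sketch of the same "success with no data is still a failure" convention (the report function here is a stand-in, not the block-layer API):

    #include <errno.h>
    #include <stdio.h>

    /* Stand-in for a report call that can succeed yet return zero entries. */
    static int report_zones(unsigned start, unsigned *nr)
    {
        (void)start;
        *nr = 0;            /* device returned nothing for this range */
        return 0;           /* ...but no transport error either */
    }

    static int update_zone(unsigned zone)
    {
        unsigned nr = 1;
        int ret = report_zones(zone, &nr);

        /* An empty report would leave the caller with stale zone state, so
         * promote it to an error before the shared error handling runs. */
        if (!nr)
            ret = -EIO;
        if (ret) {
            fprintf(stderr, "zone %u report failed (%d)\n", zone, ret);
            return ret;
        }
        return 0;
    }

    int main(void)
    {
        return update_zone(0) ? 1 : 0;
    }
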
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 05ffffb8b769..295ff09cff4c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -132,24 +132,6 @@ static inline int speed_max(struct mddev *mddev)
mddev->sync_speed_max : sysctl_speed_limit_max;
}
-static void * flush_info_alloc(gfp_t gfp_flags, void *data)
-{
- return kzalloc(sizeof(struct flush_info), gfp_flags);
-}
-static void flush_info_free(void *flush_info, void *data)
-{
- kfree(flush_info);
-}
-
-static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
-{
- return kzalloc(sizeof(struct flush_bio), gfp_flags);
-}
-static void flush_bio_free(void *flush_bio, void *data)
-{
- kfree(flush_bio);
-}
-
static struct ctl_table_header *raid_table_header;
static struct ctl_table raid_table[] = {
@@ -423,54 +405,31 @@ static int md_congested(void *data, int bits)
/*
* Generic flush handling for md
*/
-static void submit_flushes(struct work_struct *ws)
-{
- struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
- struct mddev *mddev = fi->mddev;
- struct bio *bio = fi->bio;
-
- bio->bi_opf &= ~REQ_PREFLUSH;
- md_handle_request(mddev, bio);
-
- mempool_free(fi, mddev->flush_pool);
-}
-static void md_end_flush(struct bio *fbio)
+static void md_end_flush(struct bio *bio)
{
- struct flush_bio *fb = fbio->bi_private;
- struct md_rdev *rdev = fb->rdev;
- struct flush_info *fi = fb->fi;
- struct bio *bio = fi->bio;
- struct mddev *mddev = fi->mddev;
+ struct md_rdev *rdev = bio->bi_private;
+ struct mddev *mddev = rdev->mddev;
rdev_dec_pending(rdev, mddev);
- if (atomic_dec_and_test(&fi->flush_pending)) {
- if (bio->bi_iter.bi_size == 0) {
- /* an empty barrier - all done */
- bio_endio(bio);
- mempool_free(fi, mddev->flush_pool);
- } else {
- INIT_WORK(&fi->flush_work, submit_flushes);
- queue_work(md_wq, &fi->flush_work);
- }
+ if (atomic_dec_and_test(&mddev->flush_pending)) {
+ /* The pre-request flush has finished */
+ queue_work(md_wq, &mddev->flush_work);
}
-
- mempool_free(fb, mddev->flush_bio_pool);
- bio_put(fbio);
+ bio_put(bio);
}
-void md_flush_request(struct mddev *mddev, struct bio *bio)
+static void md_submit_flush_data(struct work_struct *ws);
+
+static void submit_flushes(struct work_struct *ws)
{
+ struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct md_rdev *rdev;
- struct flush_info *fi;
-
- fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);
-
- fi->bio = bio;
- fi->mddev = mddev;
- atomic_set(&fi->flush_pending, 1);
+ mddev->start_flush = ktime_get_boottime();
+ INIT_WORK(&mddev->flush_work, md_submit_flush_data);
+ atomic_set(&mddev->flush_pending, 1);
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
@@ -480,37 +439,74 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
* we reclaim rcu_read_lock
*/
struct bio *bi;
- struct flush_bio *fb;
atomic_inc(&rdev->nr_pending);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
-
- fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
- fb->fi = fi;
- fb->rdev = rdev;
-
bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
- bio_set_dev(bi, rdev->bdev);
bi->bi_end_io = md_end_flush;
- bi->bi_private = fb;
+ bi->bi_private = rdev;
+ bio_set_dev(bi, rdev->bdev);
bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
- atomic_inc(&fi->flush_pending);
+ atomic_inc(&mddev->flush_pending);
submit_bio(bi);
-
rcu_read_lock();
rdev_dec_pending(rdev, mddev);
}
rcu_read_unlock();
+ if (atomic_dec_and_test(&mddev->flush_pending))
+ queue_work(md_wq, &mddev->flush_work);
+}
+
+static void md_submit_flush_data(struct work_struct *ws)
+{
+ struct mddev *mddev = container_of(ws, struct mddev, flush_work);
+ struct bio *bio = mddev->flush_bio;
+
+ /*
+ * must reset flush_bio before calling into md_handle_request to avoid a
+ * deadlock, because other bios passed md_handle_request suspend check
+ * could wait for this and below md_handle_request could wait for those
+ * bios because of suspend check
+ */
+ mddev->last_flush = mddev->start_flush;
+ mddev->flush_bio = NULL;
+ wake_up(&mddev->sb_wait);
+
+ if (bio->bi_iter.bi_size == 0) {
+ /* an empty barrier - all done */
+ bio_endio(bio);
+ } else {
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ md_handle_request(mddev, bio);
+ }
+}
- if (atomic_dec_and_test(&fi->flush_pending)) {
- if (bio->bi_iter.bi_size == 0) {
+void md_flush_request(struct mddev *mddev, struct bio *bio)
+{
+ ktime_t start = ktime_get_boottime();
+ spin_lock_irq(&mddev->lock);
+ wait_event_lock_irq(mddev->sb_wait,
+ !mddev->flush_bio ||
+ ktime_after(mddev->last_flush, start),
+ mddev->lock);
+ if (!ktime_after(mddev->last_flush, start)) {
+ WARN_ON(mddev->flush_bio);
+ mddev->flush_bio = bio;
+ bio = NULL;
+ }
+ spin_unlock_irq(&mddev->lock);
+
+ if (!bio) {
+ INIT_WORK(&mddev->flush_work, submit_flushes);
+ queue_work(md_wq, &mddev->flush_work);
+ } else {
+ /* flush was performed for some other bio while we waited. */
+ if (bio->bi_iter.bi_size == 0)
/* an empty barrier - all done */
bio_endio(bio);
- mempool_free(fi, mddev->flush_pool);
- } else {
- INIT_WORK(&fi->flush_work, submit_flushes);
- queue_work(md_wq, &fi->flush_work);
+ else {
+ bio->bi_opf &= ~REQ_PREFLUSH;
+ mddev->pers->make_request(mddev, bio);
}
}
}
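
The rewritten flush path keeps at most one preflush in flight per array: mddev->flush_bio marks the owner, and a later caller whose arrival time precedes last_flush (the start time of the most recently completed flush) knows an equivalent flush already ran and only needs to submit its data payload. A rough userspace sketch of that coalescing idea using POSIX threads; the timestamps and locking mirror the structure of md_flush_request(), not the kernel primitives themselves:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
    static bool flush_in_flight;          /* analogue of mddev->flush_bio */
    static struct timespec last_flush;    /* start of last completed flush */

    static bool after(struct timespec a, struct timespec b)
    {
        return a.tv_sec > b.tv_sec ||
               (a.tv_sec == b.tv_sec && a.tv_nsec > b.tv_nsec);
    }

    static void do_flush(void)            /* stands in for submit_flushes() */
    {
        struct timespec start;
        clock_gettime(CLOCK_MONOTONIC, &start);
        /* ... issue the flush to all member devices here ... */
        pthread_mutex_lock(&lock);
        last_flush = start;
        flush_in_flight = false;
        pthread_cond_broadcast(&done);    /* analogue of wake_up(&sb_wait) */
        pthread_mutex_unlock(&lock);
    }

    void flush_request(void)
    {
        struct timespec start;
        clock_gettime(CLOCK_MONOTONIC, &start);

        pthread_mutex_lock(&lock);
        while (flush_in_flight && !after(last_flush, start))
            pthread_cond_wait(&done, &lock);

        if (!after(last_flush, start)) {
            /* No flush newer than us has completed: we own the next one. */
            flush_in_flight = true;
            pthread_mutex_unlock(&lock);
            do_flush();
        } else {
            /* A flush started after we arrived already finished; reuse it. */
            pthread_mutex_unlock(&lock);
        }
        /* ... submit the data part of the request here ... */
    }

    int main(void)
    {
        flush_request();
        printf("flushed\n");
        return 0;
    }
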
@@ -560,6 +556,7 @@ void mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->active_io, 0);
spin_lock_init(&mddev->lock);
+ atomic_set(&mddev->flush_pending, 0);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -2855,8 +2852,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
err = 0;
}
} else if (cmd_match(buf, "re-add")) {
- if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
- rdev->saved_raid_disk >= 0) {
+ if (!rdev->mddev->pers)
+ err = -EINVAL;
+ else if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
+ rdev->saved_raid_disk >= 0) {
/* clear_bit is performed _after_ all the devices
* have their local Faulty bit cleared. If any writes
* happen in the meantime in the local node, they
@@ -5511,22 +5510,6 @@ int md_run(struct mddev *mddev)
if (err)
return err;
}
- if (mddev->flush_pool == NULL) {
- mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
- flush_info_free, mddev);
- if (!mddev->flush_pool) {
- err = -ENOMEM;
- goto abort;
- }
- }
- if (mddev->flush_bio_pool == NULL) {
- mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
- flush_bio_free, mddev);
- if (!mddev->flush_bio_pool) {
- err = -ENOMEM;
- goto abort;
- }
- }
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
@@ -5686,11 +5669,8 @@ int md_run(struct mddev *mddev)
return 0;
abort:
- mempool_destroy(mddev->flush_bio_pool);
- mddev->flush_bio_pool = NULL;
- mempool_destroy(mddev->flush_pool);
- mddev->flush_pool = NULL;
-
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -5894,14 +5874,6 @@ static void __md_stop(struct mddev *mddev)
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- if (mddev->flush_bio_pool) {
- mempool_destroy(mddev->flush_bio_pool);
- mddev->flush_bio_pool = NULL;
- }
- if (mddev->flush_pool) {
- mempool_destroy(mddev->flush_pool);
- mddev->flush_pool = NULL;
- }
}
void md_stop(struct mddev *mddev)
@@ -9257,7 +9229,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
* reshape is happening in the remote node, we need to
* update reshape_position and call start_reshape.
*/
- mddev->reshape_position = sb->reshape_position;
+ mddev->reshape_position = le64_to_cpu(sb->reshape_position);
if (mddev->pers->update_reshape_pos)
mddev->pers->update_reshape_pos(mddev);
if (mddev->pers->start_reshape)
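
The reshape_position fix matters because v1.x superblock fields are stored little-endian on disk, so the raw value must pass through le64_to_cpu() before it is used as an in-core sector number; on a big-endian host the unconverted value is garbage. A portable userspace illustration of the same conversion, assembling the 64-bit value byte by byte instead of relying on the kernel helper:

    #include <stdint.h>
    #include <stdio.h>

    /* Portable little-endian 64-bit read: correct regardless of host
     * endianness, which is what le64_to_cpu() guarantees in the kernel. */
    static uint64_t le64_read(const unsigned char *p)
    {
        uint64_t v = 0;
        for (int i = 7; i >= 0; i--)
            v = (v << 8) | p[i];
        return v;
    }

    int main(void)
    {
        /* 0x0000000000001000 (sector 4096) as it would appear on disk. */
        const unsigned char on_disk[8] = { 0x00, 0x10, 0, 0, 0, 0, 0, 0 };
        uint64_t reshape_position = le64_read(on_disk);

        printf("reshape_position = %llu\n",
               (unsigned long long)reshape_position);
        return reshape_position == 4096 ? 0 : 1;
    }
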
diff --git a/drivers/md/md.h b/drivers/md/md.h
index c52afb52c776..257cb4c9e22b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,19 +252,6 @@ enum mddev_sb_flags {
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
};
-#define NR_FLUSH_INFOS 8
-#define NR_FLUSH_BIOS 64
-struct flush_info {
- struct bio *bio;
- struct mddev *mddev;
- struct work_struct flush_work;
- atomic_t flush_pending;
-};
-struct flush_bio {
- struct flush_info *fi;
- struct md_rdev *rdev;
-};
-
struct mddev {
void *private;
struct md_personality *pers;
@@ -470,8 +457,16 @@ struct mddev {
* metadata and bitmap writes
*/
- mempool_t *flush_pool;
- mempool_t *flush_bio_pool;
+ /* Generic flush handling.
+ * The last to finish preflush schedules a worker to submit
+ * the rest of the request (without the REQ_PREFLUSH flag).
+ */
+ struct bio *flush_bio;
+ atomic_t flush_pending;
+ ktime_t start_flush, last_flush; /* last_flush is when the last completed
+ * flush was started.
+ */
+ struct work_struct flush_work;
struct work_struct event_work; /* used by dm to report failure event */
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ae13c06b200..f9c90ab220b9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4197,7 +4197,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
/* now write out any block on a failed drive,
* or P or Q if they were recomputed
*/
- BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
+ dev = NULL;
if (s->failed == 2) {
dev = &sh->dev[s->failed_num[1]];
s->locked++;
@@ -4222,6 +4222,14 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
}
+ if (WARN_ONCE(dev && !test_bit(R5_UPTODATE, &dev->flags),
+ "%s: disk%td not up to date\n",
+ mdname(conf->mddev),
+ dev - (struct r5dev *) &sh->dev)) {
+ clear_bit(R5_LOCKED, &dev->flags);
+ clear_bit(R5_Wantwrite, &dev->flags);
+ s->locked--;
+ }
clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
@@ -4233,15 +4241,26 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
case check_state_check_result:
sh->check_state = check_state_idle;
- if (s->failed > 1)
- break;
/* handle a successful check operation, if parity is correct
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
if (sh->ops.zero_sum_result == 0) {
- /* Any parity checked was correct */
- set_bit(STRIPE_INSYNC, &sh->state);
+ /* both parities are correct */
+ if (!s->failed)
+ set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ /* in contrast to the raid5 case we can validate
+ * parity, but still have a failure to write
+ * back
+ */
+ sh->check_state = check_state_compute_result;
+ /* Returning at this point means that we may go
+ * off and bring p and/or q uptodate again so
+ * we make sure to check zero_sum_result again
+ * to verify if p or q need writeback
+ */
+ }
} else {
atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches);
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) {