aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/raid56.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r--fs/btrfs/raid56.c74
1 files changed, 57 insertions, 17 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 7ac679ed2b6c..226a17a335da 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -334,6 +334,9 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
{
bio_list_merge(&dest->bio_list, &victim->bio_list);
dest->bio_list_bytes += victim->bio_list_bytes;
+ /* Also inherit the bitmaps from @victim. */
+ bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
+ dest->stripe_npages);
dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}
@@ -878,6 +881,12 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+ /*
+ * Clear the data bitmap, as the rbio may be cached for later usage.
+ * do this before before unlock_stripe() so there will be no new bio
+ * for this bio.
+ */
+ bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);
/*
* At this moment, rbio->bio_list is empty, however since rbio does not
@@ -1212,6 +1221,9 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
else
BUG();
+ /* We should have at least one data sector. */
+ ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));
+
/* at this point we either have a full stripe,
* or we've read the full stripe from the drive.
* recalculate the parity and write the new results.
@@ -1285,6 +1297,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
+
+ /* This vertical stripe has no data, skip it. */
+ if (!test_bit(pagenr, rbio->dbitmap))
+ continue;
+
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1309,6 +1326,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
+
+ /* This vertical stripe has no data, skip it. */
+ if (!test_bit(pagenr, rbio->dbitmap))
+ continue;
+
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1748,6 +1770,33 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
run_plug(plug);
}
+/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
+static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
+{
+ const struct btrfs_fs_info *fs_info = rbio->fs_info;
+ const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
+ const u64 full_stripe_start = rbio->bbio->raid_map[0];
+ const u32 orig_len = orig_bio->bi_iter.bi_size;
+ const u32 sectorsize = fs_info->sectorsize;
+ u64 cur_logical;
+
+ ASSERT(orig_logical >= full_stripe_start &&
+ orig_logical + orig_len <= full_stripe_start +
+ rbio->nr_data * rbio->stripe_len);
+
+ bio_list_add(&rbio->bio_list, orig_bio);
+ rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
+
+ /* Update the dbitmap. */
+ for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
+ cur_logical += sectorsize) {
+ int bit = ((u32)(cur_logical - full_stripe_start) >>
+ PAGE_SHIFT) % rbio->stripe_npages;
+
+ set_bit(bit, rbio->dbitmap);
+ }
+}
+
/*
* our main entry point for writes from the rest of the FS.
*/
@@ -1764,9 +1813,8 @@ int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
btrfs_put_bbio(bbio);
return PTR_ERR(rbio);
}
- bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_iter.bi_size;
rbio->operation = BTRFS_RBIO_WRITE;
+ rbio_add_bio(rbio, bio);
btrfs_bio_counter_inc_noblocked(fs_info);
rbio->generic_bio_cnt = 1;
@@ -2068,9 +2116,12 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
atomic_set(&rbio->error, 0);
/*
- * read everything that hasn't failed. Thanks to the
- * stripe cache, it is possible that some or all of these
- * pages are going to be uptodate.
+ * Read everything that hasn't failed. However this time we will
+ * not trust any cached sector.
+ * As we may read out some stale data but higher layer is not reading
+ * that stale part.
+ *
+ * So here we always re-read everything in recovery path.
*/
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
if (rbio->faila == stripe || rbio->failb == stripe) {
@@ -2079,16 +2130,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
}
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
- struct page *p;
-
- /*
- * the rmw code may have already read this
- * page in
- */
- p = rbio_stripe_page(rbio, stripe, pagenr);
- if (PageUptodate(p))
- continue;
-
ret = rbio_add_io_page(rbio, &bio_list,
rbio_stripe_page(rbio, stripe, pagenr),
stripe, pagenr, rbio->stripe_len);
@@ -2170,8 +2211,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
}
rbio->operation = BTRFS_RBIO_READ_REBUILD;
- bio_list_add(&rbio->bio_list, bio);
- rbio->bio_list_bytes = bio->bi_iter.bi_size;
+ rbio_add_bio(rbio, bio);
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {