diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 119 |
1 files changed, 58 insertions, 61 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2a196bb134d9..e985e820724e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -629,7 +629,21 @@ cont: btrfs_free_reserved_data_space_noquota(inode, start, end - start + 1); - goto free_pages_out; + + /* + * Ensure we only free the compressed pages if we have + * them allocated, as we can still reach here with + * inode_need_compress() == false. + */ + if (pages) { + for (i = 0; i < nr_pages; i++) { + WARN_ON(pages[i]->mapping); + put_page(pages[i]); + } + kfree(pages); + } + + return; } } @@ -708,13 +722,6 @@ cleanup_and_bail_uncompressed: *num_added += 1; return; - -free_pages_out: - for (i = 0; i < nr_pages; i++) { - WARN_ON(pages[i]->mapping); - put_page(pages[i]); - } - kfree(pages); } static void free_async_extent_pages(struct async_extent *async_extent) @@ -978,8 +985,8 @@ static noinline int cow_file_range(struct inode *inode, u64 alloc_hint = 0; u64 num_bytes; unsigned long ram_size; - u64 disk_num_bytes; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@ -996,7 +1003,6 @@ static noinline int cow_file_range(struct inode *inode, num_bytes = ALIGN(end - start + 1, blocksize); num_bytes = max(blocksize, num_bytes); - disk_num_bytes = num_bytes; inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); @@ -1023,17 +1029,32 @@ static noinline int cow_file_range(struct inode *inode, } } - BUG_ON(disk_num_bytes > - btrfs_super_total_bytes(fs_info->super_copy)); + BUG_ON(num_bytes > btrfs_super_total_bytes(fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); btrfs_drop_extent_cache(BTRFS_I(inode), start, start + num_bytes - 1, 0); - while (disk_num_bytes > 0) { - cur_alloc_size = disk_num_bytes; + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; + + while (num_bytes > 0) { + cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; @@ -1097,11 +1118,10 @@ static noinline int cow_file_range(struct inode *inode, delalloc_end, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC, page_ops); - if (disk_num_bytes < cur_alloc_size) - disk_num_bytes = 0; + if (num_bytes < cur_alloc_size) + num_bytes = 0; else - disk_num_bytes -= cur_alloc_size; - num_bytes -= cur_alloc_size; + num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; start += cur_alloc_size; extent_reserved = false; @@ -1139,8 +1159,8 @@ out_unlock: */ if (extent_reserved) { extent_clear_unlock_delalloc(inode, start, - start + cur_alloc_size, - start + cur_alloc_size, + start + cur_alloc_size - 1, + start + cur_alloc_size - 1, locked_page, clear_bits, page_ops); @@ -1315,7 +1335,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 disk_num_bytes; u64 ram_bytes; int extent_type; - int ret, err; + int ret; int type; int nocow; int check_prev = 1; @@ -1440,11 +1460,8 @@ next_slot: * if there are pending snapshots for this root, * we fall into common COW way. */ - if (!nolock) { - err = btrfs_start_write_no_snapshotting(root); - if (!err) - goto out_check; - } + if (!nolock && atomic_read(&root->snapshot_force_cow)) + goto out_check; /* * force cow if csum exists in the range. * this ensure that csum for a given extent are @@ -1453,9 +1470,6 @@ next_slot: ret = csum_exist_in_range(fs_info, disk_bytenr, num_bytes); if (ret) { - if (!nolock) - btrfs_end_write_no_snapshotting(root); - /* * ret could be -EIO if the above fails to read * metadata. @@ -1468,11 +1482,8 @@ next_slot: WARN_ON_ONCE(nolock); goto out_check; } - if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) { - if (!nolock) - btrfs_end_write_no_snapshotting(root); + if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) goto out_check; - } nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + @@ -1485,8 +1496,6 @@ next_slot: out_check: if (extent_end <= start) { path->slots[0]++; - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); goto next_slot; @@ -1508,8 +1517,6 @@ out_check: end, page_started, nr_written, 1, NULL); if (ret) { - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); @@ -1529,8 +1536,6 @@ out_check: ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); if (nocow) btrfs_dec_nocow_writers(fs_info, disk_bytenr); @@ -1569,8 +1574,6 @@ out_check: EXTENT_CLEAR_DATA_RESV, PAGE_UNLOCK | PAGE_SET_PRIVATE2); - if (!nolock && nocow) - btrfs_end_write_no_snapshotting(root); cur_offset = extent_end; /* @@ -8707,7 +8710,6 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip) /* bio split */ ASSERT(map_length <= INT_MAX); - atomic_inc(&dip->pending_bios); do { clone_len = min_t(int, submit_len, map_length); @@ -8758,7 +8760,8 @@ submit: if (!status) return 0; - bio_put(bio); + if (bio != orig_bio) + bio_put(bio); out_err: dip->errors = 1; /* @@ -8798,7 +8801,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode, bio->bi_private = dip; dip->orig_bio = bio; dip->dio_bio = dio_bio; - atomic_set(&dip->pending_bios, 0); + atomic_set(&dip->pending_bios, 1); io_bio = btrfs_io_bio(bio); io_bio->logical = file_offset; @@ -8947,9 +8950,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } ret = btrfs_delalloc_reserve_space(inode, &data_reserved, offset, count); @@ -9187,20 +9187,17 @@ again: /* * Qgroup reserved space handler * Page here will be either - * 1) Already written to disk - * In this case, its reserved space is released from data rsv map - * and will be freed by delayed_ref handler finally. - * So even we call qgroup_free_data(), it won't decrease reserved - * space. - * 2) Not written to disk - * This means the reserved space should be freed here. However, - * if a truncate invalidates the page (by clearing PageDirty) - * and the page is accounted for while allocating extent - * in btrfs_check_data_free_space() we let delayed_ref to - * free the entire extent. + * 1) Already written to disk or ordered extent already submitted + * Then its QGROUP_RESERVED bit in io_tree is already cleaned. + * Qgroup will be handled by its qgroup_record then. + * btrfs_qgroup_free_data() call will do nothing here. + * + * 2) Not written to disk yet + * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED + * bit of its io_tree, and free the qgroup reserved data space. + * Since the IO will never happen for this page. */ - if (PageDirty(page)) - btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); + btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | |