diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 234 |
1 files changed, 151 insertions, 83 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 447aa17c804e..8875fac9f958 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/nospec.h> #include <linux/backing-dev.h> +#include <linux/freezer.h> #include <trace/events/ext4.h> #ifdef CONFIG_EXT4_DEBUG @@ -3089,6 +3090,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, struct ext4_allocation_request *ar) { struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + struct ext4_super_block *es = sbi->s_es; int bsbits, max; ext4_lblk_t end; loff_t size, start_off; @@ -3170,6 +3172,19 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = size >> bsbits; start = start_off >> bsbits; + /* + * For tiny groups (smaller than 8MB) the chosen allocation + * alignment may be larger than group size. Make sure the + * alignment does not move allocation to a different group which + * makes mballoc fail assertions later. + */ + start = max(start, rounddown(ac->ac_o_ex.fe_logical, + (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb))); + + /* avoid unnecessary preallocation that may trigger assertions */ + if (start + size > EXT_MAX_BLOCKS) + size = EXT_MAX_BLOCKS - start; + /* don't cover already allocated blocks in selected range */ if (ar->pleft && start <= ar->lleft) { size -= ar->lleft + 1 - start; @@ -3260,18 +3275,21 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size); /* define goal start in order to merge */ - if (ar->pright && (ar->lright == (start + size))) { + if (ar->pright && (ar->lright == (start + size)) && + ar->pright >= size && + ar->pright - size >= le32_to_cpu(es->s_first_data_block)) { /* merge to the right */ ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size, - &ac->ac_f_ex.fe_group, - &ac->ac_f_ex.fe_start); + &ac->ac_g_ex.fe_group, + &ac->ac_g_ex.fe_start); ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; } - if (ar->pleft && (ar->lleft + 1 == start)) { + if (ar->pleft && (ar->lleft + 1 == start) && + ar->pleft + 1 < ext4_blocks_count(es)) { /* merge to the left */ ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1, - &ac->ac_f_ex.fe_group, - &ac->ac_f_ex.fe_start); + &ac->ac_g_ex.fe_group, + &ac->ac_g_ex.fe_start); ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; } @@ -3363,6 +3381,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, BUG_ON(start < pa->pa_pstart); BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len)); BUG_ON(pa->pa_free < len); + BUG_ON(ac->ac_b_ex.fe_len <= 0); pa->pa_free -= len; mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); @@ -3667,10 +3686,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) return -ENOMEM; if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) { - int winl; - int wins; - int win; - int offs; + int new_bex_start; + int new_bex_end; /* we can't allocate as much as normalizer wants. * so, found space must get proper lstart @@ -3678,26 +3695,40 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical); BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len); - /* we're limited by original request in that - * logical block must be covered any way - * winl is window we can move our chunk within */ - winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical; + /* + * Use the below logic for adjusting best extent as it keeps + * fragmentation in check while ensuring logical range of best + * extent doesn't overflow out of goal extent: + * + * 1. Check if best ex can be kept at end of goal and still + * cover original start + * 2. Else, check if best ex can be kept at start of goal and + * still cover original start + * 3. Else, keep the best ex at start of original request. + */ + new_bex_end = ac->ac_g_ex.fe_logical + + EXT4_C2B(sbi, ac->ac_g_ex.fe_len); + new_bex_start = new_bex_end - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); + if (ac->ac_o_ex.fe_logical >= new_bex_start) + goto adjust_bex; - /* also, we should cover whole original request */ - wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len); + new_bex_start = ac->ac_g_ex.fe_logical; + new_bex_end = + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); + if (ac->ac_o_ex.fe_logical < new_bex_end) + goto adjust_bex; - /* the smallest one defines real window */ - win = min(winl, wins); + new_bex_start = ac->ac_o_ex.fe_logical; + new_bex_end = + new_bex_start + EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - offs = ac->ac_o_ex.fe_logical % - EXT4_C2B(sbi, ac->ac_b_ex.fe_len); - if (offs && offs < win) - win = offs; +adjust_bex: + ac->ac_b_ex.fe_logical = new_bex_start; - ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - - EXT4_NUM_B2C(sbi, win); BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); + BUG_ON(new_bex_end > (ac->ac_g_ex.fe_logical + + EXT4_C2B(sbi, ac->ac_g_ex.fe_len))); } /* preallocation can change ac_b_ex, thus we store actually @@ -3884,7 +3915,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, trace_ext4_mb_release_group_pa(sb, pa); BUG_ON(pa->pa_deleted == 0); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); - BUG_ON(group != e4b->bd_group && pa->pa_len != 0); + if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) { + ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu", + e4b->bd_group, group, pa->pa_pstart); + return 0; + } mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len); atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded); trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len); @@ -4918,8 +4953,8 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count, - NULL); + err = ext4_issue_discard(sb, block_group, bit, + count_clusters, NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%d block:%d count:%lu failed" @@ -5119,19 +5154,19 @@ error_return: * @sb: super block for the file system * @start: starting block of the free extent in the alloc. group * @count: number of blocks to TRIM - * @group: alloc. group we are working with * @e4b: ext4 buddy for the group * * Trim "count" blocks starting at "start" in the "group". To assure that no * one will allocate those blocks, mark it as used in buddy bitmap. This must * be called with under the group lock. */ -static int ext4_trim_extent(struct super_block *sb, int start, int count, - ext4_group_t group, struct ext4_buddy *e4b) +static int ext4_trim_extent(struct super_block *sb, + int start, int count, struct ext4_buddy *e4b) __releases(bitlock) __acquires(bitlock) { struct ext4_free_extent ex; + ext4_group_t group = e4b->bd_group; int ret = 0; trace_ext4_trim_extent(sb, group, start, count); @@ -5154,6 +5189,71 @@ __acquires(bitlock) return ret; } +static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb, + ext4_group_t grp) +{ + if (grp < ext4_get_groups_count(sb)) + return EXT4_CLUSTERS_PER_GROUP(sb) - 1; + return (ext4_blocks_count(EXT4_SB(sb)->s_es) - + ext4_group_first_block_no(sb, grp) - 1) >> + EXT4_CLUSTER_BITS(sb); +} + +static bool ext4_trim_interrupted(void) +{ + return fatal_signal_pending(current) || freezing(current); +} + +static int ext4_try_to_trim_range(struct super_block *sb, + struct ext4_buddy *e4b, ext4_grpblk_t start, + ext4_grpblk_t max, ext4_grpblk_t minblocks) +{ + ext4_grpblk_t next, count, free_count; + bool set_trimmed = false; + void *bitmap; + + bitmap = e4b->bd_bitmap; + if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group)) + set_trimmed = true; + start = max(e4b->bd_info->bb_first_free, start); + count = 0; + free_count = 0; + + while (start <= max) { + start = mb_find_next_zero_bit(bitmap, max + 1, start); + if (start > max) + break; + next = mb_find_next_bit(bitmap, max + 1, start); + + if ((next - start) >= minblocks) { + int ret = ext4_trim_extent(sb, start, next - start, e4b); + + if (ret && ret != -EOPNOTSUPP) + return count; + count += next - start; + } + free_count += next - start; + start = next + 1; + + if (ext4_trim_interrupted()) + return count; + + if (need_resched()) { + ext4_unlock_group(sb, e4b->bd_group); + cond_resched(); + ext4_lock_group(sb, e4b->bd_group); + } + + if ((e4b->bd_info->bb_free - free_count) < minblocks) + break; + } + + if (set_trimmed) + EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info); + + return count; +} + /** * ext4_trim_all_free -- function to trim all free space in alloc. group * @sb: super block for file system @@ -5177,10 +5277,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) { - void *bitmap; - ext4_grpblk_t next, count = 0, free_count = 0; struct ext4_buddy e4b; - int ret = 0; + int ret; trace_ext4_trim_all_free(sb, group, start, max); @@ -5190,58 +5288,20 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret, group); return ret; } - bitmap = e4b.bd_bitmap; ext4_lock_group(sb, group); - if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) && - minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks)) - goto out; - - start = (e4b.bd_info->bb_first_free > start) ? - e4b.bd_info->bb_first_free : start; - - while (start <= max) { - start = mb_find_next_zero_bit(bitmap, max + 1, start); - if (start > max) - break; - next = mb_find_next_bit(bitmap, max + 1, start); - if ((next - start) >= minblocks) { - ret = ext4_trim_extent(sb, start, - next - start, group, &e4b); - if (ret && ret != -EOPNOTSUPP) - break; - ret = 0; - count += next - start; - } - free_count += next - start; - start = next + 1; - - if (fatal_signal_pending(current)) { - count = -ERESTARTSYS; - break; - } - - if (need_resched()) { - ext4_unlock_group(sb, group); - cond_resched(); - ext4_lock_group(sb, group); - } - - if ((e4b.bd_info->bb_free - free_count) < minblocks) - break; - } + if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || + minblocks < EXT4_SB(sb)->s_last_trim_minblks) + ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); + else + ret = 0; - if (!ret) { - ret = count; - EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); - } -out: ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); ext4_debug("trimmed %d blocks in the group %d\n", - count, group); + ret, group); return ret; } @@ -5260,6 +5320,7 @@ out: */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -5278,7 +5339,14 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; - if (end >= max_blks) + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } + if (end >= max_blks - 1) end = max_blks - 1; if (end <= first_data_blk) goto out; @@ -5295,6 +5363,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; for (group = first_group; group <= last_group; group++) { + if (ext4_trim_interrupted()) + break; grp = ext4_get_group_info(sb, group); /* We only do this if the grp has never been initialized */ if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { @@ -5311,10 +5381,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) */ if (group == last_group) end = last_cluster; - if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen); if (cnt < 0) { ret = cnt; break; @@ -5330,7 +5399,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) } if (!ret) - atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); + EXT4_SB(sb)->s_last_trim_minblks = minlen; out: range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits; @@ -5359,8 +5428,7 @@ ext4_mballoc_query_range( ext4_lock_group(sb, group); - start = (e4b.bd_info->bb_first_free > start) ? - e4b.bd_info->bb_first_free : start; + start = max(e4b.bd_info->bb_first_free, start); if (end >= EXT4_CLUSTERS_PER_GROUP(sb)) end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; |