aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/acl.h5
-rw-r--r--fs/ext4/balloc.c25
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/extents.c31
-rw-r--r--fs/ext4/extents_status.c343
-rw-r--r--fs/ext4/file.c6
-rw-r--r--fs/ext4/fsmap.c10
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/indirect.c17
-rw-r--r--fs/ext4/inline.c44
-rw-r--r--fs/ext4/inode.c142
-rw-r--r--fs/ext4/ioctl.c19
-rw-r--r--fs/ext4/mballoc.c281
-rw-r--r--fs/ext4/mballoc.h14
-rw-r--r--fs/ext4/migrate.c7
-rw-r--r--fs/ext4/move_extent.c6
-rw-r--r--fs/ext4/namei.c160
-rw-r--r--fs/ext4/page-io.c10
-rw-r--r--fs/ext4/resize.c88
-rw-r--r--fs/ext4/super.c98
-rw-r--r--fs/ext4/sysfs.c7
-rw-r--r--fs/ext4/verity.c7
-rw-r--r--fs/ext4/xattr.c319
-rw-r--r--fs/ext4/xattr.h20
24 files changed, 1102 insertions, 571 deletions
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9b63f5416a2f..7f3b25b3fa6d 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -67,6 +67,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
+ /* usually, the umask is applied by posix_acl_create(), but if
+ ext4 ACL support is disabled at compile time, we need to do
+ it here, because posix_acl_create() will never be called */
+ inode->i_mode &= ~current_umask();
+
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 031ff3f19018..b68cee75f5c5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -303,6 +303,22 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
+static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
+ ext4_group_t block_group,
+ struct buffer_head *bh)
+{
+ ext4_grpblk_t next_zero_bit;
+ unsigned long bitmap_size = sb->s_blocksize * 8;
+ unsigned int offset = num_clusters_in_group(sb, block_group);
+
+ if (bitmap_size <= offset)
+ return 0;
+
+ next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
+
+ return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
+}
+
/*
* Return the block number which was discovered to be invalid, or 0 if
* the block bitmap is valid.
@@ -395,6 +411,15 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
return -EFSCORRUPTED;
}
+ blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
+ block_group, blk);
+ ext4_mark_group_bitmap_corrupted(sb, block_group,
+ EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+ return -EFSCORRUPTED;
+ }
set_buffer_verified(bh);
verified:
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e932e8482371..4d02116193de 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -501,7 +501,7 @@ enum {
*
* It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/
-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
static inline void ext4_check_flag_values(void)
@@ -938,11 +938,13 @@ do { \
* where the second inode has larger inode number
* than the first
* I_DATA_SEM_QUOTA - Used for quota inodes only
+ * I_DATA_SEM_EA - Used for ea_inodes only
*/
enum {
I_DATA_SEM_NORMAL = 0,
I_DATA_SEM_OTHER,
I_DATA_SEM_QUOTA,
+ I_DATA_SEM_EA
};
@@ -1439,7 +1441,7 @@ struct ext4_sb_info {
unsigned long s_commit_interval;
u32 s_max_batch_time;
u32 s_min_batch_time;
- struct block_device *journal_bdev;
+ struct block_device *s_journal_bdev;
#ifdef CONFIG_QUOTA
/* Names of quota files with journalled quota */
char __rcu *s_qf_names[EXT4_MAXQUOTAS];
@@ -1527,7 +1529,7 @@ struct ext4_sb_info {
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
- atomic_t s_last_trim_minblks;
+ unsigned long s_last_trim_minblks;
/* Reference to checksum algorithm driver via cryptoapi */
struct crypto_shash *s_chksum_driver;
@@ -2608,7 +2610,9 @@ int do_journal_get_write_access(handle_t *handle,
typedef enum {
EXT4_IGET_NORMAL = 0,
EXT4_IGET_SPECIAL = 0x0001, /* OK to iget a system inode */
- EXT4_IGET_HANDLE = 0x0002 /* Inode # is from a handle */
+ EXT4_IGET_HANDLE = 0x0002, /* Inode # is from a handle */
+ EXT4_IGET_BAD = 0x0004, /* Allow to iget a bad inode */
+ EXT4_IGET_EA_INODE = 0x0008 /* Inode should contain an EA value */
} ext4_iget_flags;
extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index f1bbce4350c4..98e1b1ddb4ec 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -500,6 +500,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
error_msg = "invalid eh_entries";
goto corrupted;
}
+ if (unlikely((eh->eh_entries == 0) && (depth > 0))) {
+ error_msg = "eh_entries is 0 but eh_depth is > 0";
+ goto corrupted;
+ }
if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
error_msg = "invalid extent entries";
goto corrupted;
@@ -1032,6 +1036,11 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ix = curp->p_idx;
}
+ if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
+ EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
+ return -EFSCORRUPTED;
+ }
+
len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
BUG_ON(len < 0);
if (len > 0) {
@@ -1041,11 +1050,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
}
- if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
- EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
- return -EFSCORRUPTED;
- }
-
ix->ei_block = cpu_to_le32(logical);
ext4_idx_store_pblock(ix, ptr);
le16_add_cpu(&curp->p_hdr->eh_entries, 1);
@@ -4932,13 +4936,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- if (mode & FALLOC_FL_PUNCH_HOLE)
- return ext4_punch_hole(inode, offset, len);
-
+ inode_lock(inode);
ret = ext4_convert_inline_data(inode);
+ inode_unlock(inode);
if (ret)
return ret;
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ return ext4_punch_hole(inode, offset, len);
+
if (mode & FALLOC_FL_COLLAPSE_RANGE)
return ext4_collapse_range(inode, offset, len);
@@ -6016,6 +6022,15 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
struct ext4_extent *extent;
ext4_lblk_t first_lblk, first_lclu, last_lclu;
+ /*
+ * if data can be stored inline, the logical cluster isn't
+ * mapped - no physical clusters have been allocated, and the
+ * file has no extents
+ */
+ if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) ||
+ ext4_has_inline_data(inode))
+ return 0;
+
/* search for the extent closest to the first block in the cluster */
path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
if (IS_ERR(path)) {
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 43fba01da6c3..3346a9252063 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -144,14 +144,17 @@
static struct kmem_cache *ext4_es_cachep;
static struct kmem_cache *ext4_pending_cachep;
-static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ struct extent_status *prealloc);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved);
+ ext4_lblk_t end, int *reserved,
+ struct extent_status *prealloc);
static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
struct ext4_inode_info *locked_ei);
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len);
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len,
+ struct pending_reservation **prealloc);
int __init ext4_init_es(void)
{
@@ -269,14 +272,12 @@ static void __es_find_extent_range(struct inode *inode,
/* see if the extent has been cached */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u) %llu %x\n",
- lblk, es1->es_lblk, es1->es_len,
- ext4_es_pblock(es1), ext4_es_status(es1));
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u) %llu %x\n",
+ lblk, es1->es_lblk, es1->es_len,
+ ext4_es_pblock(es1), ext4_es_status(es1));
+ goto out;
}
es1 = __es_tree_search(&tree->root, lblk);
@@ -295,7 +296,7 @@ out:
}
if (es1 && matching_fn(es1)) {
- tree->cache_es = es1;
+ WRITE_ONCE(tree->cache_es, es1);
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
@@ -441,22 +442,49 @@ static void ext4_es_list_del(struct inode *inode)
spin_unlock(&sbi->s_es_lock);
}
-static struct extent_status *
-ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
- ext4_fsblk_t pblk)
+static inline struct pending_reservation *__alloc_pending(bool nofail)
+{
+ if (!nofail)
+ return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
+
+ return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static inline void __free_pending(struct pending_reservation *pr)
+{
+ kmem_cache_free(ext4_pending_cachep, pr);
+}
+
+/*
+ * Returns true if we cannot fail to allocate memory for this extent_status
+ * entry and cannot reclaim it until its status changes.
+ */
+static inline bool ext4_es_must_keep(struct extent_status *es)
+{
+ /* fiemap, bigalloc, and seek_data/hole need to use it. */
+ if (ext4_es_is_delayed(es))
+ return true;
+
+ return false;
+}
+
+static inline struct extent_status *__es_alloc_extent(bool nofail)
+{
+ if (!nofail)
+ return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
+
+ return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static void ext4_es_init_extent(struct inode *inode, struct extent_status *es,
+ ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)
{
- struct extent_status *es;
- es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC);
- if (es == NULL)
- return NULL;
es->es_lblk = lblk;
es->es_len = len;
es->es_pblk = pblk;
- /*
- * We don't count delayed extent because we never try to reclaim them
- */
- if (!ext4_es_is_delayed(es)) {
+ /* We never try to reclaim a must kept extent, so we don't count it. */
+ if (!ext4_es_must_keep(es)) {
if (!EXT4_I(inode)->i_es_shk_nr++)
ext4_es_list_add(inode);
percpu_counter_inc(&EXT4_SB(inode->i_sb)->
@@ -465,8 +493,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
EXT4_I(inode)->i_es_all_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+}
- return es;
+static inline void __es_free_extent(struct extent_status *es)
+{
+ kmem_cache_free(ext4_es_cachep, es);
}
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
@@ -474,8 +505,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
EXT4_I(inode)->i_es_all_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
- /* Decrease the shrink counter when this es is not delayed */
- if (!ext4_es_is_delayed(es)) {
+ /* Decrease the shrink counter when we can reclaim the extent. */
+ if (!ext4_es_must_keep(es)) {
BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);
if (!--EXT4_I(inode)->i_es_shk_nr)
ext4_es_list_del(inode);
@@ -483,7 +514,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
s_es_stats.es_stats_shk_cnt);
}
- kmem_cache_free(ext4_es_cachep, es);
+ __es_free_extent(es);
}
/*
@@ -745,7 +776,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode,
}
#endif
-static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
+static int __es_insert_extent(struct inode *inode, struct extent_status *newes,
+ struct extent_status *prealloc)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
struct rb_node **p = &tree->root.rb_node;
@@ -785,10 +817,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
}
}
- es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len,
- newes->es_pblk);
+ if (prealloc)
+ es = prealloc;
+ else
+ es = __es_alloc_extent(false);
if (!es)
return -ENOMEM;
+ ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len,
+ newes->es_pblk);
+
rb_link_node(&es->rb_node, parent, p);
rb_insert_color(&es->rb_node, &tree->root);
@@ -809,8 +846,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
- int err = 0;
+ int err1 = 0, err2 = 0, err3 = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
+ struct pending_reservation *pr = NULL;
+ bool revise_pending = false;
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
@@ -835,29 +876,57 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_es_insert_extent_check(inode, &newes);
+ revise_pending = sbi->s_cluster_ratio > 1 &&
+ test_opt(inode->i_sb, DELALLOC) &&
+ (status & (EXTENT_STATUS_WRITTEN |
+ EXTENT_STATUS_UNWRITTEN));
+retry:
+ if (err1 && !es1)
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
+ if ((err1 || err2 || err3) && revise_pending && !pr)
+ pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, end, NULL);
- if (err != 0)
+
+ err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
+ if (err1 != 0)
goto error;
-retry:
- err = __es_insert_extent(inode, &newes);
- if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
- if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
- err = 0;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
- if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
- (status & EXTENT_STATUS_WRITTEN ||
- status & EXTENT_STATUS_UNWRITTEN))
- __revise_pending(inode, lblk, len);
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
+ err2 = 0;
+ if (err2 != 0)
+ goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
+ if (revise_pending) {
+ err3 = __revise_pending(inode, lblk, len, &pr);
+ if (err3 != 0)
+ goto error;
+ if (pr) {
+ __free_pending(pr);
+ pr = NULL;
+ }
+ }
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2 || err3)
+ goto retry;
ext4_es_print_tree(inode);
-
- return err;
+ return 0;
}
/*
@@ -887,7 +956,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
if (!es || es->es_lblk > end)
- __es_insert_extent(inode, &newes);
+ __es_insert_extent(inode, &newes, NULL);
write_unlock(&EXT4_I(inode)->i_es_lock);
}
@@ -916,14 +985,12 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
/* find extent in cache firstly */
es->es_lblk = es->es_len = es->es_pblk = 0;
- if (tree->cache_es) {
- es1 = tree->cache_es;
- if (in_range(lblk, es1->es_lblk, es1->es_len)) {
- es_debug("%u cached by [%u/%u)\n",
- lblk, es1->es_lblk, es1->es_len);
- found = 1;
- goto out;
- }
+ es1 = READ_ONCE(tree->cache_es);
+ if (es1 && in_range(lblk, es1->es_lblk, es1->es_len)) {
+ es_debug("%u cached by [%u/%u)\n",
+ lblk, es1->es_lblk, es1->es_len);
+ found = 1;
+ goto out;
}
node = tree->root.rb_node;
@@ -1257,7 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
rc->ndelonly--;
node = rb_next(&pr->rb_node);
rb_erase(&pr->rb_node, &tree->root);
- kmem_cache_free(ext4_pending_cachep, pr);
+ __free_pending(pr);
if (!node)
break;
pr = rb_entry(node, struct pending_reservation,
@@ -1276,6 +1343,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
* @lblk - first block in range
* @end - last block in range
* @reserved - number of cluster reservations released
+ * @prealloc - pre-allocated es to avoid memory allocation failures
*
* If @reserved is not NULL and delayed allocation is enabled, counts
* block/cluster reservations freed by removing range and if bigalloc
@@ -1283,7 +1351,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
* error code on failure.
*/
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t end, int *reserved)
+ ext4_lblk_t end, int *reserved,
+ struct extent_status *prealloc)
{
struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
struct rb_node *node;
@@ -1291,14 +1360,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status orig_es;
ext4_lblk_t len1, len2;
ext4_fsblk_t block;
- int err;
+ int err = 0;
bool count_reserved = true;
struct rsvd_count rc;
if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))
count_reserved = false;
-retry:
- err = 0;
es = __es_tree_search(&tree->root, lblk);
if (!es)
@@ -1332,14 +1399,13 @@ retry:
orig_es.es_len - len2;
ext4_es_store_pblock_status(&newes, block,
ext4_es_status(&orig_es));
- err = __es_insert_extent(inode, &newes);
+ err = __es_insert_extent(inode, &newes, prealloc);
if (err) {
+ if (!ext4_es_must_keep(&newes))
+ return 0;
+
es->es_lblk = orig_es.es_lblk;
es->es_len = orig_es.es_len;
- if ((err == -ENOMEM) &&
- __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
goto out;
}
} else {
@@ -1352,9 +1418,9 @@ retry:
}
}
if (count_reserved)
- count_rsvd(inode, lblk, orig_es.es_len - len1 - len2,
- &orig_es, &rc);
- goto out;
+ count_rsvd(inode, orig_es.es_lblk + len1,
+ orig_es.es_len - len1 - len2, &orig_es, &rc);
+ goto out_get_reserved;
}
if (len1 > 0) {
@@ -1396,6 +1462,7 @@ retry:
}
}
+out_get_reserved:
if (count_reserved)
*reserved = get_rsvd(inode, end, es, &rc);
out:
@@ -1418,6 +1485,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end;
int err = 0;
int reserved = 0;
+ struct extent_status *es = NULL;
trace_ext4_es_remove_extent(inode, lblk, len);
es_debug("remove [%u/%u) from extent status tree of inode %lu\n",
@@ -1429,17 +1497,29 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
end = lblk + len - 1;
BUG_ON(end < lblk);
+retry:
+ if (err && !es)
+ es = __es_alloc_extent(true);
/*
* ext4_clear_inode() depends on us taking i_es_lock unconditionally
* so that we are sure __es_shrink() is done with the inode before it
* is reclaimed.
*/
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, end, &reserved);
+ err = __es_remove_extent(inode, lblk, end, &reserved, es);
+ /* Free preallocated extent if it didn't get used. */
+ if (es) {
+ if (!es->es_len)
+ __es_free_extent(es);
+ es = NULL;
+ }
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err)
+ goto retry;
+
ext4_es_print_tree(inode);
ext4_da_release_space(inode, reserved);
- return err;
+ return 0;
}
static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
@@ -1686,11 +1766,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,
(*nr_to_scan)--;
node = rb_next(&es->rb_node);
- /*
- * We can't reclaim delayed extent from status tree because
- * fiemap, bigallic, and seek_data/hole need to use it.
- */
- if (ext4_es_is_delayed(es))
+
+ if (ext4_es_must_keep(es))
goto next;
if (ext4_es_is_referenced(es)) {
ext4_es_clear_referenced(es);
@@ -1754,7 +1831,7 @@ void ext4_clear_inode_es(struct inode *inode)
while (node) {
es = rb_entry(node, struct extent_status, rb_node);
node = rb_next(node);
- if (!ext4_es_is_delayed(es)) {
+ if (!ext4_es_must_keep(es)) {
rb_erase(&es->rb_node, &tree->root);
ext4_es_free_extent(inode, es);
}
@@ -1841,11 +1918,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
*
* @inode - file containing the cluster
* @lblk - logical block in the cluster to be added
+ * @prealloc - preallocated pending entry
*
* Returns 0 on successful insertion and -ENOMEM on failure. If the
* pending reservation is already in the set, returns successfully.
*/
-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
+static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
+ struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1871,10 +1950,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
}
}
- pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
- if (pr == NULL) {
- ret = -ENOMEM;
- goto out;
+ if (likely(*prealloc == NULL)) {
+ pr = __alloc_pending(false);
+ if (!pr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ pr = *prealloc;
+ *prealloc = NULL;
}
pr->lclu = lclu;
@@ -1904,7 +1988,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
if (pr != NULL) {
tree = &EXT4_I(inode)->i_pending_tree;
rb_erase(&pr->rb_node, &tree->root);
- kmem_cache_free(ext4_pending_cachep, pr);
+ __free_pending(pr);
}
}
@@ -1965,7 +2049,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
bool allocated)
{
struct extent_status newes;
- int err = 0;
+ int err1 = 0, err2 = 0, err3 = 0;
+ struct extent_status *es1 = NULL;
+ struct extent_status *es2 = NULL;
+ struct pending_reservation *pr = NULL;
es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
lblk, inode->i_ino);
@@ -1977,29 +2064,52 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
ext4_es_insert_extent_check(inode, &newes);
+retry:
+ if (err1 && !es1)
+ es1 = __es_alloc_extent(true);
+ if ((err1 || err2) && !es2)
+ es2 = __es_alloc_extent(true);
+ if ((err1 || err2 || err3) && allocated && !pr)
+ pr = __alloc_pending(true);
write_lock(&EXT4_I(inode)->i_es_lock);
- err = __es_remove_extent(inode, lblk, lblk, NULL);
- if (err != 0)
- goto error;
-retry:
- err = __es_insert_extent(inode, &newes);
- if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb),
- 128, EXT4_I(inode)))
- goto retry;
- if (err != 0)
+ err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
+ if (err1 != 0)
goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es1) {
+ if (!es1->es_len)
+ __es_free_extent(es1);
+ es1 = NULL;
+ }
- if (allocated)
- __insert_pending(inode, lblk);
+ err2 = __es_insert_extent(inode, &newes, es2);
+ if (err2 != 0)
+ goto error;
+ /* Free preallocated extent if it didn't get used. */
+ if (es2) {
+ if (!es2->es_len)
+ __es_free_extent(es2);
+ es2 = NULL;
+ }
+ if (allocated) {
+ err3 = __insert_pending(inode, lblk, &pr);
+ if (err3 != 0)
+ goto error;
+ if (pr) {
+ __free_pending(pr);
+ pr = NULL;
+ }
+ }
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ if (err1 || err2 || err3)
+ goto retry;
ext4_es_print_tree(inode);
ext4_print_pending_tree(inode);
-
- return err;
+ return 0;
}
/*
@@ -2100,21 +2210,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
* @inode - file containing the range
* @lblk - logical block defining the start of range
* @len - length of range in blocks
+ * @prealloc - preallocated pending entry
*
* Used after a newly allocated extent is added to the extents status tree.
* Requires that the extents in the range have either written or unwritten
* status. Must be called while holding i_es_lock.
*/
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len)
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len,
+ struct pending_reservation **prealloc)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t end = lblk + len - 1;
ext4_lblk_t first, last;
bool f_del = false, l_del = false;
+ int ret = 0;
if (len == 0)
- return;
+ return 0;
/*
* Two cases - block range within single cluster and block range
@@ -2135,7 +2248,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
if (f_del) {
- __insert_pending(inode, first);
+ ret = __insert_pending(inode, first, prealloc);
+ if (ret < 0)
+ goto out;
} else {
last = EXT4_LBLK_CMASK(sbi, end) +
sbi->s_cluster_ratio - 1;
@@ -2143,9 +2258,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
l_del = __es_scan_range(inode,
&ext4_es_is_delonly,
end + 1, last);
- if (l_del)
- __insert_pending(inode, last);
- else
+ if (l_del) {
+ ret = __insert_pending(inode, last, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, last);
}
} else {
@@ -2153,18 +2270,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
if (first != lblk)
f_del = __es_scan_range(inode, &ext4_es_is_delonly,
first, lblk - 1);
- if (f_del)
- __insert_pending(inode, first);
- else
+ if (f_del) {
+ ret = __insert_pending(inode, first, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, first);
last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
if (last != end)
l_del = __es_scan_range(inode, &ext4_es_is_delonly,
end + 1, last);
- if (l_del)
- __insert_pending(inode, last);
- else
+ if (l_del) {
+ ret = __insert_pending(inode, last, prealloc);
+ if (ret < 0)
+ goto out;
+ } else
__remove_pending(inode, last);
}
+out:
+ return ret;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1513e90fb6d2..7ac55d071eb2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -505,6 +505,12 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
inode_unlock_shared(inode);
break;
}
+ /*
+ * Make sure inline data cannot be created anymore since we are going
+ * to allocate blocks for DIO. We know the inode does not have any
+ * inline data now because ext4_dio_supported() checked for that.
+ */
+ ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
if (offset < 0)
return offset;
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 37347ba868b7..d18c4cd4c63f 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
+ if (keys[1].fmr_physical < keys[0].fmr_physical)
+ return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;
@@ -574,8 +576,8 @@ static bool ext4_getfsmap_is_valid_device(struct super_block *sb,
if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev))
return true;
- if (EXT4_SB(sb)->journal_bdev &&
- fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev))
+ if (EXT4_SB(sb)->s_journal_bdev &&
+ fm->fmr_device == new_encode_dev(EXT4_SB(sb)->s_journal_bdev->bd_dev))
return true;
return false;
}
@@ -645,9 +647,9 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head,
memset(handlers, 0, sizeof(handlers));
handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev);
handlers[0].gfd_fn = ext4_getfsmap_datadev;
- if (EXT4_SB(sb)->journal_bdev) {
+ if (EXT4_SB(sb)->s_journal_bdev) {
handlers[1].gfd_dev = new_encode_dev(
- EXT4_SB(sb)->journal_bdev->bd_dev);
+ EXT4_SB(sb)->s_journal_bdev->bd_dev);
handlers[1].gfd_fn = ext4_getfsmap_logdev;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 83846cc81485..cbde5a096c7b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -500,7 +500,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
goto fallback;
}
- max_dirs = ndirs / ngroups + inodes_per_group / 16;
+ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16;
min_inodes = avefreei - inodes_per_group*flex_size / 4;
if (min_inodes < 1)
min_inodes = 1;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 36699a131168..25532bac7777 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ unsigned int key;
int ret = -EIO;
*err = 0;
@@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
+ key = le32_to_cpu(p->key);
+ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
+ /* the block was out of range */
+ ret = -EFSCORRUPTED;
+ goto failure;
+ }
+ bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
@@ -629,6 +636,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
+
+ /*
+ * Update reserved blocks/metadata blocks after successful block
+ * allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, count, 1);
+
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8f665aa1d706..549ceac9099c 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -32,8 +32,12 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
struct ext4_inode *raw_inode;
+ void *end;
int free, min_offs;
+ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+ return 0;
+
min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
EXT4_GOOD_OLD_INODE_SIZE -
EXT4_I(inode)->i_extra_isize -
@@ -52,14 +56,23 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
+ end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
/* Compute min_offs. */
- for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
+ while (!IS_LAST_ENTRY(entry)) {
+ void *next = EXT4_XATTR_NEXT(entry);
+
+ if (next >= end) {
+ EXT4_ERROR_INODE(inode,
+ "corrupt xattr in inline inode");
+ return 0;
+ }
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
}
+ entry = next;
}
free = min_offs -
((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
@@ -154,7 +167,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
- ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);
@@ -204,7 +216,7 @@ out:
/*
* write the buffer to the inline inode.
* If 'create' is set, we don't need to do the extra copy in the xattr
- * value since it is already handled by ext4_xattr_ibody_inline_set.
+ * value since it is already handled by ext4_xattr_ibody_set.
* That saves us one memcpy.
*/
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
@@ -286,7 +298,7 @@ static int ext4_create_inline_data(handle_t *handle,
BUG_ON(!is.s.not_found);
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error) {
if (error == -ENOSPC)
ext4_clear_inode_state(inode,
@@ -346,7 +358,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
- if (error == -ENODATA)
+ if (error < 0)
goto out;
BUFFER_TRACE(is.iloc.bh, "get_write_access");
@@ -358,7 +370,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
i.value = value;
i.value_len = len;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -431,7 +443,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
if (error)
goto out;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -1170,6 +1182,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
ext4_initialize_dirent_tail(dir_block,
inode->i_sb->s_blocksize);
set_buffer_uptodate(dir_block);
+ unlock_buffer(dir_block);
err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
if (err)
return err;
@@ -1243,6 +1256,7 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
if (!S_ISDIR(inode->i_mode)) {
memcpy(data_bh->b_data, buf, inline_size);
set_buffer_uptodate(data_bh);
+ unlock_buffer(data_bh);
error = ext4_handle_dirty_metadata(handle,
inode, data_bh);
} else {
@@ -1250,7 +1264,6 @@ static int ext4_convert_inline_data_nolock(handle_t *handle,
buf, inline_size);
}
- unlock_buffer(data_bh);
out_restore:
if (error)
ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);
@@ -1967,8 +1980,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
i.value = value;
i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
- err = ext4_xattr_ibody_inline_set(handle, inode,
- &i, &is);
+ err = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (err)
goto out_error;
}
@@ -2013,6 +2025,18 @@ int ext4_convert_inline_data(struct inode *inode)
if (!ext4_has_inline_data(inode)) {
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
return 0;
+ } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
+ /*
+ * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is
+ * cleared. This means we are in the middle of moving of
+ * inline data to delay allocated block. Just force writeout
+ * here to finish conversion.
+ */
+ error = filemap_flush(inode->i_mapping);
+ if (error)
+ return error;
+ if (!ext4_has_inline_data(inode))
+ return 0;
}
needed_blocks = ext4_writepage_trans_blocks(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 00686fbe3c27..8a0bca3b653b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -207,6 +207,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+ ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
@@ -667,16 +669,6 @@ found:
*/
ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
}
-
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now. We don't
- * support fallocate for non extent files. So we can update
- * reserve space here.
- */
- if ((retval > 0) &&
- (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
- ext4_da_update_reserve_space(inode, retval, 1);
}
if (retval > 0) {
@@ -1317,6 +1309,13 @@ retry_grab:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
+ /*
+ * The same as page allocation, we prealloc buffer heads before
+ * starting the handle.
+ */
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
+
unlock_page(page);
retry_journal:
@@ -1430,7 +1429,8 @@ static int ext4_write_end(struct file *file,
bool verity = ext4_verity_in_progress(inode);
trace_ext4_write_end(inode, pos, len, copied);
- if (inline_data) {
+ if (inline_data &&
+ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -1717,7 +1717,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
ext4_lblk_t start, last;
start = index << (PAGE_SHIFT - inode->i_blkbits);
last = end << (PAGE_SHIFT - inode->i_blkbits);
+
+ /*
+ * avoid racing with extent status tree scans made by
+ * ext4_insert_delayed_block()
+ */
+ down_write(&EXT4_I(inode)->i_data_sem);
ext4_es_remove_extent(inode, start, last - start + 1);
+ up_write(&EXT4_I(inode)->i_data_sem);
}
pagevec_init(&pvec);
@@ -4322,15 +4329,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
trace_ext4_punch_hole(inode, offset, length, 0);
- ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
- if (ext4_has_inline_data(inode)) {
- down_write(&EXT4_I(inode)->i_mmap_sem);
- ret = ext4_convert_inline_data(inode);
- up_write(&EXT4_I(inode)->i_mmap_sem);
- if (ret)
- return ret;
- }
-
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
@@ -4532,7 +4530,7 @@ int ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
- return 0;
+ goto out_trace;
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
@@ -4543,16 +4541,15 @@ int ext4_truncate(struct inode *inode)
int has_inline = 1;
err = ext4_inline_data_truncate(inode, &has_inline);
- if (err)
- return err;
- if (has_inline)
- return 0;
+ if (err || has_inline)
+ goto out_trace;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
- if (ext4_inode_attach_jinode(inode) < 0)
- return 0;
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
+ goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4561,8 +4558,10 @@ int ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_trace;
+ }
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4611,6 +4610,7 @@ out_stop:
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
+out_trace:
trace_ext4_truncate_exit(inode);
return err;
}
@@ -4647,9 +4647,17 @@ static int __ext4_get_inode_loc(struct inode *inode,
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((inode->i_ino - 1) %
EXT4_INODES_PER_GROUP(sb));
- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+ block = ext4_inode_table(sb, gdp);
+ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
+ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
+ ext4_error(sb, "Invalid inode table block %llu in "
+ "block_group %u", block, iloc->block_group);
+ return -EFSCORRUPTED;
+ }
+ block += (inode_offset / inodes_per_block);
+
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
@@ -4843,11 +4851,15 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
__le32 *magic = (void *)raw_inode +
EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
- if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
- EXT4_INODE_SIZE(inode->i_sb) &&
+ if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ int err;
+
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- return ext4_find_inline_data_nolock(inode);
+ err = ext4_find_inline_data_nolock(inode);
+ if (!err && ext4_has_inline_data(inode))
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return err;
} else
EXT4_I(inode)->i_inline_off = 0;
return 0;
@@ -4881,6 +4893,24 @@ static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
return inode_peek_iversion(inode);
}
+static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
+
+{
+ if (flags & EXT4_IGET_EA_INODE) {
+ if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "missing EA_INODE flag";
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
+ EXT4_I(inode)->i_file_acl)
+ return "ea_inode with extended attributes";
+ } else {
+ if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
+ return "unexpected EA_INODE flag";
+ }
+ if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
+ return "unexpected bad inode w/o EXT4_IGET_BAD";
+ return NULL;
+}
+
struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ext4_iget_flags flags, const char *function,
unsigned int line)
@@ -4889,6 +4919,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
struct ext4_inode *raw_inode;
struct ext4_inode_info *ei;
struct inode *inode;
+ const char *err_str;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
loff_t size;
@@ -4912,8 +4943,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode = iget_locked(sb, ino);
if (!inode)
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (!(inode->i_state & I_NEW)) {
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ iput(inode);
+ return ERR_PTR(-EFSCORRUPTED);
+ }
return inode;
+ }
ei = EXT4_I(inode);
iloc.bh = NULL;
@@ -4923,13 +4960,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- ext4_error_inode(inode, function, line, 0,
- "iget: root inode unallocated");
- ret = -EFSCORRUPTED;
- goto bad_inode;
- }
-
if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE;
@@ -5000,11 +5030,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0) {
- if ((inode->i_mode == 0 ||
+ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
ino != EXT4_BOOT_LOADER_INO) {
- /* this inode is deleted */
- ret = -ESTALE;
+ /* this inode is deleted or unallocated */
+ if (flags & EXT4_IGET_SPECIAL) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: special inode unallocated");
+ ret = -EFSCORRUPTED;
+ } else
+ ret = -ESTALE;
goto bad_inode;
}
/* The only unlinked inodes we let through here have
@@ -5180,8 +5215,13 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
- brelse(iloc.bh);
+ if ((err_str = check_igot_inode(inode, flags)) != NULL) {
+ ext4_error_inode(inode, function, line, 0, err_str);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ brelse(iloc.bh);
unlock_new_inode(inode);
return inode;
@@ -5668,6 +5708,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
handle_t *handle;
loff_t oldsize = inode->i_size;
+ loff_t old_disksize;
int shrink = (attr->ia_size < inode->i_size);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
@@ -5723,6 +5764,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_ctime = inode->i_mtime;
}
down_write(&EXT4_I(inode)->i_data_sem);
+ old_disksize = EXT4_I(inode)->i_disksize;
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
@@ -5734,6 +5776,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
*/
if (!error)
i_size_write(inode, attr->ia_size);
+ else
+ EXT4_I(inode)->i_disksize = old_disksize;
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
if (error)
@@ -6037,6 +6081,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
return 0;
}
+ /*
+ * We may need to allocate external xattr block so we need quotas
+ * initialized. Here we can be called with various locks held so we
+ * cannot affort to initialize quotas ourselves. So just bail.
+ */
+ if (dquot_initialize_needed(inode))
+ return -EAGAIN;
+
/* try to expand with EAs present */
error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9fa20f9ba52b..ae47505964c4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -121,7 +121,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
blkcnt_t blocks;
unsigned short bytes;
- inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
+ inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO,
+ EXT4_IGET_SPECIAL | EXT4_IGET_BAD);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
@@ -169,7 +170,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
- if (inode_bl->i_nlink == 0) {
+ if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) {
/* this inode has never been used as a BOOT_LOADER */
set_nlink(inode_bl, 1);
i_uid_write(inode_bl, 0);
@@ -178,6 +179,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0);
+ EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
@@ -460,6 +462,10 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
if (ext4_is_quota_file(inode))
return err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
err = ext4_get_inode_loc(inode, &iloc);
if (err)
return err;
@@ -475,10 +481,6 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
brelse(iloc.bh);
}
- err = dquot_initialize(inode);
- if (err)
- return err;
-
handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
EXT4_QUOTA_INIT_BLOCKS(sb) +
EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
@@ -571,6 +573,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
__u32 flags;
+ struct super_block *ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -589,7 +592,9 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg)
switch (flags) {
case EXT4_GOING_FLAGS_DEFAULT:
- freeze_bdev(sb->s_bdev);
+ ret = freeze_bdev(sb->s_bdev);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
thaw_bdev(sb->s_bdev, sb);
break;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0307702d114d..b2e7b1907d41 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/backing-dev.h>
+#include <linux/freezer.h>
#include <trace/events/ext4.h>
#ifdef CONFIG_EXT4_DEBUG
@@ -1801,6 +1802,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -1808,6 +1812,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -1834,12 +1839,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
- ext4_mb_unload_buddy(e4b);
- return 0;
- }
-
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -1872,6 +1875,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -3091,9 +3095,9 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
+ struct ext4_super_block *es = sbi->s_es;
int bsbits, max;
- ext4_lblk_t end;
- loff_t size, start_off;
+ loff_t size, start_off, end;
loff_t orig_size __maybe_unused;
ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
@@ -3122,7 +3126,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* first, let's learn actual file size
* given current request is allocated */
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
+ size = extent_logical_end(sbi, &ac->ac_o_ex);
size = size << bsbits;
if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode);
@@ -3172,6 +3176,19 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
size = size >> bsbits;
start = start_off >> bsbits;
+ /*
+ * For tiny groups (smaller than 8MB) the chosen allocation
+ * alignment may be larger than group size. Make sure the
+ * alignment does not move allocation to a different group which
+ * makes mballoc fail assertions later.
+ */
+ start = max(start, rounddown(ac->ac_o_ex.fe_logical,
+ (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+
+ /* avoid unnecessary preallocation that may trigger assertions */
+ if (start + size > EXT_MAX_BLOCKS)
+ size = EXT_MAX_BLOCKS - start;
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
@@ -3192,7 +3209,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* check we don't cross already preallocated blocks */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
+ loff_t pa_end;
if (pa->pa_deleted)
continue;
@@ -3202,8 +3219,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
continue;
}
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
+ pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa);
/* PA must not overlap original request */
BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
@@ -3232,12 +3248,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* XXX: extra loop to check we really don't overlap preallocations */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
- ext4_lblk_t pa_end;
+ loff_t pa_end;
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0) {
- pa_end = pa->pa_lstart + EXT4_C2B(EXT4_SB(ac->ac_sb),
- pa->pa_len);
+ pa_end = pa_logical_end(EXT4_SB(ac->ac_sb), pa);
BUG_ON(!(start >= pa_end || end <= pa->pa_lstart));
}
spin_unlock(&pa->pa_lock);
@@ -3262,18 +3277,21 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
ac->ac_g_ex.fe_len = EXT4_NUM_B2C(sbi, size);
/* define goal start in order to merge */
- if (ar->pright && (ar->lright == (start + size))) {
+ if (ar->pright && (ar->lright == (start + size)) &&
+ ar->pright >= size &&
+ ar->pright - size >= le32_to_cpu(es->s_first_data_block)) {
/* merge to the right */
ext4_get_group_no_and_offset(ac->ac_sb, ar->pright - size,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
+ &ac->ac_g_ex.fe_group,
+ &ac->ac_g_ex.fe_start);
ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
}
- if (ar->pleft && (ar->lleft + 1 == start)) {
+ if (ar->pleft && (ar->lleft + 1 == start) &&
+ ar->pleft + 1 < ext4_blocks_count(es)) {
/* merge to the left */
ext4_get_group_no_and_offset(ac->ac_sb, ar->pleft + 1,
- &ac->ac_f_ex.fe_group,
- &ac->ac_f_ex.fe_start);
+ &ac->ac_g_ex.fe_group,
+ &ac->ac_g_ex.fe_start);
ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
}
@@ -3365,6 +3383,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
BUG_ON(start < pa->pa_pstart);
BUG_ON(end > pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len));
BUG_ON(pa->pa_free < len);
+ BUG_ON(ac->ac_b_ex.fe_len <= 0);
pa->pa_free -= len;
mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
@@ -3447,8 +3466,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* all fields in this condition don't change,
* so we can skip locking for them */
if (ac->ac_o_ex.fe_logical < pa->pa_lstart ||
- ac->ac_o_ex.fe_logical >= (pa->pa_lstart +
- EXT4_C2B(sbi, pa->pa_len)))
+ ac->ac_o_ex.fe_logical >= pa_logical_end(sbi, pa))
continue;
/* non-extent files can't have physical blocks past 2^32 */
@@ -3669,37 +3687,51 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
return -ENOMEM;
if (ac->ac_b_ex.fe_len < ac->ac_g_ex.fe_len) {
- int winl;
- int wins;
- int win;
- int offs;
-
- /* we can't allocate as much as normalizer wants.
- * so, found space must get proper lstart
- * to cover original request */
+ struct ext4_free_extent ex = {
+ .fe_logical = ac->ac_g_ex.fe_logical,
+ .fe_len = ac->ac_g_ex.fe_len,
+ };
+ loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+ loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);
+
+ /*
+ * We can't allocate as much as normalizer wants, so we try
+ * to get proper lstart to cover the original request, except
+ * when the goal doesn't cover the original request as below:
+ *
+ * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
+ * best_ex:0/200(200) -> adjusted: 1848/2048(200)
+ */
BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
- /* we're limited by original request in that
- * logical block must be covered any way
- * winl is window we can move our chunk within */
- winl = ac->ac_o_ex.fe_logical - ac->ac_g_ex.fe_logical;
+ /*
+ * Use the below logic for adjusting best extent as it keeps
+ * fragmentation in check while ensuring logical range of best
+ * extent doesn't overflow out of goal extent:
+ *
+ * 1. Check if best ex can be kept at end of goal and still
+ * cover original start
+ * 2. Else, check if best ex can be kept at start of goal and
+ * still cover original end
+ * 3. Else, keep the best ex at start of original request.
+ */
+ ex.fe_len = ac->ac_b_ex.fe_len;
- /* also, we should cover whole original request */
- wins = EXT4_C2B(sbi, ac->ac_b_ex.fe_len - ac->ac_o_ex.fe_len);
+ ex.fe_logical = orig_goal_end - EXT4_C2B(sbi, ex.fe_len);
+ if (ac->ac_o_ex.fe_logical >= ex.fe_logical)
+ goto adjust_bex;
- /* the smallest one defines real window */
- win = min(winl, wins);
+ ex.fe_logical = ac->ac_g_ex.fe_logical;
+ if (o_ex_end <= extent_logical_end(sbi, &ex))
+ goto adjust_bex;
- offs = ac->ac_o_ex.fe_logical %
- EXT4_C2B(sbi, ac->ac_b_ex.fe_len);
- if (offs && offs < win)
- win = offs;
+ ex.fe_logical = ac->ac_o_ex.fe_logical;
+adjust_bex:
+ ac->ac_b_ex.fe_logical = ex.fe_logical;
- ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
- EXT4_NUM_B2C(sbi, win);
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
- BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
+ BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
/* preallocation can change ac_b_ex, thus we store actually
@@ -3886,7 +3918,11 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
- BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
+ if (unlikely(group != e4b->bd_group && pa->pa_len != 0)) {
+ ext4_warning(sb, "bad group: expected %u, group %u, pa_start %llu",
+ e4b->bd_group, group, pa->pa_pstart);
+ return 0;
+ }
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
@@ -4203,7 +4239,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
return;
- size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len);
+ size = extent_logical_end(sbi, &ac->ac_o_ex);
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
@@ -4920,8 +4956,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count,
- NULL);
+ err = ext4_issue_discard(sb, block_group, bit,
+ count_clusters, NULL);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5129,19 +5165,19 @@ error_return:
* @sb: super block for the file system
* @start: starting block of the free extent in the alloc. group
* @count: number of blocks to TRIM
- * @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
-static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+static int ext4_trim_extent(struct super_block *sb,
+ int start, int count, struct ext4_buddy *e4b)
__releases(bitlock)
__acquires(bitlock)
{
struct ext4_free_extent ex;
+ ext4_group_t group = e4b->bd_group;
int ret = 0;
trace_ext4_trim_extent(sb, group, start, count);
@@ -5164,6 +5200,81 @@ __acquires(bitlock)
return ret;
}
+static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
+ ext4_group_t grp)
+{
+ unsigned long nr_clusters_in_group;
+
+ if (grp < (ext4_get_groups_count(sb) - 1))
+ nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
+ else
+ nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+ ext4_group_first_block_no(sb, grp))
+ >> EXT4_CLUSTER_BITS(sb);
+
+ return nr_clusters_in_group - 1;
+}
+
+static bool ext4_trim_interrupted(void)
+{
+ return fatal_signal_pending(current) || freezing(current);
+}
+
+static int ext4_try_to_trim_range(struct super_block *sb,
+ struct ext4_buddy *e4b, ext4_grpblk_t start,
+ ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+ ext4_grpblk_t next, count, free_count, last, origin_start;
+ bool set_trimmed = false;
+ void *bitmap;
+
+ last = ext4_last_grp_cluster(sb, e4b->bd_group);
+ bitmap = e4b->bd_bitmap;
+ if (start == 0 && max >= last)
+ set_trimmed = true;
+ origin_start = start;
+ start = max(e4b->bd_info->bb_first_free, start);
+ count = 0;
+ free_count = 0;
+
+ while (start <= max) {
+ start = mb_find_next_zero_bit(bitmap, max + 1, start);
+ if (start > max)
+ break;
+
+ next = mb_find_next_bit(bitmap, last + 1, start);
+ if (origin_start == 0 && next >= last)
+ set_trimmed = true;
+
+ if ((next - start) >= minblocks) {
+ int ret = ext4_trim_extent(sb, start, next - start, e4b);
+
+ if (ret && ret != -EOPNOTSUPP)
+ return count;
+ count += next - start;
+ }
+ free_count += next - start;
+ start = next + 1;
+
+ if (ext4_trim_interrupted())
+ return count;
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, e4b->bd_group);
+ cond_resched();
+ ext4_lock_group(sb, e4b->bd_group);
+ }
+
+ if ((e4b->bd_info->bb_free - free_count) < minblocks)
+ break;
+ }
+
+ if (set_trimmed)
+ EXT4_MB_GRP_SET_TRIMMED(e4b->bd_info);
+
+ return count;
+}
+
/**
* ext4_trim_all_free -- function to trim all free space in alloc. group
* @sb: super block for file system
@@ -5187,10 +5298,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
ext4_grpblk_t minblocks)
{
- void *bitmap;
- ext4_grpblk_t next, count = 0, free_count = 0;
struct ext4_buddy e4b;
- int ret = 0;
+ int ret;
trace_ext4_trim_all_free(sb, group, start, max);
@@ -5200,58 +5309,20 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ret, group);
return ret;
}
- bitmap = e4b.bd_bitmap;
ext4_lock_group(sb, group);
- if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
- minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
- goto out;
-
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
-
- while (start <= max) {
- start = mb_find_next_zero_bit(bitmap, max + 1, start);
- if (start > max)
- break;
- next = mb_find_next_bit(bitmap, max + 1, start);
- if ((next - start) >= minblocks) {
- ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
- if (ret && ret != -EOPNOTSUPP)
- break;
- ret = 0;
- count += next - start;
- }
- free_count += next - start;
- start = next + 1;
-
- if (fatal_signal_pending(current)) {
- count = -ERESTARTSYS;
- break;
- }
-
- if (need_resched()) {
- ext4_unlock_group(sb, group);
- cond_resched();
- ext4_lock_group(sb, group);
- }
-
- if ((e4b.bd_info->bb_free - free_count) < minblocks)
- break;
- }
+ if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) ||
+ minblocks < EXT4_SB(sb)->s_last_trim_minblks)
+ ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks);
+ else
+ ret = 0;
- if (!ret) {
- ret = count;
- EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
- }
-out:
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
ext4_debug("trimmed %d blocks in the group %d\n",
- count, group);
+ ret, group);
return ret;
}
@@ -5296,7 +5367,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (minlen > EXT4_CLUSTERS_PER_GROUP(sb))
goto out;
}
- if (end >= max_blks)
+ if (end >= max_blks - 1)
end = max_blks - 1;
if (end <= first_data_blk)
goto out;
@@ -5313,6 +5384,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
for (group = first_group; group <= last_group; group++) {
+ if (ext4_trim_interrupted())
+ break;
grp = ext4_get_group_info(sb, group);
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
@@ -5329,10 +5402,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
*/
if (group == last_group)
end = last_cluster;
-
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
+ end, minlen);
if (cnt < 0) {
ret = cnt;
break;
@@ -5348,7 +5420,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
}
if (!ret)
- atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+ EXT4_SB(sb)->s_last_trim_minblks = minlen;
out:
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
@@ -5377,8 +5449,7 @@ ext4_mballoc_query_range(
ext4_lock_group(sb, group);
- start = (e4b.bd_info->bb_first_free > start) ?
- e4b.bd_info->bb_first_free : start;
+ start = max(e4b.bd_info->bb_first_free, start);
if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 88c98f17e3d9..4e442bad6d73 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -199,6 +199,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
+static inline loff_t extent_logical_end(struct ext4_sb_info *sbi,
+ struct ext4_free_extent *fex)
+{
+ /* Use loff_t to avoid end exceeding ext4_lblk_t max. */
+ return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len);
+}
+
+static inline loff_t pa_logical_end(struct ext4_sb_info *sbi,
+ struct ext4_prealloc_space *pa)
+{
+ /* Use loff_t to avoid end exceeding ext4_lblk_t max. */
+ return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len);
+}
+
typedef int (*ext4_mballoc_query_range_fn)(
struct super_block *sb,
ext4_group_t agno,
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index c5b2ea1a9372..dbba3c3a2f06 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -435,7 +435,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL;
struct migrate_struct lb;
unsigned long max_entries;
- __u32 goal;
+ __u32 goal, tmp_csum_seed;
uid_t owner[2];
/*
@@ -443,7 +443,8 @@ int ext4_ext_migrate(struct inode *inode)
* already is extent-based, error out.
*/
if (!ext4_has_feature_extents(inode->i_sb) ||
- (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ ext4_has_inline_data(inode))
return -EINVAL;
if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
@@ -483,6 +484,7 @@ int ext4_ext_migrate(struct inode *inode)
* the migration.
*/
ei = EXT4_I(inode);
+ tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed;
EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
i_size_write(tmp_inode, i_size_read(inode));
/*
@@ -593,6 +595,7 @@ err_out:
* the inode is not visible to user space.
*/
tmp_inode->i_blocks = 0;
+ EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed;
/* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 30ce3dc69378..e71ab64f1df5 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -615,6 +615,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;
+ *moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@@ -670,7 +671,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
- move_extent_per_page(o_filp, donor_inode,
+ *moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@@ -680,9 +681,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
- *moved_len = o_start - orig_blk;
- if (*moved_len > len)
- *moved_len = len;
out:
if (*moved_len) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f10307215d58..87c6d619a564 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
struct inode *inode,
ext4_lblk_t *block)
{
+ struct ext4_map_blocks map;
struct buffer_head *bh;
int err;
@@ -63,6 +64,21 @@ static struct buffer_head *ext4_append(handle_t *handle,
return ERR_PTR(-ENOSPC);
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ map.m_lblk = *block;
+ map.m_len = 1;
+
+ /*
+ * We're appending new directory block. Make sure the block is not
+ * allocated yet, otherwise we will end up corrupting the
+ * directory.
+ */
+ err = ext4_map_blocks(NULL, inode, &map, 0);
+ if (err < 0)
+ return ERR_PTR(err);
+ if (err) {
+ EXT4_ERROR_INODE(inode, "Logical block already allocated");
+ return ERR_PTR(-EFSCORRUPTED);
+ }
bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
if (IS_ERR(bh))
@@ -273,9 +289,9 @@ static struct dx_frame *dx_probe(struct ext4_filename *fname,
struct dx_hash_info *hinfo,
struct dx_frame *frame);
static void dx_release(struct dx_frame *frames);
-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
- unsigned blocksize, struct dx_hash_info *hinfo,
- struct dx_map_entry map[]);
+static int dx_make_map(struct inode *dir, struct buffer_head *bh,
+ struct dx_hash_info *hinfo,
+ struct dx_map_entry *map_tail);
static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
struct dx_map_entry *offsets, int count, unsigned blocksize);
@@ -309,17 +325,17 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
struct buffer_head *bh)
{
struct ext4_dir_entry_tail *t;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
#ifdef PARANOID
struct ext4_dir_entry *d, *top;
d = (struct ext4_dir_entry *)bh->b_data;
top = (struct ext4_dir_entry *)(bh->b_data +
- (EXT4_BLOCK_SIZE(inode->i_sb) -
- sizeof(struct ext4_dir_entry_tail)));
- while (d < top && d->rec_len)
+ (blocksize - sizeof(struct ext4_dir_entry_tail)));
+ while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
d = (struct ext4_dir_entry *)(((void *)d) +
- le16_to_cpu(d->rec_len));
+ ext4_rec_len_from_disk(d->rec_len, blocksize));
if (d != top)
return NULL;
@@ -330,7 +346,8 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
#endif
if (t->det_reserved_zero1 ||
- le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
+ (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
+ sizeof(struct ext4_dir_entry_tail)) ||
t->det_reserved_zero2 ||
t->det_reserved_ft != EXT4_FT_DIR_CSUM)
return NULL;
@@ -411,13 +428,14 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
struct ext4_dir_entry *dp;
struct dx_root_info *root;
int count_offset;
+ int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
+ unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
- if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
+ if (rlen == blocksize)
count_offset = 8;
- else if (le16_to_cpu(dirent->rec_len) == 12) {
+ else if (rlen == 12) {
dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
- if (le16_to_cpu(dp->rec_len) !=
- EXT4_BLOCK_SIZE(inode->i_sb) - 12)
+ if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
return NULL;
root = (struct dx_root_info *)(((void *)dp + 12));
if (root->reserved_zero ||
@@ -750,12 +768,14 @@ static struct dx_frame *
dx_probe(struct ext4_filename *fname, struct inode *dir,
struct dx_hash_info *hinfo, struct dx_frame *frame_in)
{
- unsigned count, indirect;
+ unsigned count, indirect, level, i;
struct dx_entry *at, *entries, *p, *q, *m;
struct dx_root *root;
struct dx_frame *frame = frame_in;
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
+ ext4_lblk_t block;
+ ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
frame->bh = ext4_read_dirblock(dir, 0, INDEX);
@@ -811,6 +831,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
}
dxtrace(printk("Look up %x", hash));
+ level = 0;
+ blocks[0] = 0;
while (1) {
count = dx_get_count(entries);
if (!count || count > dx_get_limit(entries)) {
@@ -852,15 +874,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
dx_get_block(at)));
frame->entries = entries;
frame->at = at;
- if (!indirect--)
+
+ block = dx_get_block(at);
+ for (i = 0; i <= level; i++) {
+ if (blocks[i] == block) {
+ ext4_warning_inode(dir,
+ "dx entry: tree cycle block %u points back to block %u",
+ blocks[level], block);
+ goto fail;
+ }
+ }
+ if (++level > indirect)
return frame;
+ blocks[level] = block;
frame++;
- frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
+ frame->bh = ext4_read_dirblock(dir, block, INDEX);
if (IS_ERR(frame->bh)) {
ret_err = (struct dx_frame *) frame->bh;
frame->bh = NULL;
goto fail;
}
+
entries = ((struct dx_node *) frame->bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit(dir)) {
@@ -1205,25 +1239,34 @@ static inline int search_dirblock(struct buffer_head *bh,
* Create map of hash values, offsets, and sizes, stored at end of block.
* Returns number of entries mapped.
*/
-static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de,
- unsigned blocksize, struct dx_hash_info *hinfo,
+static int dx_make_map(struct inode *dir, struct buffer_head *bh,
+ struct dx_hash_info *hinfo,
struct dx_map_entry *map_tail)
{
int count = 0;
- char *base = (char *) de;
+ struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
+ unsigned int buflen = bh->b_size;
+ char *base = bh->b_data;
struct dx_hash_info h = *hinfo;
+ int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
+
+ if (ext4_has_metadata_csum(dir->i_sb))
+ buflen -= sizeof(struct ext4_dir_entry_tail);
- while ((char *) de < base + blocksize) {
+ while ((char *) de < base + buflen) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
+ ((char *)de) - base))
+ return -EFSCORRUPTED;
if (de->name_len && de->inode) {
ext4fs_dirhash(dir, de->name, de->name_len, &h);
map_tail--;
map_tail->hash = h.hash;
map_tail->offs = ((char *) de - base)>>2;
- map_tail->size = le16_to_cpu(de->rec_len);
+ map_tail->size = ext4_rec_len_from_disk(de->rec_len,
+ blocksize);
count++;
cond_resched();
}
- /* XXX: do we need to check rec_len == 0 case? -Chris */
de = ext4_next_entry(de, blocksize);
}
return count;
@@ -1463,11 +1506,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data);
- if (has_inline_data) {
- if (inlined)
- *inlined = 1;
+ if (inlined)
+ *inlined = has_inline_data;
+ if (has_inline_data)
goto cleanup_and_exit;
- }
}
if ((namelen <= 2) && (name[0] == '.') &&
@@ -1813,7 +1855,8 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
struct dx_hash_info *hinfo)
{
unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
+ unsigned continued;
+ int count;
struct buffer_head *bh2;
ext4_lblk_t newblock;
u32 hash2;
@@ -1848,8 +1891,11 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
/* create map in the end of data2 block */
map = (struct dx_map_entry *) (data2 + blocksize);
- count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1,
- blocksize, hinfo, map);
+ count = dx_make_map(dir, *bh, hinfo, map);
+ if (count < 0) {
+ err = count;
+ goto journal_error;
+ }
map -= count;
dx_sort_map(map, count);
/* Ensure that neither split block is over half full */
@@ -2098,8 +2144,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
memcpy(data2, de, len);
de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
+ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
+ if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
+ (data2 + (blocksize - csum_size) -
+ (char *) de))) {
+ brelse(bh2);
+ brelse(bh);
+ return -EFSCORRUPTED;
+ }
de = de2;
+ }
de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
(char *) de, blocksize);
@@ -2886,11 +2940,8 @@ bool ext4_empty_dir(struct inode *inode)
de = (struct ext4_dir_entry_2 *) (bh->b_data +
(offset & (sb->s_blocksize - 1)));
if (ext4_check_dir_entry(inode, NULL, de, bh,
- bh->b_data, bh->b_size, offset)) {
- offset = (offset | (sb->s_blocksize - 1)) + 1;
- continue;
- }
- if (le32_to_cpu(de->inode)) {
+ bh->b_data, bh->b_size, offset) ||
+ le32_to_cpu(de->inode)) {
brelse(bh);
return false;
}
@@ -3442,6 +3493,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
struct buffer_head *bh;
if (!ext4_has_inline_data(inode)) {
+ struct ext4_dir_entry_2 *de;
+ unsigned int offset;
+
/* The first directory block must not be a hole, so
* treat it as DIRENT_HTREE
*/
@@ -3450,9 +3504,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
*retval = PTR_ERR(bh);
return NULL;
}
- *parent_de = ext4_next_entry(
- (struct ext4_dir_entry_2 *)bh->b_data,
- inode->i_sb->s_blocksize);
+
+ de = (struct ext4_dir_entry_2 *) bh->b_data;
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
+ bh->b_size, 0) ||
+ le32_to_cpu(de->inode) != inode->i_ino ||
+ strcmp(".", de->name)) {
+ EXT4_ERROR_INODE(inode, "directory missing '.'");
+ brelse(bh);
+ *retval = -EFSCORRUPTED;
+ return NULL;
+ }
+ offset = ext4_rec_len_from_disk(de->rec_len,
+ inode->i_sb->s_blocksize);
+ de = ext4_next_entry(de, inode->i_sb->s_blocksize);
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
+ bh->b_size, offset) ||
+ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+ EXT4_ERROR_INODE(inode, "directory missing '..'");
+ brelse(bh);
+ *retval = -EFSCORRUPTED;
+ return NULL;
+ }
+ *parent_de = de;
+
return bh;
}
@@ -3558,7 +3633,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again
* before reset old inode info.
*/
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh);
if (!old.bh)
@@ -3708,6 +3784,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = dquot_initialize(old.dir);
if (retval)
return retval;
+ retval = dquot_initialize(old.inode);
+ if (retval)
+ return retval;
retval = dquot_initialize(new.dir);
if (retval)
return retval;
@@ -3720,9 +3799,11 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return retval;
}
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
+
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3881,6 +3962,7 @@ release_bh:
brelse(old.dir_bh);
brelse(old.bh);
brelse(new.bh);
+
return retval;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b66b335a0ca6..5858fb9b5cd5 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -380,7 +380,8 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
- struct page *page,
+ struct page *pagecache_page,
+ struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@@ -395,10 +396,11 @@ submit_and_retry:
return ret;
io->io_bio->bi_write_hint = inode->i_write_hint;
}
- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+ bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
+ wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
return 0;
}
@@ -511,7 +513,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
+ ret = io_submit_add_bh(io, inode, page, bounce_page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b7f20196439a..d4431ca0c10e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -53,6 +53,16 @@ int ext4_resize_begin(struct super_block *sb)
return -EPERM;
/*
+ * If the reserved GDT blocks is non-zero, the resize_inode feature
+ * should always be set.
+ */
+ if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
+ !ext4_has_feature_resize_inode(sb)) {
+ ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
+ return -EFSCORRUPTED;
+ }
+
+ /*
* If we are not using the primary superblock/GDT copy don't resize,
* because the user tools have no way of handling this. Probably a
* bad time to do it anyways.
@@ -217,17 +227,24 @@ struct ext4_new_flex_group_data {
in the flex group */
__u16 *bg_flags; /* block group flags of groups
in @groups */
+ ext4_group_t resize_bg; /* number of allocated
+ new_group_data */
ext4_group_t count; /* number of groups in @groups
*/
};
/*
+ * Avoiding memory allocation failures due to too many groups added each time.
+ */
+#define MAX_RESIZE_BG 16384
+
+/*
* alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
* @flexbg_size.
*
* Returns NULL on failure otherwise address of the allocated structure.
*/
-static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size)
{
struct ext4_new_flex_group_data *flex_gd;
@@ -235,17 +252,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
if (flex_gd == NULL)
goto out3;
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
- goto out2;
- flex_gd->count = flexbg_size;
+ if (unlikely(flexbg_size > MAX_RESIZE_BG))
+ flex_gd->resize_bg = MAX_RESIZE_BG;
+ else
+ flex_gd->resize_bg = flexbg_size;
- flex_gd->groups = kmalloc_array(flexbg_size,
+ flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
sizeof(struct ext4_new_group_data),
GFP_NOFS);
if (flex_gd->groups == NULL)
goto out2;
- flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
+ flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
GFP_NOFS);
if (flex_gd->bg_flags == NULL)
goto out1;
@@ -282,7 +300,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
*/
static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- int flexbg_size)
+ unsigned int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t start_blk;
@@ -383,12 +401,12 @@ next_group:
group = group_data[0].group;
printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
- "%d groups, flexbg size is %d:\n", flex_gd->count,
+ "%u groups, flexbg size is %u:\n", flex_gd->count,
flexbg_size);
for (i = 0; i < flex_gd->count; i++) {
ext4_debug(
- "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
+ "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
@@ -562,13 +580,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
goto handle_itb;
- if (meta_bg == 1) {
- ext4_group_t first_group;
- first_group = ext4_meta_bg_first_group(sb, group);
- if (first_group != group + 1 &&
- first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
- goto handle_itb;
- }
+ if (meta_bg == 1)
+ goto handle_itb;
block = start + ext4_bg_has_super(sb, group);
/* Copy all of the GDT blocks into the backup in this group */
@@ -1473,6 +1486,7 @@ static void ext4_update_super(struct super_block *sb,
* Update the fs overhead information
*/
ext4_calculate_overhead(sb);
+ es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: added group %u:"
@@ -1553,11 +1567,14 @@ exit_journal:
int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
- int meta_bg = ext4_has_feature_meta_bg(sb);
+ int meta_bg = ext4_has_feature_meta_bg(sb) &&
+ gdb_num >= le32_to_cpu(es->s_first_meta_bg);
+ sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
+ ext4_group_first_block_no(sb, 0);
sector_t old_gdb = 0;
- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block), 0);
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
+ (char *)es, sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
@@ -1565,8 +1582,8 @@ exit_journal:
gdb_num);
if (old_gdb == gdb_bh->b_blocknr)
continue;
- update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
- gdb_bh->b_size, meta_bg);
+ update_backups(sb, gdb_bh->b_blocknr - padding_blocks,
+ gdb_bh->b_data, gdb_bh->b_size, meta_bg);
old_gdb = gdb_bh->b_blocknr;
}
}
@@ -1576,8 +1593,7 @@ exit:
static int ext4_setup_next_flex_gd(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t n_blocks_count,
- unsigned long flexbg_size)
+ ext4_fsblk_t n_blocks_count)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
@@ -1601,7 +1617,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
BUG_ON(last);
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
- last_group = group | (flexbg_size - 1);
+ last_group = group | (flex_gd->resize_bg - 1);
if (last_group > n_group)
last_group = n_group;
@@ -1764,7 +1780,7 @@ errout:
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
@@ -1927,9 +1943,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
errout:
ret = ext4_journal_stop(handle);
- if (!err)
- err = ret;
- return ret;
+ return err ? err : ret;
invalid_resize_inode:
ext4_error(sb, "corrupted/inconsistent resize inode");
@@ -1957,8 +1971,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0;
unsigned long last_update_time = 0;
- int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
+ int err = 0;
int meta_bg;
+ unsigned int flexbg_size = ext4_flex_bg_size(sbi);
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, n_blocks_count - 1);
@@ -1968,6 +1983,16 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
}
brelse(bh);
+ /*
+ * For bigalloc, trim the requested size to the nearest cluster
+ * boundary to avoid creating an unusable filesystem. We do this
+ * silently, instead of returning an error, to avoid breaking
+ * callers that blindly resize the filesystem to the full size of
+ * the underlying block device.
+ */
+ if (ext4_has_feature_bigalloc(sb))
+ n_blocks_count &= ~((1 << EXT4_CLUSTER_BITS(sb)) - 1);
+
retry:
o_blocks_count = ext4_blocks_count(es);
@@ -2069,7 +2094,7 @@ retry:
goto out;
}
- if (ext4_blocks_count(es) == n_blocks_count)
+ if (ext4_blocks_count(es) == n_blocks_count && n_blocks_count_retry == 0)
goto out;
err = ext4_alloc_flex_bg_array(sb, n_group + 1);
@@ -2089,8 +2114,7 @@ retry:
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
- while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
- flexbg_size)) {
+ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
if (jiffies - last_update_time > HZ * 10) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c13879bd2168..8ad3de7846c5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -906,10 +906,16 @@ static void ext4_blkdev_put(struct block_device *bdev)
static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
struct block_device *bdev;
- bdev = sbi->journal_bdev;
+ bdev = sbi->s_journal_bdev;
if (bdev) {
+ /*
+ * Invalidate the journal device's buffers. We don't want them
+ * floating about in memory - the physical journal device may
+ * hotswapped, and it breaks the `ro-after' testing code.
+ */
+ invalidate_bdev(bdev);
ext4_blkdev_put(bdev);
- sbi->journal_bdev = NULL;
+ sbi->s_journal_bdev = NULL;
}
}
@@ -1034,14 +1040,8 @@ static void ext4_put_super(struct super_block *sb)
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
- /*
- * Invalidate the journal device's buffers. We don't want them
- * floating about in memory - the physical journal device may
- * hotswapped, and it breaks the `ro-after' testing code.
- */
- sync_blockdev(sbi->journal_bdev);
- invalidate_bdev(sbi->journal_bdev);
+ if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
+ sync_blockdev(sbi->s_journal_bdev);
ext4_blkdev_remove(sbi);
}
@@ -1085,6 +1085,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
return NULL;
inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
@@ -1703,6 +1704,7 @@ static const struct mount_opts {
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
+ {Opt_commit, 0, MOPT_NO_EXT2},
{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
@@ -2473,11 +2475,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
crc = crc16(crc, (__u8 *)gdp, offset);
offset += sizeof(gdp->bg_checksum); /* skip checksum */
/* for checksum of struct ext4_group_desc do the rest...*/
- if (ext4_has_feature_64bit(sb) &&
- offset < le16_to_cpu(sbi->s_es->s_desc_size))
+ if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
crc = crc16(crc, (__u8 *)gdp + offset,
- le16_to_cpu(sbi->s_es->s_desc_size) -
- offset);
+ sbi->s_desc_size - offset);
out:
return cpu_to_le16(crc);
@@ -3156,6 +3156,7 @@ static int ext4_lazyinit_thread(void *arg)
unsigned long next_wakeup, cur;
BUG_ON(NULL == eli);
+ set_freezable();
cont_thread:
while (true) {
@@ -3581,7 +3582,7 @@ int ext4_calculate_overhead(struct super_block *sb)
* Add the internal journal blocks whether the journal has been
* loaded or not
*/
- if (sbi->s_journal && !sbi->journal_bdev)
+ if (sbi->s_journal && !sbi->s_journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
@@ -4386,30 +4387,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
/* Nojournal mode, all journal mount options are illegal */
- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_checksum, fs mounted w/o journal");
- goto failed_mount_wq;
- }
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_async_commit, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
+ }
+
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_checksum, fs mounted w/o journal");
+ goto failed_mount3a;
}
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"commit=%lu, fs mounted w/o journal",
sbi->s_commit_interval / HZ);
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"data=, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
@@ -4775,6 +4777,7 @@ failed_mount:
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
+ invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
out_free_base:
@@ -4833,7 +4836,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return NULL;
@@ -4950,7 +4953,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
be32_to_cpu(journal->j_superblock->s_nr_users));
goto out_journal;
}
- EXT4_SB(sb)->journal_bdev = bdev;
+ EXT4_SB(sb)->s_journal_bdev = bdev;
ext4_init_journal_params(sb, journal);
return journal;
@@ -5603,9 +5606,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
#ifdef CONFIG_QUOTA
- /* Release old quota file names */
- for (i = 0; i < EXT4_MAXQUOTAS; i++)
- kfree(old_opts.s_qf_names[i]);
if (enable_quota) {
if (sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
@@ -5615,6 +5615,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
}
+ /* Release old quota file names */
+ for (i = 0; i < EXT4_MAXQUOTAS; i++)
+ kfree(old_opts.s_qf_names[i]);
#endif
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
ext4_release_system_zone(sb);
@@ -5631,6 +5634,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
return 0;
restore_opts:
+ /*
+ * If there was a failing r/w to ro transition, we may need to
+ * re-enable quota
+ */
+ if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ sb_any_quota_suspended(sb))
+ dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -5835,7 +5845,7 @@ static int ext4_write_info(struct super_block *sb, int type)
handle_t *handle;
/* Data block + inode block */
- handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
+ handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit_info(sb, type);
@@ -5952,6 +5962,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
+{
+ switch (type) {
+ case USRQUOTA:
+ return qf_inum == EXT4_USR_QUOTA_INO;
+ case GRPQUOTA:
+ return qf_inum == EXT4_GRP_QUOTA_INO;
+ case PRJQUOTA:
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
+ default:
+ BUG();
+ }
+}
+
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
@@ -5968,9 +5992,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
+ qf_inums[type], type);
+ return -EUCLEAN;
+ }
+
qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
+ qf_inums[type], type);
return PTR_ERR(qf_inode);
}
@@ -6009,8 +6040,9 @@ static int ext4_enable_quotas(struct super_block *sb)
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "e2fsck to fix.", type, err);
+ "(type=%d, err=%d, ino=%lu). "
+ "Please run e2fsck to fix.", type,
+ err, qf_inums[type]);
for (type--; type >= 0; type--) {
struct inode *inode;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9394360ff137..7768d7146d73 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -383,6 +383,11 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
+static void ext4_feat_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@@ -397,7 +402,7 @@ static struct kobj_type ext4_sb_ktype = {
static struct kobj_type ext4_feat_ktype = {
.default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
- .release = (void (*)(struct kobject *))kfree,
+ .release = ext4_feat_release,
};
static struct kobject *ext4_root;
diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
index 6a30e54c1128..9879ea046e5a 100644
--- a/fs/ext4/verity.c
+++ b/fs/ext4/verity.c
@@ -79,8 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
struct page *page;
- void *fsdata;
- void *addr;
+ void *fsdata = NULL;
int res;
res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
@@ -88,9 +87,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
if (res)
return res;
- addr = kmap_atomic(page);
- memcpy(addr + offset_in_page(pos), buf, n);
- kunmap_atomic(addr);
+ memcpy_to_page(page, offset_in_page(pos), buf, n);
res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
page, fsdata);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 20e40cac819e..cd371ac25f84 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -121,7 +121,11 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
#ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode)
{
+ struct ext4_inode_info *ei = EXT4_I(ea_inode);
+
lockdep_set_subclass(&ea_inode->i_rwsem, 1);
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, I_DATA_SEM_EA);
}
#endif
@@ -384,7 +388,18 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode;
int err;
- inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
+ /*
+ * We have to check for this corruption early as otherwise
+ * iget_locked() could wait indefinitely for the state of our
+ * parent inode.
+ */
+ if (parent->i_ino == ea_ino) {
+ ext4_error(parent->i_sb,
+ "Parent and EA inode have the same ino %lu", ea_ino);
+ return -EFSCORRUPTED;
+ }
+
+ inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_EA_INODE);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
ext4_error(parent->i_sb,
@@ -392,23 +407,6 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
err);
return err;
}
-
- if (is_bad_inode(inode)) {
- ext4_error(parent->i_sb,
- "error while reading EA inode %lu is_bad_inode",
- ea_ino);
- err = -EIO;
- goto error;
- }
-
- if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
- ext4_error(parent->i_sb,
- "EA inode %lu does not have EXT4_EA_INODE_FL flag",
- ea_ino);
- err = -EINVAL;
- goto error;
- }
-
ext4_xattr_inode_set_class(inode);
/*
@@ -429,9 +427,21 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
*ea_inode = inode;
return 0;
-error:
- iput(inode);
- return err;
+}
+
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
}
static int
@@ -1019,10 +1029,8 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
struct ext4_iloc iloc;
s64 ref_count;
- u32 hash;
int ret;
inode_lock(ea_inode);
@@ -1045,14 +1053,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache,
- GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
- }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1065,12 +1065,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
- }
}
}
@@ -1249,6 +1243,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1258,9 +1253,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1284,7 +1288,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
- ce->e_reusable = 1;
+ set_bit(MBE_REUSABLE_B, &ce->e_flags);
mb_cache_entry_put(ea_block_cache, ce);
}
}
@@ -1423,6 +1427,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
int err;
+ if (inode->i_sb->s_root == NULL) {
+ ext4_warning(inode->i_sb,
+ "refuse to create EA inode when umounting");
+ WARN_ON(1);
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* Let the next inode be the goal, so we try and allocate the EA inode
* in the same group, or nearby one.
@@ -1442,6 +1453,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
if (!err)
err = ext4_inode_attach_jinode(ea_inode);
if (err) {
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+ ext4_warning_inode(ea_inode,
+ "cleanup dec ref error %d", err);
iput(ea_inode);
return ERR_PTR(err);
}
@@ -1487,11 +1501,11 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
while (ce) {
ea_inode = ext4_iget(inode->i_sb, ce->e_value,
- EXT4_IGET_NORMAL);
- if (!IS_ERR(ea_inode) &&
- !is_bad_inode(ea_inode) &&
- (EXT4_I(ea_inode)->i_flags & EXT4_EA_INODE_FL) &&
- i_size_read(ea_inode) == value_len &&
+ EXT4_IGET_EA_INODE);
+ if (IS_ERR(ea_inode))
+ goto next_entry;
+ ext4_xattr_inode_set_class(ea_inode);
+ if (i_size_read(ea_inode) == value_len &&
!ext4_xattr_inode_read(ea_inode, ea_data, value_len) &&
!ext4_xattr_inode_verify_hashes(ea_inode, NULL, ea_data,
value_len) &&
@@ -1501,9 +1515,8 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
kvfree(ea_data);
return ea_inode;
}
-
- if (!IS_ERR(ea_inode))
- iput(ea_inode);
+ iput(ea_inode);
+ next_entry:
ce = mb_cache_entry_find_next(ea_inode_cache, ce);
}
kvfree(ea_data);
@@ -1729,6 +1742,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
memmove(here, (void *)here + size,
(void *)last - (void *)here + sizeof(__u32));
memset(last, 0, size);
+
+ /*
+ * Update i_inline_off - moved ibody region might contain
+ * system.data attribute. Handling a failure here won't
+ * cause other complications for setting an xattr.
+ */
+ if (!is_block && ext4_has_inline_data(inode)) {
+ ret = ext4_find_inline_data_nolock(inode);
+ if (ret) {
+ ext4_warning_inode(inode,
+ "unable to update i_inline_off");
+ goto out;
+ }
+ }
} else if (s->not_found) {
/* Insert new name. */
size_t size = EXT4_XATTR_LEN(name_len);
@@ -1868,6 +1895,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
#define header(x) ((struct ext4_xattr_header *)(x))
if (s->base) {
+ int offset = (char *)s->here - bs->bh->b_data;
+
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -1882,9 +1911,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1899,50 +1939,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error)
goto cleanup;
goto inserted;
- } else {
- int offset = (char *)s->here - bs->bh->b_data;
+ }
+clone_block:
+ unlock_buffer(bs->bh);
+ ea_bdebug(bs->bh, "cloning");
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+ error = -ENOMEM;
+ if (s->base == NULL)
+ goto cleanup;
+ s->first = ENTRY(header(s->base)+1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->here = ENTRY(s->base + offset);
+ s->end = s->base + bs->bh->b_size;
- unlock_buffer(bs->bh);
- ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
- error = -ENOMEM;
- if (s->base == NULL)
+ /*
+ * If existing entry points to an xattr inode, we need
+ * to prevent ext4_xattr_set_entry() from decrementing
+ * ref count on it because the reference belongs to the
+ * original block. In this case, make the entry look
+ * like it has an empty value.
+ */
+ if (!s->not_found && s->here->e_value_inum) {
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino,
+ le32_to_cpu(s->here->e_hash),
+ &tmp_inode);
+ if (error)
goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
- s->first = ENTRY(header(s->base)+1);
- header(s->base)->h_refcount = cpu_to_le32(1);
- s->here = ENTRY(s->base + offset);
- s->end = s->base + bs->bh->b_size;
-
- /*
- * If existing entry points to an xattr inode, we need
- * to prevent ext4_xattr_set_entry() from decrementing
- * ref count on it because the reference belongs to the
- * original block. In this case, make the entry look
- * like it has an empty value.
- */
- if (!s->not_found && s->here->e_value_inum) {
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &tmp_inode);
- if (error)
- goto cleanup;
- if (!ext4_test_inode_state(tmp_inode,
- EXT4_STATE_LUSTRE_EA_INODE)) {
- /*
- * Defer quota free call for previous
- * inode until success is guaranteed.
- */
- old_ea_inode_quota = le32_to_cpu(
- s->here->e_value_size);
- }
- iput(tmp_inode);
-
- s->here->e_value_inum = 0;
- s->here->e_value_size = 0;
+ if (!ext4_test_inode_state(tmp_inode,
+ EXT4_STATE_LUSTRE_EA_INODE)) {
+ /*
+ * Defer quota free call for previous
+ * inode until success is guaranteed.
+ */
+ old_ea_inode_quota = le32_to_cpu(
+ s->here->e_value_size);
}
+ iput(tmp_inode);
+
+ s->here->e_value_inum = 0;
+ s->here->e_value_size = 0;
}
} else {
/* Allocate a buffer where we construct the new block. */
@@ -1993,8 +2030,9 @@ inserted:
else {
u32 ref;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -2009,18 +2047,13 @@ inserted:
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2035,10 +2068,9 @@ inserted:
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
- ce->e_reusable = 0;
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
ext4_xattr_block_csum_set(inode, new_bh);
@@ -2062,23 +2094,16 @@ inserted:
/* We need to allocate a new block */
ext4_fsblk_t goal, block;
+#ifdef EXT4_XATTR_DEBUG
WARN_ON_ONCE(dquot_initialize_needed(inode));
-
+#endif
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
-
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
-
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
if (error)
goto cleanup;
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
-
ea_idebug(inode, "creating block %llu",
(unsigned long long)block);
@@ -2184,8 +2209,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
struct ext4_inode *raw_inode;
int error;
- if (EXT4_I(inode)->i_extra_isize == 0)
+ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return 0;
+
raw_inode = ext4_raw_inode(&is->iloc);
header = IHDR(inode, raw_inode);
is->s.base = is->s.first = IFIRST(header);
@@ -2205,7 +2231,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
return 0;
}
-int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
+int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -2213,32 +2239,9 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_search *s = &is->s;
int error;
- if (EXT4_I(inode)->i_extra_isize == 0)
+ if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
- if (!IS_LAST_ENTRY(s->first)) {
- header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- } else {
- header->h_magic = cpu_to_le32(0);
- ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
- }
- return 0;
-}
-static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_search *s = &is->s;
- int error;
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error)
return error;
@@ -2565,13 +2568,13 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
.in_inode = !!entry->e_value_inum,
};
struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
+ int needs_kvfree = 0;
int error;
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- buffer = kmalloc(value_size, GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!is || !bs || !buffer || !b_entry_name) {
+ if (!is || !bs || !b_entry_name) {
error = -ENOMEM;
goto out;
}
@@ -2583,12 +2586,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
/* Save the entry name and the entry value */
if (entry->e_value_inum) {
+ buffer = kvmalloc(value_size, GFP_NOFS);
+ if (!buffer) {
+ error = -ENOMEM;
+ goto out;
+ }
+ needs_kvfree = 1;
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
} else {
size_t value_offs = le16_to_cpu(entry->e_value_offs);
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ buffer = (void *)IFIRST(header) + value_offs;
}
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
@@ -2603,25 +2612,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto out;
-
i.value = buffer;
i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto out;
- /* Add entry which was removed from the inode into the block */
+ /* Move ea entry from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error)
goto out;
- error = 0;
+
+ /* Remove the chosen entry from the inode */
+ i.value = NULL;
+ i.value_len = 0;
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+
out:
kfree(b_entry_name);
- kfree(buffer);
+ if (needs_kvfree && buffer)
+ kvfree(buffer);
if (is)
brelse(is->iloc.bh);
if (bs)
@@ -2796,6 +2806,9 @@ shift:
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
+ if (ext4_has_inline_data(inode))
+ error = ext4_find_inline_data_nolock(inode);
+
cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index f39cad2abe2a..66911f8a11f8 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -95,6 +95,19 @@ struct ext4_xattr_entry {
#define EXT4_ZERO_XATTR_VALUE ((void *)-1)
+/*
+ * If we want to add an xattr to the inode, we should make sure that
+ * i_extra_isize is not 0 and that the inode size is not less than
+ * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad.
+ * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data
+ * |--------------------------|------------|------|---------|---|-------|
+ */
+#define EXT4_INODE_HAS_XATTR_SPACE(inode) \
+ ((EXT4_I(inode)->i_extra_isize != 0) && \
+ (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \
+ sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \
+ EXT4_INODE_SIZE((inode)->i_sb)))
+
struct ext4_xattr_info {
const char *name;
const void *value;
@@ -177,6 +190,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
extern const struct xattr_handler *ext4_xattr_handlers[];
@@ -185,9 +199,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
const char *name,
void *buffer, size_t buffer_size);
-extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is);
+extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_info *i,
+ struct ext4_xattr_ibody_find *is);
extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *);