Message ID | 1592835419-7841-1-git-send-email-wangshilong1991@gmail.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
On Jun 22, 2020, at 8:16 AM, Wang Shilong <wangshilong1991@gmail.com> wrote: > > From: Wang Shilong <wshilong@ddn.com> > > Now WAS_TRIMMED flag will be cleared if there are any blocks > freed in this block group, this might be not good idea if there > are only few blocks freed, since most of freed blocks have been > issued discard before. > > So this patch tries to introduce another counter which record > how many blocks freed since last time trimmed, WAS_TRIMMED flag > will be only cleared if there are enough free blocks(default 128). > > Also expose a new sys interface min_freed_blocks_to_trim to tune > default behavior. > > Cc: Shuichi Ihara <sihara@ddn.com> > Cc: Andreas Dilger <adilger@dilger.ca> > Cc: Wang Shilong <wangshilong1991@gmail.com> > Signed-off-by: Wang Shilong <wshilong@ddn.com> Reviewed-by: Andreas Dilger <adilger@dilger.ca> > --- > changelog v1->v2: > init bb_freed_last_trimmed to be zero during setup > --- > fs/ext4/ext4.h | 7 +++++++ > fs/ext4/mballoc.c | 18 ++++++++++++++++-- > fs/ext4/sysfs.c | 2 ++ > 3 files changed, 25 insertions(+), 2 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 252754da2f1b..2da86d1ebe3f 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); > /* Metadata checksum algorithm codes */ > #define EXT4_CRC32C_CHKSUM 1 > > +/* Default min freed blocks which we could clear TRIMMED flags */ > +#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128 > + > /* > * Structure of the super block > */ > @@ -1533,6 +1536,9 @@ struct ext4_sb_info { > /* the size of zero-out chunk */ > unsigned int s_extent_max_zeroout_kb; > > + /* Min freed blocks per group that we could run trim on it*/ > + unsigned long s_min_freed_blocks_to_trim; > + > unsigned int s_log_groups_per_flex; > struct flex_groups * __rcu *s_flex_groups; > ext4_group_t s_flex_groups_allocated; > @@ -3125,6 +3131,7 @@ struct ext4_group_info { > struct rb_root bb_free_root; > ext4_grpblk_t bb_first_free; /* first free block */ > ext4_grpblk_t bb_free; /* total free blocks */ > + ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/ > ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ > ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ > struct list_head bb_prealloc_list; > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index 235a316584d0..52ab9ac5be86 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -2558,6 +2558,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, > init_rwsem(&meta_group_info[i]->alloc_sem); > meta_group_info[i]->bb_free_root = RB_ROOT; > meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ > + meta_group_info[i]->bb_freed_last_trimmed = 0; > > mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); > return 0; > @@ -2763,6 +2764,8 @@ int ext4_mb_init(struct super_block *sb) > sbi->s_mb_group_prealloc, sbi->s_stripe); > } > > + sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM; > + > sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); > if (sbi->s_locality_groups == NULL) { > ret = -ENOMEM; > @@ -5091,8 +5094,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, > * If the volume is mounted with -o discard, online discard > * is supported and the free blocks will be trimmed online. > */ > - if (!test_opt(sb, DISCARD)) > - EXT4_MB_GDP_CLEAR_TRIMMED(gdp); > + if (!test_opt(sb, DISCARD)) { > + e4b.bd_info->bb_freed_last_trimmed += count; > + /* > + * Only clear the WAS_TRIMMED flag if there are > + * several blocks freed, or if the group becomes > + * totally 'empty'(free < num_itable_blocks + 2). > + */ > + if (e4b.bd_info->bb_freed_last_trimmed >= > + sbi->s_min_freed_blocks_to_trim || > + e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2)) > + EXT4_MB_GDP_CLEAR_TRIMMED(gdp); > + } > ext4_group_desc_csum_set(sb, block_group, gdp); > ext4_unlock_group(sb, block_group); > > @@ -5425,6 +5438,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, > } > ext4_lock_group(sb, group); > EXT4_MB_GDP_SET_TRIMMED(gdp); > + e4b.bd_info->bb_freed_last_trimmed = 0; > ext4_group_desc_csum_set(sb, group, gdp); > ext4_unlock_group(sb, group); > err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); > diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c > index 6c9fc9e21c13..8ee4e7e3f125 100644 > --- a/fs/ext4/sysfs.c > +++ b/fs/ext4/sysfs.c > @@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); > EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); > EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); > EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); > +EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim); > EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); > EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); > EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); > @@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = { > ATTR_LIST(mb_group_prealloc), > ATTR_LIST(max_writeback_mb_bump), > ATTR_LIST(extent_max_zeroout_kb), > + ATTR_LIST(min_freed_blocks_to_trim), > ATTR_LIST(trigger_fs_error), > ATTR_LIST(err_ratelimit_interval_ms), > ATTR_LIST(err_ratelimit_burst), > -- > 2.25.4 > Cheers, Andreas
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 252754da2f1b..2da86d1ebe3f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1240,6 +1240,9 @@ extern void ext4_set_bits(void *bm, int cur, int len); /* Metadata checksum algorithm codes */ #define EXT4_CRC32C_CHKSUM 1 +/* Default min freed blocks which we could clear TRIMMED flags */ +#define DEFAULT_MIN_FREED_BLOCKS_TO_TRIM 128 + /* * Structure of the super block */ @@ -1533,6 +1536,9 @@ struct ext4_sb_info { /* the size of zero-out chunk */ unsigned int s_extent_max_zeroout_kb; + /* Min freed blocks per group that we could run trim on it*/ + unsigned long s_min_freed_blocks_to_trim; + unsigned int s_log_groups_per_flex; struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; @@ -3125,6 +3131,7 @@ struct ext4_group_info { struct rb_root bb_free_root; ext4_grpblk_t bb_first_free; /* first free block */ ext4_grpblk_t bb_free; /* total free blocks */ + ext4_grpblk_t bb_freed_last_trimmed; /* total free blocks since last trimmed*/ ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ struct list_head bb_prealloc_list; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 235a316584d0..52ab9ac5be86 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2558,6 +2558,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, init_rwsem(&meta_group_info[i]->alloc_sem); meta_group_info[i]->bb_free_root = RB_ROOT; meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ + meta_group_info[i]->bb_freed_last_trimmed = 0; mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); return 0; @@ -2763,6 +2764,8 @@ int ext4_mb_init(struct super_block *sb) sbi->s_mb_group_prealloc, sbi->s_stripe); } + sbi->s_min_freed_blocks_to_trim = DEFAULT_MIN_FREED_BLOCKS_TO_TRIM; + sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { ret = -ENOMEM; @@ -5091,8 +5094,18 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * If the volume is mounted with -o discard, online discard * is supported and the free blocks will be trimmed online. */ - if (!test_opt(sb, DISCARD)) - EXT4_MB_GDP_CLEAR_TRIMMED(gdp); + if (!test_opt(sb, DISCARD)) { + e4b.bd_info->bb_freed_last_trimmed += count; + /* + * Only clear the WAS_TRIMMED flag if there are + * several blocks freed, or if the group becomes + * totally 'empty'(free < num_itable_blocks + 2). + */ + if (e4b.bd_info->bb_freed_last_trimmed >= + sbi->s_min_freed_blocks_to_trim || + e4b.bd_info->bb_free < (sbi->s_itb_per_group + 2)) + EXT4_MB_GDP_CLEAR_TRIMMED(gdp); + } ext4_group_desc_csum_set(sb, block_group, gdp); ext4_unlock_group(sb, block_group); @@ -5425,6 +5438,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, } ext4_lock_group(sb, group); EXT4_MB_GDP_SET_TRIMMED(gdp); + e4b.bd_info->bb_freed_last_trimmed = 0; ext4_group_desc_csum_set(sb, group, gdp); ext4_unlock_group(sb, group); err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 6c9fc9e21c13..8ee4e7e3f125 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -216,6 +216,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); +EXT4_RW_ATTR_SBI_UI(min_freed_blocks_to_trim, s_min_freed_blocks_to_trim); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); @@ -259,6 +260,7 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(mb_group_prealloc), ATTR_LIST(max_writeback_mb_bump), ATTR_LIST(extent_max_zeroout_kb), + ATTR_LIST(min_freed_blocks_to_trim), ATTR_LIST(trigger_fs_error), ATTR_LIST(err_ratelimit_interval_ms), ATTR_LIST(err_ratelimit_burst),