Message ID | 5c2778979ea32e840005936cef42c5e5eff0b353.1431967209.git.berto@igalia.com |
---|---|
State | New |
Headers | show |
On 18.05.2015 18:48, Alberto Garcia wrote: > This adds a new 'cache-clean-interval' option that cleans all qcow2 > cache entries that haven't been used in a certain interval, given in > seconds. > > This allows setting a large L2 cache size so it can handle scenarios > with lots of I/O and at the same time use little memory during periods > of inactivity. > > This feature currently relies on MADV_DONTNEED to free that memory, so > it is not useful in systems that don't follow that behavior. > > Signed-off-by: Alberto Garcia <berto@igalia.com> > --- > block/qcow2-cache.c | 35 ++++++++++++++++++++++++++++++++ > block/qcow2.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ > block/qcow2.h | 4 ++++ > qapi/block-core.json | 6 +++++- > 4 files changed, 100 insertions(+), 1 deletion(-) > > diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c > index ed14a92..a215f5b 100644 > --- a/block/qcow2-cache.c > +++ b/block/qcow2-cache.c > @@ -43,6 +43,7 @@ struct Qcow2Cache { > bool depends_on_flush; > void *table_array; > uint64_t lru_counter; > + uint64_t cache_clean_lru_counter; > }; > > static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, > @@ -78,6 +79,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, > #endif > } > > +static inline bool can_clean_entry(Qcow2Cache *c, int i) > +{ > + Qcow2CachedTable *t = &c->entries[i]; > + return t->ref == 0 && !t->dirty && t->offset != 0 && > + t->lru_counter <= c->cache_clean_lru_counter; > +} > + > +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) > +{ > + int i = 0; > + while (i < c->size) { > + int to_clean = 0; > + > + /* Skip the entries that we don't need to clean */ > + while (i < c->size && !can_clean_entry(c, i)) { > + i++; > + } > + > + /* And count how many we can clean in a row */ > + while (i < c->size && can_clean_entry(c, i)) { > + c->entries[i].offset = 0; > + c->entries[i].lru_counter = 0; > + i++; > + to_clean++; > + } > + > + if (to_clean > 0) { > + qcow2_cache_table_release(bs, c, i - to_clean, to_clean); > + } > + } > + > + c->cache_clean_lru_counter = c->lru_counter; > +} > + > Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) > { > BDRVQcowState *s = bs->opaque; > diff --git a/block/qcow2.c b/block/qcow2.c > index b9a72e3..f6ed39f 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -468,6 +468,11 @@ static QemuOptsList qcow2_runtime_opts = { > .type = QEMU_OPT_SIZE, > .help = "Maximum refcount block cache size", > }, > + { > + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, > + .type = QEMU_OPT_NUMBER, > + .help = "Clean unused cache entries after this time (in seconds)", > + }, > { /* end of list */ } > }, > }; > @@ -483,6 +488,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { > [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, > }; > > +static void cache_clean_timer_cb(void *opaque) > +{ > + BlockDriverState *bs = opaque; > + BDRVQcowState *s = bs->opaque; > + qcow2_cache_clean_unused(bs, s->l2_table_cache); > + qcow2_cache_clean_unused(bs, s->refcount_block_cache); > + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + > + s->cache_clean_interval * 1000); > +} > + > +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) > +{ > + BDRVQcowState *s = bs->opaque; > + if (s->cache_clean_interval > 0) { > + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, > + SCALE_MS, cache_clean_timer_cb, > + bs); > + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + > + s->cache_clean_interval * 1000); Can overflow. > + } > +} > + > +static void cache_clean_timer_del(BlockDriverState *bs) > +{ > + BDRVQcowState *s = bs->opaque; > + if (s->cache_clean_timer) { > + timer_del(s->cache_clean_timer); > + timer_free(s->cache_clean_timer); > + s->cache_clean_timer = NULL; > + } > +} > + > +static void qcow2_detach_aio_context(BlockDriverState *bs) > +{ > + cache_clean_timer_del(bs); > +} > + > +static void qcow2_attach_aio_context(BlockDriverState *bs, > + AioContext *new_context) > +{ > + cache_clean_timer_init(bs, new_context); > +} > + > static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, > uint64_t *refcount_cache_size, Error **errp) > { > @@ -838,6 +886,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, > ret = -ENOMEM; > goto fail; > } > + s->cache_clean_interval = > + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0); Can overflow. Apart from these two things (which are by no means really bad, but it'd better not to introduce them in the first place...), the patch looks good. Max > + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); > > s->cluster_cache = g_malloc(s->cluster_size); > /* one more sector for decompressed data alignment */ > @@ -1004,6 +1055,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, > qemu_vfree(s->l1_table); > /* else pre-write overlap checks in cache_destroy may crash */ > s->l1_table = NULL; > + cache_clean_timer_del(bs); > if (s->l2_table_cache) { > qcow2_cache_destroy(bs, s->l2_table_cache); > } > @@ -1453,6 +1505,7 @@ static void qcow2_close(BlockDriverState *bs) > } > } > > + cache_clean_timer_del(bs); > qcow2_cache_destroy(bs, s->l2_table_cache); > qcow2_cache_destroy(bs, s->refcount_block_cache); > > @@ -2964,6 +3017,9 @@ BlockDriver bdrv_qcow2 = { > .create_opts = &qcow2_create_opts, > .bdrv_check = qcow2_check, > .bdrv_amend_options = qcow2_amend_options, > + > + .bdrv_detach_aio_context = qcow2_detach_aio_context, > + .bdrv_attach_aio_context = qcow2_attach_aio_context, > }; > > static void bdrv_qcow2_init(void) > diff --git a/block/qcow2.h b/block/qcow2.h > index 0076512..2c45eb2 100644 > --- a/block/qcow2.h > +++ b/block/qcow2.h > @@ -93,6 +93,7 @@ > #define QCOW2_OPT_CACHE_SIZE "cache-size" > #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" > #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" > +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" > > typedef struct QCowHeader { > uint32_t magic; > @@ -236,6 +237,8 @@ typedef struct BDRVQcowState { > > Qcow2Cache* l2_table_cache; > Qcow2Cache* refcount_block_cache; > + QEMUTimer *cache_clean_timer; > + unsigned cache_clean_interval; > > uint8_t *cluster_cache; > uint8_t *cluster_data; > @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, > Qcow2Cache *dependency); > void qcow2_cache_depends_on_flush(Qcow2Cache *c); > > +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); > int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); > > int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, > diff --git a/qapi/block-core.json b/qapi/block-core.json > index 863ffea..f42338d 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -1538,6 +1538,9 @@ > # @refcount-cache-size: #optional the maximum size of the refcount block cache > # in bytes (since 2.2) > # > +# @cache-clean-interval: #optional clean unused entries in the L2 and refcount > +# caches. The interval is in seconds (since 2.4) > +# > # Since: 1.7 > ## > { 'struct': 'BlockdevOptionsQcow2', > @@ -1549,7 +1552,8 @@ > '*overlap-check': 'Qcow2OverlapChecks', > '*cache-size': 'int', > '*l2-cache-size': 'int', > - '*refcount-cache-size': 'int' } } > + '*refcount-cache-size': 'int', > + '*cache-clean-interval': 'int' } } > > > ##
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index ed14a92..a215f5b 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -43,6 +43,7 @@ struct Qcow2Cache { bool depends_on_flush; void *table_array; uint64_t lru_counter; + uint64_t cache_clean_lru_counter; }; static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, @@ -78,6 +79,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, #endif } +static inline bool can_clean_entry(Qcow2Cache *c, int i) +{ + Qcow2CachedTable *t = &c->entries[i]; + return t->ref == 0 && !t->dirty && t->offset != 0 && + t->lru_counter <= c->cache_clean_lru_counter; +} + +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) +{ + int i = 0; + while (i < c->size) { + int to_clean = 0; + + /* Skip the entries that we don't need to clean */ + while (i < c->size && !can_clean_entry(c, i)) { + i++; + } + + /* And count how many we can clean in a row */ + while (i < c->size && can_clean_entry(c, i)) { + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + i++; + to_clean++; + } + + if (to_clean > 0) { + qcow2_cache_table_release(bs, c, i - to_clean, to_clean); + } + } + + c->cache_clean_lru_counter = c->lru_counter; +} + Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) { BDRVQcowState *s = bs->opaque; diff --git a/block/qcow2.c b/block/qcow2.c index b9a72e3..f6ed39f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -468,6 +468,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, { /* end of list */ } }, }; @@ -483,6 +488,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcowState *s = bs->opaque; + qcow2_cache_clean_unused(bs, s->l2_table_cache); + qcow2_cache_clean_unused(bs, s->refcount_block_cache); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcowState *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { @@ -838,6 +886,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } + s->cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0); + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ @@ -1004,6 +1055,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; + cache_clean_timer_del(bs); if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -1453,6 +1505,7 @@ static void qcow2_close(BlockDriverState *bs) } } + cache_clean_timer_del(bs); qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); @@ -2964,6 +3017,9 @@ BlockDriver bdrv_qcow2 = { .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, .bdrv_amend_options = qcow2_amend_options, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, }; static void bdrv_qcow2_init(void) diff --git a/block/qcow2.h b/block/qcow2.h index 0076512..2c45eb2 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -93,6 +93,7 @@ #define QCOW2_OPT_CACHE_SIZE "cache-size" #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" typedef struct QCowHeader { uint32_t magic; @@ -236,6 +237,8 @@ typedef struct BDRVQcowState { Qcow2Cache* l2_table_cache; Qcow2Cache* refcount_block_cache; + QEMUTimer *cache_clean_timer; + unsigned cache_clean_interval; uint8_t *cluster_cache; uint8_t *cluster_data; @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, diff --git a/qapi/block-core.json b/qapi/block-core.json index 863ffea..f42338d 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1538,6 +1538,9 @@ # @refcount-cache-size: #optional the maximum size of the refcount block cache # in bytes (since 2.2) # +# @cache-clean-interval: #optional clean unused entries in the L2 and refcount +# caches. The interval is in seconds (since 2.4) +# # Since: 1.7 ## { 'struct': 'BlockdevOptionsQcow2', @@ -1549,7 +1552,8 @@ '*overlap-check': 'Qcow2OverlapChecks', '*cache-size': 'int', '*l2-cache-size': 'int', - '*refcount-cache-size': 'int' } } + '*refcount-cache-size': 'int', + '*cache-clean-interval': 'int' } } ##
This adds a new 'cache-clean-interval' option that cleans all qcow2 cache entries that haven't been used in a certain interval, given in seconds. This allows setting a large L2 cache size so it can handle scenarios with lots of I/O and at the same time use little memory during periods of inactivity. This feature currently relies on MADV_DONTNEED to free that memory, so it is not useful in systems that don't follow that behavior. Signed-off-by: Alberto Garcia <berto@igalia.com> --- block/qcow2-cache.c | 35 ++++++++++++++++++++++++++++++++ block/qcow2.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 4 ++++ qapi/block-core.json | 6 +++++- 4 files changed, 100 insertions(+), 1 deletion(-)