diff mbox

[2/3] qcow2: add option to clean unused cache entries after some time

Message ID 5c2778979ea32e840005936cef42c5e5eff0b353.1431967209.git.berto@igalia.com
State New
Headers show

Commit Message

Alberto Garcia May 18, 2015, 4:48 p.m. UTC
This adds a new 'cache-clean-interval' option that cleans all qcow2
cache entries that haven't been used in a certain interval, given in
seconds.

This allows setting a large L2 cache size so it can handle scenarios
with lots of I/O and at the same time use little memory during periods
of inactivity.

This feature currently relies on MADV_DONTNEED to free that memory, so
it is not useful in systems that don't follow that behavior.

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block/qcow2-cache.c  | 35 ++++++++++++++++++++++++++++++++
 block/qcow2.c        | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h        |  4 ++++
 qapi/block-core.json |  6 +++++-
 4 files changed, 100 insertions(+), 1 deletion(-)

Comments

Max Reitz May 26, 2015, 4:07 p.m. UTC | #1
On 18.05.2015 18:48, Alberto Garcia wrote:
> This adds a new 'cache-clean-interval' option that cleans all qcow2
> cache entries that haven't been used in a certain interval, given in
> seconds.
>
> This allows setting a large L2 cache size so it can handle scenarios
> with lots of I/O and at the same time use little memory during periods
> of inactivity.
>
> This feature currently relies on MADV_DONTNEED to free that memory, so
> it is not useful in systems that don't follow that behavior.
>
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>   block/qcow2-cache.c  | 35 ++++++++++++++++++++++++++++++++
>   block/qcow2.c        | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>   block/qcow2.h        |  4 ++++
>   qapi/block-core.json |  6 +++++-
>   4 files changed, 100 insertions(+), 1 deletion(-)
>
> diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
> index ed14a92..a215f5b 100644
> --- a/block/qcow2-cache.c
> +++ b/block/qcow2-cache.c
> @@ -43,6 +43,7 @@ struct Qcow2Cache {
>       bool                    depends_on_flush;
>       void                   *table_array;
>       uint64_t                lru_counter;
> +    uint64_t                cache_clean_lru_counter;
>   };
>   
>   static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
> @@ -78,6 +79,40 @@ static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
>   #endif
>   }
>   
> +static inline bool can_clean_entry(Qcow2Cache *c, int i)
> +{
> +    Qcow2CachedTable *t = &c->entries[i];
> +    return t->ref == 0 && !t->dirty && t->offset != 0 &&
> +        t->lru_counter <= c->cache_clean_lru_counter;
> +}
> +
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
> +{
> +    int i = 0;
> +    while (i < c->size) {
> +        int to_clean = 0;
> +
> +        /* Skip the entries that we don't need to clean */
> +        while (i < c->size && !can_clean_entry(c, i)) {
> +            i++;
> +        }
> +
> +        /* And count how many we can clean in a row */
> +        while (i < c->size && can_clean_entry(c, i)) {
> +            c->entries[i].offset = 0;
> +            c->entries[i].lru_counter = 0;
> +            i++;
> +            to_clean++;
> +        }
> +
> +        if (to_clean > 0) {
> +            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
> +        }
> +    }
> +
> +    c->cache_clean_lru_counter = c->lru_counter;
> +}
> +
>   Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
>   {
>       BDRVQcowState *s = bs->opaque;
> diff --git a/block/qcow2.c b/block/qcow2.c
> index b9a72e3..f6ed39f 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -468,6 +468,11 @@ static QemuOptsList qcow2_runtime_opts = {
>               .type = QEMU_OPT_SIZE,
>               .help = "Maximum refcount block cache size",
>           },
> +        {
> +            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "Clean unused cache entries after this time (in seconds)",
> +        },
>           { /* end of list */ }
>       },
>   };
> @@ -483,6 +488,49 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
>       [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
>   };
>   
> +static void cache_clean_timer_cb(void *opaque)
> +{
> +    BlockDriverState *bs = opaque;
> +    BDRVQcowState *s = bs->opaque;
> +    qcow2_cache_clean_unused(bs, s->l2_table_cache);
> +    qcow2_cache_clean_unused(bs, s->refcount_block_cache);
> +    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> +              s->cache_clean_interval * 1000);
> +}
> +
> +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    if (s->cache_clean_interval > 0) {
> +        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
> +                                             SCALE_MS, cache_clean_timer_cb,
> +                                             bs);
> +        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
> +                  s->cache_clean_interval * 1000);

Can overflow.

> +    }
> +}
> +
> +static void cache_clean_timer_del(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    if (s->cache_clean_timer) {
> +        timer_del(s->cache_clean_timer);
> +        timer_free(s->cache_clean_timer);
> +        s->cache_clean_timer = NULL;
> +    }
> +}
> +
> +static void qcow2_detach_aio_context(BlockDriverState *bs)
> +{
> +    cache_clean_timer_del(bs);
> +}
> +
> +static void qcow2_attach_aio_context(BlockDriverState *bs,
> +                                     AioContext *new_context)
> +{
> +    cache_clean_timer_init(bs, new_context);
> +}
> +
>   static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size,
>                                uint64_t *refcount_cache_size, Error **errp)
>   {
> @@ -838,6 +886,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>           ret = -ENOMEM;
>           goto fail;
>       }
> +    s->cache_clean_interval =
> +        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0);

Can overflow.

Apart from these two things (which are by no means really bad, but it'd 
better not to introduce them in the first place...), the patch looks good.

Max

> +    cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
>   
>       s->cluster_cache = g_malloc(s->cluster_size);
>       /* one more sector for decompressed data alignment */
> @@ -1004,6 +1055,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>       qemu_vfree(s->l1_table);
>       /* else pre-write overlap checks in cache_destroy may crash */
>       s->l1_table = NULL;
> +    cache_clean_timer_del(bs);
>       if (s->l2_table_cache) {
>           qcow2_cache_destroy(bs, s->l2_table_cache);
>       }
> @@ -1453,6 +1505,7 @@ static void qcow2_close(BlockDriverState *bs)
>           }
>       }
>   
> +    cache_clean_timer_del(bs);
>       qcow2_cache_destroy(bs, s->l2_table_cache);
>       qcow2_cache_destroy(bs, s->refcount_block_cache);
>   
> @@ -2964,6 +3017,9 @@ BlockDriver bdrv_qcow2 = {
>       .create_opts         = &qcow2_create_opts,
>       .bdrv_check          = qcow2_check,
>       .bdrv_amend_options  = qcow2_amend_options,
> +
> +    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
> +    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
>   };
>   
>   static void bdrv_qcow2_init(void)
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 0076512..2c45eb2 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -93,6 +93,7 @@
>   #define QCOW2_OPT_CACHE_SIZE "cache-size"
>   #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
>   #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
> +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
>   
>   typedef struct QCowHeader {
>       uint32_t magic;
> @@ -236,6 +237,8 @@ typedef struct BDRVQcowState {
>   
>       Qcow2Cache* l2_table_cache;
>       Qcow2Cache* refcount_block_cache;
> +    QEMUTimer *cache_clean_timer;
> +    unsigned cache_clean_interval;
>   
>       uint8_t *cluster_cache;
>       uint8_t *cluster_data;
> @@ -581,6 +584,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
>       Qcow2Cache *dependency);
>   void qcow2_cache_depends_on_flush(Qcow2Cache *c);
>   
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
>   int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
>   
>   int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 863ffea..f42338d 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1538,6 +1538,9 @@
>   # @refcount-cache-size:   #optional the maximum size of the refcount block cache
>   #                         in bytes (since 2.2)
>   #
> +# @cache-clean-interval:  #optional clean unused entries in the L2 and refcount
> +#                         caches. The interval is in seconds (since 2.4)
> +#
>   # Since: 1.7
>   ##
>   { 'struct': 'BlockdevOptionsQcow2',
> @@ -1549,7 +1552,8 @@
>               '*overlap-check': 'Qcow2OverlapChecks',
>               '*cache-size': 'int',
>               '*l2-cache-size': 'int',
> -            '*refcount-cache-size': 'int' } }
> +            '*refcount-cache-size': 'int',
> +            '*cache-clean-interval': 'int' } }
>   
>   
>   ##
diff mbox

Patch

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index ed14a92..a215f5b 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -43,6 +43,7 @@  struct Qcow2Cache {
     bool                    depends_on_flush;
     void                   *table_array;
     uint64_t                lru_counter;
+    uint64_t                cache_clean_lru_counter;
 };
 
 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
@@ -78,6 +79,40 @@  static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
 #endif
 }
 
+static inline bool can_clean_entry(Qcow2Cache *c, int i)
+{
+    Qcow2CachedTable *t = &c->entries[i];
+    return t->ref == 0 && !t->dirty && t->offset != 0 &&
+        t->lru_counter <= c->cache_clean_lru_counter;
+}
+
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int i = 0;
+    while (i < c->size) {
+        int to_clean = 0;
+
+        /* Skip the entries that we don't need to clean */
+        while (i < c->size && !can_clean_entry(c, i)) {
+            i++;
+        }
+
+        /* And count how many we can clean in a row */
+        while (i < c->size && can_clean_entry(c, i)) {
+            c->entries[i].offset = 0;
+            c->entries[i].lru_counter = 0;
+            i++;
+            to_clean++;
+        }
+
+        if (to_clean > 0) {
+            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+        }
+    }
+
+    c->cache_clean_lru_counter = c->lru_counter;
+}
+
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
     BDRVQcowState *s = bs->opaque;
diff --git a/block/qcow2.c b/block/qcow2.c
index b9a72e3..f6ed39f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -468,6 +468,11 @@  static QemuOptsList qcow2_runtime_opts = {
             .type = QEMU_OPT_SIZE,
             .help = "Maximum refcount block cache size",
         },
+        {
+            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+            .type = QEMU_OPT_NUMBER,
+            .help = "Clean unused cache entries after this time (in seconds)",
+        },
         { /* end of list */ }
     },
 };
@@ -483,6 +488,49 @@  static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
     [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
 };
 
+static void cache_clean_timer_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVQcowState *s = bs->opaque;
+    qcow2_cache_clean_unused(bs, s->l2_table_cache);
+    qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+              s->cache_clean_interval * 1000);
+}
+
+static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
+{
+    BDRVQcowState *s = bs->opaque;
+    if (s->cache_clean_interval > 0) {
+        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
+                                             SCALE_MS, cache_clean_timer_cb,
+                                             bs);
+        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+                  s->cache_clean_interval * 1000);
+    }
+}
+
+static void cache_clean_timer_del(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    if (s->cache_clean_timer) {
+        timer_del(s->cache_clean_timer);
+        timer_free(s->cache_clean_timer);
+        s->cache_clean_timer = NULL;
+    }
+}
+
+static void qcow2_detach_aio_context(BlockDriverState *bs)
+{
+    cache_clean_timer_del(bs);
+}
+
+static void qcow2_attach_aio_context(BlockDriverState *bs,
+                                     AioContext *new_context)
+{
+    cache_clean_timer_init(bs, new_context);
+}
+
 static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size,
                              uint64_t *refcount_cache_size, Error **errp)
 {
@@ -838,6 +886,9 @@  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -ENOMEM;
         goto fail;
     }
+    s->cache_clean_interval =
+        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, 0);
+    cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
 
     s->cluster_cache = g_malloc(s->cluster_size);
     /* one more sector for decompressed data alignment */
@@ -1004,6 +1055,7 @@  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_vfree(s->l1_table);
     /* else pre-write overlap checks in cache_destroy may crash */
     s->l1_table = NULL;
+    cache_clean_timer_del(bs);
     if (s->l2_table_cache) {
         qcow2_cache_destroy(bs, s->l2_table_cache);
     }
@@ -1453,6 +1505,7 @@  static void qcow2_close(BlockDriverState *bs)
         }
     }
 
+    cache_clean_timer_del(bs);
     qcow2_cache_destroy(bs, s->l2_table_cache);
     qcow2_cache_destroy(bs, s->refcount_block_cache);
 
@@ -2964,6 +3017,9 @@  BlockDriver bdrv_qcow2 = {
     .create_opts         = &qcow2_create_opts,
     .bdrv_check          = qcow2_check,
     .bdrv_amend_options  = qcow2_amend_options,
+
+    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
+    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index 0076512..2c45eb2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -93,6 +93,7 @@ 
 #define QCOW2_OPT_CACHE_SIZE "cache-size"
 #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
 #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
+#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
 
 typedef struct QCowHeader {
     uint32_t magic;
@@ -236,6 +237,8 @@  typedef struct BDRVQcowState {
 
     Qcow2Cache* l2_table_cache;
     Qcow2Cache* refcount_block_cache;
+    QEMUTimer *cache_clean_timer;
+    unsigned cache_clean_interval;
 
     uint8_t *cluster_cache;
     uint8_t *cluster_data;
@@ -581,6 +584,7 @@  int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
     Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
 
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
 
 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 863ffea..f42338d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1538,6 +1538,9 @@ 
 # @refcount-cache-size:   #optional the maximum size of the refcount block cache
 #                         in bytes (since 2.2)
 #
+# @cache-clean-interval:  #optional clean unused entries in the L2 and refcount
+#                         caches. The interval is in seconds (since 2.4)
+#
 # Since: 1.7
 ##
 { 'struct': 'BlockdevOptionsQcow2',
@@ -1549,7 +1552,8 @@ 
             '*overlap-check': 'Qcow2OverlapChecks',
             '*cache-size': 'int',
             '*l2-cache-size': 'int',
-            '*refcount-cache-size': 'int' } }
+            '*refcount-cache-size': 'int',
+            '*cache-clean-interval': 'int' } }
 
 
 ##