
KVM: stats: Add VM dirty_pages stats for the number of dirty pages

Message ID 20210810223238.979194-1-jingzhangos@google.com
State New
Series KVM: stats: Add VM dirty_pages stats for the number of dirty pages

Commit Message

Jing Zhang Aug. 10, 2021, 10:32 p.m. UTC
Add a generic VM stat, dirty_pages, to record the number of dirty pages
currently reflected in the dirty_bitmap.

Original-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Jing Zhang <jingzhangos@google.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c    |  8 ++++++--
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  1 +
 arch/powerpc/kvm/book3s_hv_rm_mmu.c    |  1 +
 include/linux/kvm_host.h               |  3 ++-
 include/linux/kvm_types.h              |  1 +
 virt/kvm/kvm_main.c                    | 26 +++++++++++++++++++++++---
 6 files changed, 34 insertions(+), 6 deletions(-)


base-commit: d0732b0f8884d9cc0eca0082bbaef043f3fef7fb

Comments

Peter Feiner Aug. 10, 2021, 10:44 p.m. UTC | #1
On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
>
> Add a generic VM stats dirty_pages to record the number of dirty pages
> reflected in dirty_bitmap at the moment.
>
> Original-by: Peter Feiner <pfeiner@google.com>
> Signed-off-by: Jing Zhang <jingzhangos@google.com>
> ---
>  arch/powerpc/kvm/book3s_64_mmu_hv.c    |  8 ++++++--
>  arch/powerpc/kvm/book3s_64_mmu_radix.c |  1 +
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c    |  1 +
>  include/linux/kvm_host.h               |  3 ++-
>  include/linux/kvm_types.h              |  1 +
>  virt/kvm/kvm_main.c                    | 26 +++++++++++++++++++++++---
>  6 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index c63e263312a4..e4aafa10efa1 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1122,8 +1122,10 @@ long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
>                  * since we always put huge-page HPTEs in the rmap chain
>                  * corresponding to their page base address.
>                  */
> -               if (npages)
> +               if (npages) {
>                         set_dirty_bits(map, i, npages);
> +                       kvm->stat.generic.dirty_pages += npages;
> +               }
>                 ++rmapp;
>         }
>         preempt_enable();
> @@ -1178,8 +1180,10 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
>         gfn = gpa >> PAGE_SHIFT;
>         srcu_idx = srcu_read_lock(&kvm->srcu);
>         memslot = gfn_to_memslot(kvm, gfn);
> -       if (memslot && memslot->dirty_bitmap)
> +       if (memslot && memslot->dirty_bitmap) {
>                 set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
> +               ++kvm->stat.generic.dirty_pages;
> +       }
>         srcu_read_unlock(&kvm->srcu, srcu_idx);
>  }
>
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> index b5905ae4377c..3a6cb3854a44 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> @@ -1150,6 +1150,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
>                 j = i + 1;
>                 if (npages) {
>                         set_dirty_bits(map, i, npages);
> +                       kvm->stat.generic.dirty_pages += npages;
>                         j = i + npages;
>                 }
>         }
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 632b2545072b..16806bc473fa 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -109,6 +109,7 @@ void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
>         npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
>         gfn -= memslot->base_gfn;
>         set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
> +       kvm->stat.generic.dirty_pages += npages;
>  }
>  EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f50bfcf225f0..1e8e66fb915b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1421,7 +1421,8 @@ struct _kvm_stats_desc {
>                 KVM_STATS_BASE_POW10, -9)
>
>  #define KVM_GENERIC_VM_STATS()                                                \
> -       STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
> +       STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush),                      \
> +       STATS_DESC_COUNTER(VM_GENERIC, dirty_pages)
>
>  #define KVM_GENERIC_VCPU_STATS()                                              \
>         STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll),                \
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index ed6a985c5680..6c05df00aebf 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -78,6 +78,7 @@ struct kvm_mmu_memory_cache {
>
>  struct kvm_vm_stat_generic {
>         u64 remote_tlb_flush;
> +       u64 dirty_pages;
>  };
>
>  struct kvm_vcpu_stat_generic {
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index a438a7a3774a..93f0ca2ea326 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1228,6 +1228,19 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>         return 0;
>  }
>
> +static inline unsigned long hweight_dirty_bitmap(
> +                                               struct kvm_memory_slot *memslot)
> +{
> +       unsigned long i;
> +       unsigned long count = 0;
> +       unsigned long n = kvm_dirty_bitmap_bytes(memslot);
> +
> +       for (i = 0; i < n / sizeof(long); ++i)
> +               count += hweight_long(memslot->dirty_bitmap[i]);
> +
> +       return count;
> +}
> +
>  /*
>   * Delete a memslot by decrementing the number of used slots and shifting all
>   * other entries in the array forward one spot.
> @@ -1612,6 +1625,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
>         if (r)
>                 return r;
>
> +       kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(old);
>         kvm_free_memslot(kvm, old);
>         return 0;
>  }
> @@ -1733,8 +1747,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>         if (r)
>                 goto out_bitmap;
>
> -       if (old.dirty_bitmap && !new.dirty_bitmap)
> +       if (old.dirty_bitmap && !new.dirty_bitmap) {
> +               kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(&old);
>                 kvm_destroy_dirty_bitmap(&old);
> +       }
>         return 0;
>
>  out_bitmap:
> @@ -1895,6 +1911,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
>                         offset = i * BITS_PER_LONG;
>                         kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
>                                                                 offset, mask);
> +                       kvm->stat.generic.dirty_pages -= hweight_long(mask);
>                 }
>                 KVM_MMU_UNLOCK(kvm);
>         }
> @@ -2012,6 +2029,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
>                         flush = true;
>                         kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
>                                                                 offset, mask);
> +                       kvm->stat.generic.dirty_pages -= hweight_long(mask);
>                 }
>         }
>         KVM_MMU_UNLOCK(kvm);
> @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
>                 unsigned long rel_gfn = gfn - memslot->base_gfn;
>                 u32 slot = (memslot->as_id << 16) | memslot->id;
>
> -               if (kvm->dirty_ring_size)
> +               if (kvm->dirty_ring_size) {
>                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
>                                             slot, rel_gfn);
> -               else
> +               } else {
>                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> +                       ++kvm->stat.generic.dirty_pages;

Couple of problems here:

- Calls to mark_page_dirty_in_slot aren't serialized by the mmu_lock,
so these updates will race.
- The page might already be dirty in the bitmap, so you're potentially
double counting here.

You can fix both of these problems by changing set_bit_le to a
test_and_set_bit_le (might not be the function name -- but you get the
idea) and conditionally incrementing dirty_pages.
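
To illustrate (a sketch only, untested -- test_and_set_bit_le() does
exist for this, and the conditional increment counts only clear->set
transitions):

		} else {
			/*
			 * The atomic test-and-set guarantees exactly one of
			 * any racing callers sees the bit clear, and a page
			 * that is already dirty is not counted again.
			 */
			if (!test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
				++kvm->stat.generic.dirty_pages;
		}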


> +               }
>         }
>  }
>  EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
>
> base-commit: d0732b0f8884d9cc0eca0082bbaef043f3fef7fb
> --
> 2.32.0.605.g8dce9f2422-goog
>
Jing Zhang Aug. 10, 2021, 10:51 p.m. UTC | #2
On Tue, Aug 10, 2021 at 3:45 PM Peter Feiner <pfeiner@google.com> wrote:
>
> On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
> >
> > Add a generic VM stats dirty_pages to record the number of dirty pages
> > reflected in dirty_bitmap at the moment.
> >
> > [...]
> > @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
> >                 unsigned long rel_gfn = gfn - memslot->base_gfn;
> >                 u32 slot = (memslot->as_id << 16) | memslot->id;
> >
> > -               if (kvm->dirty_ring_size)
> > +               if (kvm->dirty_ring_size) {
> >                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
> >                                             slot, rel_gfn);
> > -               else
> > +               } else {
> >                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> > +                       ++kvm->stat.generic.dirty_pages;
>
> Couple of problems here:
>
> - Calls to mark_page_dirty_in_slot aren't serialized by the mmu_lock,
> so these updates will race.
> - The page might already be dirty in the bitmap, so you're potentially
> double counting here
>
> You can fix both of these problems by changing set_bit_le to a
> test_and_set_bit_le (might not be the function name -- but you get the
> idea) and conditionally incrementing dirty_pages.
Thanks Peter, will do that.
>
>
> > [...]
Jing
Oliver Upton Aug. 10, 2021, 10:56 p.m. UTC | #3
Hi Jing,

On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
>
> Add a generic VM stats dirty_pages to record the number of dirty pages
> reflected in dirty_bitmap at the moment.

There can be multiple dirty bitmaps in a VM, one per memslot.

> [...]
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index a438a7a3774a..93f0ca2ea326 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1228,6 +1228,19 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
>         return 0;
>  }
>
> +static inline unsigned long hweight_dirty_bitmap(
> +                                               struct kvm_memory_slot *memslot)
> +{
> +       unsigned long i;
> +       unsigned long count = 0;
> +       unsigned long n = kvm_dirty_bitmap_bytes(memslot);
> +
> +       for (i = 0; i < n / sizeof(long); ++i)
> +               count += hweight_long(memslot->dirty_bitmap[i]);
> +
> +       return count;
> +}

Hrm, this seems like a decent amount of work for a statistic.
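(For scale: a 64 GiB memslot with 4 KiB pages carries a 2 MiB dirty
bitmap, so this walk is roughly 256K hweight_long() calls.)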

> +
>  /*
>   * Delete a memslot by decrementing the number of used slots and shifting all
>   * other entries in the array forward one spot.
> @@ -1612,6 +1625,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
>         if (r)
>                 return r;
>
> +       kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(old);
>         kvm_free_memslot(kvm, old);
>         return 0;
>  }
> @@ -1733,8 +1747,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>         if (r)
>                 goto out_bitmap;
>
> -       if (old.dirty_bitmap && !new.dirty_bitmap)
> +       if (old.dirty_bitmap && !new.dirty_bitmap) {
> +               kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(&old);
>                 kvm_destroy_dirty_bitmap(&old);
> +       }

Races to increment by a few pages might be OK, so long as imprecision
is acceptable, but decrementing by an entire bitmap could cause the
stat to get waaay off from the state of the VM.

What if the statistic was 'dirtied_pages', which records the number of
pages dirtied in the lifetime of a VM? Userspace could just record the
value each time it blows away the dirty bitmaps and subtract that
value next time it reads the stat. It would circumvent the need to
walk the entire dirty bitmap to keep the statistic sane.
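
Roughly, from userspace (read_vm_stat() and clear_dirty_logs() are
stand-in helpers, not an existing API -- the real interface would be
the binary stats fd and KVM_CLEAR_DIRTY_LOG):

	uint64_t base = read_vm_stat(vm, "dirtied_pages");
	clear_dirty_logs(vm);	/* blow away the dirty bitmaps */
	/* ... let the guest run for the sampling interval ... */
	uint64_t dirtied = read_vm_stat(vm, "dirtied_pages") - base;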

> [...]
> @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
>                 unsigned long rel_gfn = gfn - memslot->base_gfn;
>                 u32 slot = (memslot->as_id << 16) | memslot->id;
>
> -               if (kvm->dirty_ring_size)
> +               if (kvm->dirty_ring_size) {
>                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
>                                             slot, rel_gfn);
> -               else
> +               } else {
>                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> +                       ++kvm->stat.generic.dirty_pages;
> +               }

Aren't pages being pushed out to the dirty ring just as dirty? :-)

> [...]
Peter Feiner Aug. 10, 2021, 11:15 p.m. UTC | #4
On Tue, Aug 10, 2021 at 3:56 PM Oliver Upton <oupton@google.com> wrote:
>
> Hi Jing,
>
> On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
> >
> > Add a generic VM stats dirty_pages to record the number of dirty pages
> > reflected in dirty_bitmap at the moment.
>
> There can be multiple dirty bitmaps in a VM, one per memslot.
>
> > [...]
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index a438a7a3774a..93f0ca2ea326 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -1228,6 +1228,19 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
> >         return 0;
> >  }
> >
> > +static inline unsigned long hweight_dirty_bitmap(
> > +                                               struct kvm_memory_slot *memslot)
> > +{
> > +       unsigned long i;
> > +       unsigned long count = 0;
> > +       unsigned long n = kvm_dirty_bitmap_bytes(memslot);
> > +
> > +       for (i = 0; i < n / sizeof(long); ++i)
> > +               count += hweight_long(memslot->dirty_bitmap[i]);
> > +
> > +       return count;
> > +}
>
> Hrm, this seems like a decent amount of work for a statistic.
>
> > +
> >  /*
> >   * Delete a memslot by decrementing the number of used slots and shifting all
> >   * other entries in the array forward one spot.
> > @@ -1612,6 +1625,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
> >         if (r)
> >                 return r;
> >
> > +       kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(old);
> >         kvm_free_memslot(kvm, old);
> >         return 0;
> >  }
> > @@ -1733,8 +1747,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
> >         if (r)
> >                 goto out_bitmap;
> >
> > -       if (old.dirty_bitmap && !new.dirty_bitmap)
> > +       if (old.dirty_bitmap && !new.dirty_bitmap) {
> > +               kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(&old);
> >                 kvm_destroy_dirty_bitmap(&old);
> > +       }
>
> Races to increment by a few pages might be OK, so long as imprecision
> is acceptable, but decrementing by an entire bitmap could cause the
> stat to get waaay off from the state of the VM.

My original use case was to know the rate at which memory was being
dirtied to predict how expensive a live migration would be. I didn't
need full precision, but I would have needed a bound on the slop. A
"few pages" isn't a bound :-)

IMO, this patch isn't worth figuring out without a use case. It's
complex and has perf overhead. Maybe just drop it?

> [...]
Oliver Upton Aug. 10, 2021, 11:26 p.m. UTC | #5
On Tue, Aug 10, 2021 at 4:15 PM Peter Feiner <pfeiner@google.com> wrote:
> > Races to increment by a few pages might be OK, so long as imprecision
> > is acceptable, but decrementing by an entire bitmap could cause the
> > stat to get waaay off from the state of the VM.
>
> My original use case was to know the rate at which memory was being
> dirtied to predict how expensive a live migration would be. I didn't
> need full precision, but I would have needed a bound on the slop. A
> "few pages" isn't a bound :-)

I think the agreement with VM-scoped statistics is that slop is OK,
better than paying the cost of locking/atomics. If we want to be
exact, it'd have to be a vCPU stat.

> IMO, this patch isn't worth figuring out without a use case. It's
> complex and has perf overhead. Maybe just drop it?

Knowing the approximate rate at which pages are being dirtied would be
a nice-to-have for debugging, IMO. Just treating this stat as
monotonic would greatly simplify it and avoid the overhead.

--
Thanks,
Oliver
Paolo Bonzini Aug. 11, 2021, 10:20 a.m. UTC | #6
On 11/08/21 00:56, Oliver Upton wrote:
> What if the statistic was 'dirtied_pages', which records the number of
> pages dirtied in the lifetime of a VM? Userspace could just record the
> value each time it blows away the dirty bitmaps and subtract that
> value next time it reads the stat. It would circumvent the need to
> walk the entire dirty bitmap to keep the statistic sane.

Yeah, that'd be much better also because the "number of dirty pages" 
statistic is not well defined in init-all-dirty mode.

Making it a vCPU stat works in fact, because mark_page_dirty_in_slot is 
only called with kvm_get_running_vcpu() != NULL; see 
kvm_dirty_ring_get() in virt/kvm/dirty_ring.c.
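
A sketch of that variant (pages_dirtied is an illustrative field name,
not something this patch adds):

	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();

	/*
	 * A vCPU context is guaranteed here, so the counter can live in
	 * vcpu->stat and be incremented without atomics: each vCPU only
	 * ever writes its own counter.
	 */
	if (!WARN_ON_ONCE(!vcpu))
		++vcpu->stat.generic.pages_dirtied;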

>>
>> +               if (kvm->dirty_ring_size) {
>>                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
>>                                             slot, rel_gfn);
>> -               else
>> +               } else {
>>                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
>> +                       ++kvm->stat.generic.dirty_pages;
>> +               }
> 
> Aren't pages being pushed out to the dirty ring just as dirty? 
> 

Yes, they are.

Paolo
Jing Zhang Aug. 11, 2021, 8:14 p.m. UTC | #7
On Wed, Aug 11, 2021 at 3:20 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 11/08/21 00:56, Oliver Upton wrote:
> > What if the statistic was 'dirtied_pages', which records the number of
> > pages dirtied in the lifetime of a VM? Userspace could just record the
> > value each time it blows away the dirty bitmaps and subtract that
> > value next time it reads the stat. It would circumvent the need to
> > walk the entire dirty bitmap to keep the statistic sane.
>
> Yeah, that'd be much better also because the "number of dirty pages"
> statistic is not well defined in init-all-dirty mode.
>
> Making it a vCPU stat works in fact, because mark_page_dirty_in_slot is
> only called with kvm_get_running_vcpu() != NULL; see
> kvm_dirty_ring_get() in virt/kvm/dirty_ring.c.
>
> >>
> >> +               if (kvm->dirty_ring_size) {
> >>                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
> >>                                             slot, rel_gfn);
> >> -               else
> >> +               } else {
> >>                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> >> +                       ++kvm->stat.generic.dirty_pages;
> >> +               }
> >
> > Aren't pages being pushed out to the dirty ring just as dirty?
> >
>
> Yes, they are.
>
> Paolo
>
Following Oliver's idea, let's define "dirty_pages" as the number of
pages dirtied over the lifetime of a VM, which avoids the overhead of
walking the entire dirty_bitmap.
I didn't account for the dirty ring in this patch, but will in the next.
I'll still define dirty_pages as a VM-scoped stat in the next patch;
from there we can see whether it is necessary to make it a vCPU-scoped
stat. (Both KVM code and user code would be simpler with it as a
VM-scoped stat.)

Thanks,
Jing
Jing Zhang Aug. 11, 2021, 10:52 p.m. UTC | #8
Hi Peter,

On Tue, Aug 10, 2021 at 3:45 PM Peter Feiner <pfeiner@google.com> wrote:
>
> On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
> >
> > Add a generic VM stats dirty_pages to record the number of dirty pages
> > reflected in dirty_bitmap at the moment.
> >
> > [...]
> > @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
> >                 unsigned long rel_gfn = gfn - memslot->base_gfn;
> >                 u32 slot = (memslot->as_id << 16) | memslot->id;
> >
> > -               if (kvm->dirty_ring_size)
> > +               if (kvm->dirty_ring_size) {
> >                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
> >                                             slot, rel_gfn);
> > -               else
> > +               } else {
> >                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> > +                       ++kvm->stat.generic.dirty_pages;
>
> Couple of problems here:
>
> - Calls to mark_page_dirty_in_slot aren't serialized by the mmu_lock,
> so these updates will race.
> - The page might already be dirty in the bitmap, so you're potentially
> double counting here
>
> You can fix both of these problems by changing set_bit_le to a
> test_and_set_bit_le (might not be the function name -- but you get the
> idea) and conditionally incrementing dirty_pages.
>
>
Since we also need to handle the dirty ring case, and it is not
trivial to check whether a page is already in the dirty ring, I'll
leave set_bit_le as is. In any case, we are going to define the stat
"dirty_pages" as the number of pages dirtied over the lifetime of a VM,
so for your use case only its growth rate matters.
I'll send out another version of the patch and we'll see if it is good enough.
> > [...]

Thanks,
Jing
Peter Feiner Aug. 11, 2021, 11:19 p.m. UTC | #9
On Wed, Aug 11, 2021 at 3:53 PM Jing Zhang <jingzhangos@google.com> wrote:
>
> Hi Peter,
>
> On Tue, Aug 10, 2021 at 3:45 PM Peter Feiner <pfeiner@google.com> wrote:
> >
> > On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
> > >
> > > Add a generic VM stats dirty_pages to record the number of dirty pages
> > > reflected in dirty_bitmap at the moment.
> > >
> > > [...]
> > > @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
> > >                 unsigned long rel_gfn = gfn - memslot->base_gfn;
> > >                 u32 slot = (memslot->as_id << 16) | memslot->id;
> > >
> > > -               if (kvm->dirty_ring_size)
> > > +               if (kvm->dirty_ring_size) {
> > >                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
> > >                                             slot, rel_gfn);
> > > -               else
> > > +               } else {
> > >                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> > > +                       ++kvm->stat.generic.dirty_pages;
> >
> > Couple of problems here:
> >
> > - Calls to mark_page_dirty_in_slot aren't serialized by the mmu_lock,
> > so these updates will race.
> > - The page might already be dirty in the bitmap, so you're potentially
> > double counting here
> >
> > You can fix both of these problems by changing set_bit_le to a
> > test_and_set_bit_le (might not be the function name -- but you get the
> > idea) and conditionally incrementing dirty_pages.
> >
> >
> Since we need to consider the dirty ring situation and it is not
> trivial to check if the page is already in the dirty ring, I'll not
> change set_bit_le. Anyway, we are going to define the stat
> "dirty_pages" as dirtied pages in the life cycle of a VM, we only care
> about its growth rate for your use case.

To be clear, I don't have a use case anymore. It was years ago that I
came up with the idea of doing a kind of light memory dirty tracking
to predict how expensive pre-copy migration was going to be. However,
I didn't follow through on the idea; so if this never saw the light of
day in an upstream kernel, I'd be no worse for wear.

With that said, the use case I imagined did require knowing how many
pages were in the dirty bitmap at any moment. The number of pages
dirtied across the lifetime of the VM wouldn't have served that
purpose. So unless you have a use case for this new stat that's been
thought through end-to-end, I would suggest just dropping this patch
here (and from Google's kernel fork as well).



> I'll send out another version of the patch and we'll see if it is good enough.
> > > [...]
>
> Thanks,
> Jing
Jing Zhang Aug. 11, 2021, 11:25 p.m. UTC | #10
On Wed, Aug 11, 2021 at 4:19 PM Peter Feiner <pfeiner@google.com> wrote:
>
> On Wed, Aug 11, 2021 at 3:53 PM Jing Zhang <jingzhangos@google.com> wrote:
> >
> > Hi Peter,
> >
> > On Tue, Aug 10, 2021 at 3:45 PM Peter Feiner <pfeiner@google.com> wrote:
> > >
> > > On Tue, Aug 10, 2021 at 3:32 PM Jing Zhang <jingzhangos@google.com> wrote:
> > > >
> > > > Add a generic VM stats dirty_pages to record the number of dirty pages
> > > > reflected in dirty_bitmap at the moment.
> > > >
> > > > Original-by: Peter Feiner <pfeiner@google.com>
> > > > Signed-off-by: Jing Zhang <jingzhangos@google.com>
> > > > ---
> > > >  arch/powerpc/kvm/book3s_64_mmu_hv.c    |  8 ++++++--
> > > >  arch/powerpc/kvm/book3s_64_mmu_radix.c |  1 +
> > > >  arch/powerpc/kvm/book3s_hv_rm_mmu.c    |  1 +
> > > >  include/linux/kvm_host.h               |  3 ++-
> > > >  include/linux/kvm_types.h              |  1 +
> > > >  virt/kvm/kvm_main.c                    | 26 +++++++++++++++++++++++---
> > > >  6 files changed, 34 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > > > index c63e263312a4..e4aafa10efa1 100644
> > > > --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > > > +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> > > > @@ -1122,8 +1122,10 @@ long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
> > > >                  * since we always put huge-page HPTEs in the rmap chain
> > > >                  * corresponding to their page base address.
> > > >                  */
> > > > -               if (npages)
> > > > +               if (npages) {
> > > >                         set_dirty_bits(map, i, npages);
> > > > +                       kvm->stat.generic.dirty_pages += npages;
> > > > +               }
> > > >                 ++rmapp;
> > > >         }
> > > >         preempt_enable();
> > > > @@ -1178,8 +1180,10 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
> > > >         gfn = gpa >> PAGE_SHIFT;
> > > >         srcu_idx = srcu_read_lock(&kvm->srcu);
> > > >         memslot = gfn_to_memslot(kvm, gfn);
> > > > -       if (memslot && memslot->dirty_bitmap)
> > > > +       if (memslot && memslot->dirty_bitmap) {
> > > >                 set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
> > > > +               ++kvm->stat.generic.dirty_pages;
> > > > +       }
> > > >         srcu_read_unlock(&kvm->srcu, srcu_idx);
> > > >  }
> > > >
> > > > diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> > > > index b5905ae4377c..3a6cb3854a44 100644
> > > > --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
> > > > +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
> > > > @@ -1150,6 +1150,7 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
> > > >                 j = i + 1;
> > > >                 if (npages) {
> > > >                         set_dirty_bits(map, i, npages);
> > > > +                       kvm->stat.generic.dirty_pages += npages;
> > > >                         j = i + npages;
> > > >                 }
> > > >         }
> > > > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > > > index 632b2545072b..16806bc473fa 100644
> > > > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > > > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > > > @@ -109,6 +109,7 @@ void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
> > > >         npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
> > > >         gfn -= memslot->base_gfn;
> > > >         set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
> > > > +       kvm->stat.generic.dirty_pages += npages;
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index f50bfcf225f0..1e8e66fb915b 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -1421,7 +1421,8 @@ struct _kvm_stats_desc {
> > > >                 KVM_STATS_BASE_POW10, -9)
> > > >
> > > >  #define KVM_GENERIC_VM_STATS()                                                \
> > > > -       STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
> > > > +       STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush),                      \
> > > > +       STATS_DESC_COUNTER(VM_GENERIC, dirty_pages)
> > > >
> > > >  #define KVM_GENERIC_VCPU_STATS()                                              \
> > > >         STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll),                \
> > > > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > > > index ed6a985c5680..6c05df00aebf 100644
> > > > --- a/include/linux/kvm_types.h
> > > > +++ b/include/linux/kvm_types.h
> > > > @@ -78,6 +78,7 @@ struct kvm_mmu_memory_cache {
> > > >
> > > >  struct kvm_vm_stat_generic {
> > > >         u64 remote_tlb_flush;
> > > > +       u64 dirty_pages;
> > > >  };
> > > >
> > > >  struct kvm_vcpu_stat_generic {
> > > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > > index a438a7a3774a..93f0ca2ea326 100644
> > > > --- a/virt/kvm/kvm_main.c
> > > > +++ b/virt/kvm/kvm_main.c
> > > > @@ -1228,6 +1228,19 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
> > > >         return 0;
> > > >  }
> > > >
> > > > +static inline unsigned long hweight_dirty_bitmap(
> > > > +                                               struct kvm_memory_slot *memslot)
> > > > +{
> > > > +       unsigned long i;
> > > > +       unsigned long count = 0;
> > > > +       unsigned long n = kvm_dirty_bitmap_bytes(memslot);
> > > > +
> > > > +       for (i = 0; i < n / sizeof(long); ++i)
> > > > +               count += hweight_long(memslot->dirty_bitmap[i]);
> > > > +
> > > > +       return count;
> > > > +}
> > > > +
> > > >  /*
> > > >   * Delete a memslot by decrementing the number of used slots and shifting all
> > > >   * other entries in the array forward one spot.
> > > > @@ -1612,6 +1625,7 @@ static int kvm_delete_memslot(struct kvm *kvm,
> > > >         if (r)
> > > >                 return r;
> > > >
> > > > +       kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(old);
> > > >         kvm_free_memslot(kvm, old);
> > > >         return 0;
> > > >  }
> > > > @@ -1733,8 +1747,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
> > > >         if (r)
> > > >                 goto out_bitmap;
> > > >
> > > > -       if (old.dirty_bitmap && !new.dirty_bitmap)
> > > > +       if (old.dirty_bitmap && !new.dirty_bitmap) {
> > > > +               kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(&old);
> > > >                 kvm_destroy_dirty_bitmap(&old);
> > > > +       }
> > > >         return 0;
> > > >
> > > >  out_bitmap:
> > > > @@ -1895,6 +1911,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
> > > >                         offset = i * BITS_PER_LONG;
> > > >                         kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
> > > >                                                                 offset, mask);
> > > > +                       kvm->stat.generic.dirty_pages -= hweight_long(mask);
> > > >                 }
> > > >                 KVM_MMU_UNLOCK(kvm);
> > > >         }
> > > > @@ -2012,6 +2029,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
> > > >                         flush = true;
> > > >                         kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
> > > >                                                                 offset, mask);
> > > > +                       kvm->stat.generic.dirty_pages -= hweight_long(mask);
> > > >                 }
> > > >         }
> > > >         KVM_MMU_UNLOCK(kvm);
> > > > @@ -3062,11 +3080,13 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
> > > >                 unsigned long rel_gfn = gfn - memslot->base_gfn;
> > > >                 u32 slot = (memslot->as_id << 16) | memslot->id;
> > > >
> > > > -               if (kvm->dirty_ring_size)
> > > > +               if (kvm->dirty_ring_size) {
> > > >                         kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
> > > >                                             slot, rel_gfn);
> > > > -               else
> > > > +               } else {
> > > >                         set_bit_le(rel_gfn, memslot->dirty_bitmap);
> > > > +                       ++kvm->stat.generic.dirty_pages;
> > >
> > > Couple of problems here:
> > >
> > > - Calls to mark_page_dirty_in_slot aren't serialized by the mmu_lock,
> > > so these updates will race.
> > > - The page might already be dirty in the bitmap, so you're potentially
> > > double counting here.
> > >
> > > You can fix both of these problems by changing set_bit_le to a
> > > test_and_set_bit_le (might not be the function name -- but you get the
> > > idea) and conditionally incrementing dirty_pages.
> > >
> > >
> > Since we need to consider the dirty ring situation and it is not
> > trivial to check if the page is already in the dirty ring, I'll not
> > change set_bit_le. Anyway, we are going to define the stat
> > "dirty_pages" as the pages dirtied over the life cycle of a VM, so we
> > only care about its growth rate for your use case.
>
> To be clear, I don't have a use case anymore. It was years ago that I
> came up with the idea of doing a kind of light memory dirty tracking
> to predict how expensive pre-copy migration was going to be. However,
> I didn't follow through on the idea; so if this never saw the light of
> day in an upstream kernel, I'd be no worse for wear.
>
> With that said, the use case I imagined did require knowing how many
> pages were in the dirty bitmap at any moment. The number of pages
> dirtied across the lifetime of the VM wouldn't have served that
> purpose. So unless you have a use case for this new stat that's been
> thought through end-to-end, I would suggest just dropping this patch
> here (and from Google's kernel fork as well).
Thanks for the clarification, Peter.
I will still send out another version of the patch, based on Oliver's
idea, and we'll see how we can proceed from there.
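
With the lifetime definition, the growth rate is still straightforward
to sample from userspace; a rough sketch, where the stats path is a
placeholder rather than an interface this patch defines:

/* Sketch: estimate the guest dirty rate as the delta between two
 * samples of a lifetime dirty_pages counter. The path below is a
 * placeholder for whatever interface exposes the stat. */
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t sample(const char *path)
{
	uint64_t v = 0;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%" SCNu64, &v) != 1)
			v = 0;
		fclose(f);
	}
	return v;
}

int main(void)
{
	const char *path = "/sys/kernel/debug/kvm/VM/dirty_pages"; /* placeholder */
	uint64_t before = sample(path);

	sleep(1);
	printf("%" PRIu64 " pages dirtied/sec\n", sample(path) - before);
	return 0;
}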
>
>
>
> > I'll send out another version of the patch and we'll see if it is good enough.
> > > > +               }
> > > >         }
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
> > > >
> > > > base-commit: d0732b0f8884d9cc0eca0082bbaef043f3fef7fb
> > > > --
> > > > 2.32.0.605.g8dce9f2422-goog
> > > >
> >
> > Thanks,
> > Jing
diff mbox series

Patch

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c63e263312a4..e4aafa10efa1 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1122,8 +1122,10 @@  long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
 		 * since we always put huge-page HPTEs in the rmap chain
 		 * corresponding to their page base address.
 		 */
-		if (npages)
+		if (npages) {
 			set_dirty_bits(map, i, npages);
+			kvm->stat.generic.dirty_pages += npages;
+		}
 		++rmapp;
 	}
 	preempt_enable();
@@ -1178,8 +1180,10 @@  void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
 	gfn = gpa >> PAGE_SHIFT;
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
-	if (memslot && memslot->dirty_bitmap)
+	if (memslot && memslot->dirty_bitmap) {
 		set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
+		++kvm->stat.generic.dirty_pages;
+	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b5905ae4377c..3a6cb3854a44 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -1150,6 +1150,7 @@  long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
 		j = i + 1;
 		if (npages) {
 			set_dirty_bits(map, i, npages);
+			kvm->stat.generic.dirty_pages += npages;
 			j = i + npages;
 		}
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 632b2545072b..16806bc473fa 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -109,6 +109,7 @@  void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
 	npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
 	gfn -= memslot->base_gfn;
 	set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
+	kvm->stat.generic.dirty_pages += npages;
 }
 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f50bfcf225f0..1e8e66fb915b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1421,7 +1421,8 @@  struct _kvm_stats_desc {
 		KVM_STATS_BASE_POW10, -9)
 
 #define KVM_GENERIC_VM_STATS()						       \
-	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
+	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush),		       \
+	STATS_DESC_COUNTER(VM_GENERIC, dirty_pages)
 
 #define KVM_GENERIC_VCPU_STATS()					       \
 	STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll),		       \
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index ed6a985c5680..6c05df00aebf 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -78,6 +78,7 @@  struct kvm_mmu_memory_cache {
 
 struct kvm_vm_stat_generic {
 	u64 remote_tlb_flush;
+	u64 dirty_pages;
 };
 
 struct kvm_vcpu_stat_generic {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a438a7a3774a..93f0ca2ea326 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1228,6 +1228,19 @@  static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
 	return 0;
 }
 
+static inline unsigned long hweight_dirty_bitmap(
+						struct kvm_memory_slot *memslot)
+{
+	unsigned long i;
+	unsigned long count = 0;
+	unsigned long n = kvm_dirty_bitmap_bytes(memslot);
+
+	for (i = 0; i < n / sizeof(long); ++i)
+		count += hweight_long(memslot->dirty_bitmap[i]);
+
+	return count;
+}
+
 /*
  * Delete a memslot by decrementing the number of used slots and shifting all
  * other entries in the array forward one spot.
@@ -1612,6 +1625,7 @@  static int kvm_delete_memslot(struct kvm *kvm,
 	if (r)
 		return r;
 
+	kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(old);
 	kvm_free_memslot(kvm, old);
 	return 0;
 }
@@ -1733,8 +1747,10 @@  int __kvm_set_memory_region(struct kvm *kvm,
 	if (r)
 		goto out_bitmap;
 
-	if (old.dirty_bitmap && !new.dirty_bitmap)
+	if (old.dirty_bitmap && !new.dirty_bitmap) {
+		kvm->stat.generic.dirty_pages -= hweight_dirty_bitmap(&old);
 		kvm_destroy_dirty_bitmap(&old);
+	}
 	return 0;
 
 out_bitmap:
@@ -1895,6 +1911,7 @@  static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 			offset = i * BITS_PER_LONG;
 			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
 								offset, mask);
+			kvm->stat.generic.dirty_pages -= hweight_long(mask);
 		}
 		KVM_MMU_UNLOCK(kvm);
 	}
@@ -2012,6 +2029,7 @@  static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 			flush = true;
 			kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
 								offset, mask);
+			kvm->stat.generic.dirty_pages -= hweight_long(mask);
 		}
 	}
 	KVM_MMU_UNLOCK(kvm);
@@ -3062,11 +3080,13 @@  void mark_page_dirty_in_slot(struct kvm *kvm,
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 		u32 slot = (memslot->as_id << 16) | memslot->id;
 
-		if (kvm->dirty_ring_size)
+		if (kvm->dirty_ring_size) {
 			kvm_dirty_ring_push(kvm_dirty_ring_get(kvm),
 					    slot, rel_gfn);
-		else
+		} else {
 			set_bit_le(rel_gfn, memslot->dirty_bitmap);
+			++kvm->stat.generic.dirty_pages;
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(mark_page_dirty_in_slot);
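
The hweight_dirty_bitmap() helper added above is a plain population
count over the bitmap words. A standalone userspace equivalent, with
GCC/Clang's __builtin_popcountl standing in for the kernel's
hweight_long():

/* Userspace sketch of what hweight_dirty_bitmap() computes: the number
 * of set bits (i.e. dirty pages) across a long-aligned bitmap. */
#include <stdio.h>

static unsigned long count_dirty(const unsigned long *bitmap, size_t nbytes)
{
	unsigned long count = 0;
	size_t i;

	for (i = 0; i < nbytes / sizeof(long); ++i)
		count += __builtin_popcountl(bitmap[i]);
	return count;
}

int main(void)
{
	unsigned long bitmap[2] = { 0xff, 0x1 };	/* 9 bits set */

	printf("%lu dirty pages\n", count_dirty(bitmap, sizeof(bitmap)));
	return 0;
}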