diff mbox series

[v4,13/25] memory: Add Error** argument to .log_global_start() handler

Message ID 20240306133441.2351700-14-clg@redhat.com
State New
Headers show
Series migration: Improve error reporting | expand

Commit Message

Cédric Le Goater March 6, 2024, 1:34 p.m. UTC
Modify all .log_global_start() handlers to take an Error** parameter
and return a bool. Adapt memory_global_dirty_log_start() to interrupt
on the first error the loop on handlers. In such case, a rollback is
performed to stop dirty logging on all listeners where it was
previously enabled.

Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Anthony Perard <anthony.perard@citrix.com>
Cc: Paul Durrant <paul@xen.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---

 Changes in v4:

 - Dropped log_global_stop() and log_global_sync() changes
 - Dropped MEMORY_LISTENER_CALL_LOG_GLOBAL 
 - Modified memory_global_dirty_log_start() to loop on the list of
   listeners and handle errors directly.
 - Introduced memory_global_dirty_log_rollback() to revert operations
   previously done
   
 Changes in v3: 

 - Fixed return value of vfio_listener_log_global_start() and
   vfio_listener_log_global_stop(). Went unnoticed because not tested.
   
 include/exec/memory.h |  5 ++++-
 hw/i386/xen/xen-hvm.c |  3 ++-
 hw/vfio/common.c      |  4 +++-
 hw/virtio/vhost.c     |  3 ++-
 system/memory.c       | 43 +++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 52 insertions(+), 6 deletions(-)

Comments

Peter Xu March 15, 2024, 11:18 a.m. UTC | #1
On Wed, Mar 06, 2024 at 02:34:28PM +0100, Cédric Le Goater wrote:
> diff --git a/system/memory.c b/system/memory.c
> index a229a79988fce2aa3cb77e3a130db4c694e8cd49..3600e716149407c10a1f6bf8f0a81c2611cf15ba 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -2914,9 +2914,27 @@ static unsigned int postponed_stop_flags;
>  static VMChangeStateEntry *vmstate_change;
>  static void memory_global_dirty_log_stop_postponed_run(void);
>  
> +/*
> + * Stop dirty logging on all listeners where it was previously enabled.
> + */
> +static void memory_global_dirty_log_rollback(MemoryListener *listener,
> +                                             unsigned int flags)
> +{
> +    global_dirty_tracking &= ~flags;

Having a hook rollback function to touch the global_dirty_tracking flag is
IMHO tricky.

Can we instead provide a helper to call all log_global_start() hooks, but
allow a gracefully fail (so rollback will be called if it fails)?

  bool memory_global_dirty_log_start_hooks(...)

Or any better names..  Leaving global_dirty_tracking rollback to
memory_global_dirty_log_start() when it returns false.

Would this be cleaner?

> +    trace_global_dirty_changed(global_dirty_tracking);
> +
> +    while (listener) {
> +        if (listener->log_global_stop) {
> +            listener->log_global_stop(listener);
> +        }
> +        listener = QTAILQ_PREV(listener, link);
> +    }
> +}
> +
>  void memory_global_dirty_log_start(unsigned int flags)
>  {
>      unsigned int old_flags;
> +    Error *local_err = NULL;
>  
>      assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
>  
> @@ -2936,7 +2954,25 @@ void memory_global_dirty_log_start(unsigned int flags)
>      trace_global_dirty_changed(global_dirty_tracking);
>  
>      if (!old_flags) {
> -        MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
> +        MemoryListener *listener;
> +        bool ret = true;
> +
> +        QTAILQ_FOREACH(listener, &memory_listeners, link) {
> +            if (listener->log_global_start) {
> +                ret = listener->log_global_start(listener, &local_err);
> +                if (!ret) {
> +                    break;
> +                }
> +            }
> +        }
> +
> +        if (!ret) {
> +            memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
> +                                             flags);
> +            error_report_err(local_err);
> +            return;
> +        }
> +
>          memory_region_transaction_begin();
>          memory_region_update_pending = true;
>          memory_region_transaction_commit();
> @@ -3009,13 +3045,16 @@ static void listener_add_address_space(MemoryListener *listener,
>  {
>      FlatView *view;
>      FlatRange *fr;
> +    Error *local_err = NULL;
>  
>      if (listener->begin) {
>          listener->begin(listener);
>      }
>      if (global_dirty_tracking) {
>          if (listener->log_global_start) {
> -            listener->log_global_start(listener);
> +            if (!listener->log_global_start(listener, &local_err)) {
> +                error_report_err(local_err);
> +            }

IMHO we should assert here instead of error report.  We have this to guard
hot-plug during migration so I think the assert is justified:

qdev_device_add_from_qdict():

    if (!migration_is_idle()) {
        error_setg(errp, "device_add not allowed while migrating");
        return NULL;
    }

If it really happens it's a bug, as listener_add_address_space() will still
keep the rest things around even if the hook failed.  It'll start to be a
total mess..

Thanks,

>          }
>      }
>  
> -- 
> 2.44.0
>
Cédric Le Goater March 18, 2024, 2:33 p.m. UTC | #2
On 3/15/24 12:18, Peter Xu wrote:
> On Wed, Mar 06, 2024 at 02:34:28PM +0100, Cédric Le Goater wrote:
>> diff --git a/system/memory.c b/system/memory.c
>> index a229a79988fce2aa3cb77e3a130db4c694e8cd49..3600e716149407c10a1f6bf8f0a81c2611cf15ba 100644
>> --- a/system/memory.c
>> +++ b/system/memory.c
>> @@ -2914,9 +2914,27 @@ static unsigned int postponed_stop_flags;
>>   static VMChangeStateEntry *vmstate_change;
>>   static void memory_global_dirty_log_stop_postponed_run(void);
>>   
>> +/*
>> + * Stop dirty logging on all listeners where it was previously enabled.
>> + */
>> +static void memory_global_dirty_log_rollback(MemoryListener *listener,
>> +                                             unsigned int flags)
>> +{
>> +    global_dirty_tracking &= ~flags;
> 
> Having a hook rollback function to touch the global_dirty_tracking flag is
> IMHO tricky.
> 
> Can we instead provide a helper to call all log_global_start() hooks, but
> allow a gracefully fail (so rollback will be called if it fails)?
> 
>    bool memory_global_dirty_log_start_hooks(...)
> 
> Or any better names..  Leaving global_dirty_tracking rollback to
> memory_global_dirty_log_start() when it returns false.
> 
> Would this be cleaner?

I will introduce a memory_global_dirty_log_do_start() helper to call
the log_global_start() handlers and to do the rollback in case of
error. Modification of the global_dirty_tracking flag will stay local
to memory_global_dirty_log_start() to avoid any futur errors.


>> +    trace_global_dirty_changed(global_dirty_tracking);
>> +
>> +    while (listener) {
>> +        if (listener->log_global_stop) {
>> +            listener->log_global_stop(listener);
>> +        }
>> +        listener = QTAILQ_PREV(listener, link);
>> +    }
>> +}
>> +
>>   void memory_global_dirty_log_start(unsigned int flags)
>>   {
>>       unsigned int old_flags;
>> +    Error *local_err = NULL;
>>   
>>       assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
>>   
>> @@ -2936,7 +2954,25 @@ void memory_global_dirty_log_start(unsigned int flags)
>>       trace_global_dirty_changed(global_dirty_tracking);
>>   
>>       if (!old_flags) {
>> -        MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
>> +        MemoryListener *listener;
>> +        bool ret = true;
>> +
>> +        QTAILQ_FOREACH(listener, &memory_listeners, link) {
>> +            if (listener->log_global_start) {
>> +                ret = listener->log_global_start(listener, &local_err);
>> +                if (!ret) {
>> +                    break;
>> +                }
>> +            }
>> +        }
>> +
>> +        if (!ret) {
>> +            memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
>> +                                             flags);
>> +            error_report_err(local_err);
>> +            return;
>> +        }
>> +
>>           memory_region_transaction_begin();
>>           memory_region_update_pending = true;
>>           memory_region_transaction_commit();
>> @@ -3009,13 +3045,16 @@ static void listener_add_address_space(MemoryListener *listener,
>>   {
>>       FlatView *view;
>>       FlatRange *fr;
>> +    Error *local_err = NULL;
>>   
>>       if (listener->begin) {
>>           listener->begin(listener);
>>       }
>>       if (global_dirty_tracking) {
>>           if (listener->log_global_start) {
>> -            listener->log_global_start(listener);
>> +            if (!listener->log_global_start(listener, &local_err)) {
>> +                error_report_err(local_err);
>> +            }
> 
> IMHO we should assert here instead of error report.  We have this to guard
> hot-plug during migration so I think the assert is justified:
> 
> qdev_device_add_from_qdict():
> 
>      if (!migration_is_idle()) {
>          error_setg(errp, "device_add not allowed while migrating");
>          return NULL;
>      }
> 
> If it really happens it's a bug, as listener_add_address_space() will still
> keep the rest things around even if the hook failed.  It'll start to be a
> total mess..

OK. I will change the Error parameter to error_abort in that case.

However, It would be useful to catch errors of the .region_add() handler
for VFIO. Let's address that later.

Thanks,

C.
Cédric Le Goater March 18, 2024, 2:54 p.m. UTC | #3
On 3/15/24 12:18, Peter Xu wrote:
>> @@ -3009,13 +3045,16 @@ static void listener_add_address_space(MemoryListener *listener,
>>   {
>>       FlatView *view;
>>       FlatRange *fr;
>> +    Error *local_err = NULL;
>>   
>>       if (listener->begin) {
>>           listener->begin(listener);
>>       }
>>       if (global_dirty_tracking) {
>>           if (listener->log_global_start) {
>> -            listener->log_global_start(listener);
>> +            if (!listener->log_global_start(listener, &local_err)) {
>> +                error_report_err(local_err);
>> +            }
> IMHO we should assert here instead of error report.  We have this to guard
> hot-plug during migration so I think the assert is justified:
> 
> qdev_device_add_from_qdict():
> 
>      if (!migration_is_idle()) {
>          error_setg(errp, "device_add not allowed while migrating");
>          return NULL;
>      }
> 
> If it really happens it's a bug, as listener_add_address_space() will still
> keep the rest things around even if the hook failed.  It'll start to be a
> total mess..

It seems that adding a region listener while logging is active has been
supported from the beginning, commit 7664e80c8470 ("memory: add API for
observing  updates to the physical memory map"). Can it happen ? if not
we could simply remove the  log_global_start() call.

Thanks,

C.
Peter Xu March 18, 2024, 4:27 p.m. UTC | #4
On Mon, Mar 18, 2024 at 03:54:28PM +0100, Cédric Le Goater wrote:
> On 3/15/24 12:18, Peter Xu wrote:
> > > @@ -3009,13 +3045,16 @@ static void listener_add_address_space(MemoryListener *listener,
> > >   {
> > >       FlatView *view;
> > >       FlatRange *fr;
> > > +    Error *local_err = NULL;
> > >       if (listener->begin) {
> > >           listener->begin(listener);
> > >       }
> > >       if (global_dirty_tracking) {
> > >           if (listener->log_global_start) {
> > > -            listener->log_global_start(listener);
> > > +            if (!listener->log_global_start(listener, &local_err)) {
> > > +                error_report_err(local_err);
> > > +            }
> > IMHO we should assert here instead of error report.  We have this to guard
> > hot-plug during migration so I think the assert is justified:
> > 
> > qdev_device_add_from_qdict():
> > 
> >      if (!migration_is_idle()) {
> >          error_setg(errp, "device_add not allowed while migrating");
> >          return NULL;
> >      }
> > 
> > If it really happens it's a bug, as listener_add_address_space() will still
> > keep the rest things around even if the hook failed.  It'll start to be a
> > total mess..
> 
> It seems that adding a region listener while logging is active has been
> supported from the beginning, commit 7664e80c8470 ("memory: add API for
> observing  updates to the physical memory map"). Can it happen ? if not
> we could simply remove the  log_global_start() call.

IMHO we'd better keep it for the sake of logic completeness, even though I
don't know when it'll be useful..

I think it's safe to assert because log_global_start() should only be
triggered by either vhost/vfio with current code base when reaching here.
It doesn't mean that in the future all log_global_start() hooks are based
on a device object. E.g., there's the other Xen user, it just won't trigger
either, afaict.  So the assert should be safe.

In the future maybe we could allow other things to trigger here besides
device, but obviously we're not ready for failing it.  Instead of adding
the failure handling which will never be used for now, IIUC it's simpler we
just provide an assert until someone add a real user of such.

Thanks,
diff mbox series

Patch

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 8626a355b310ed7b1a1db7978ba4b394032c2f15..5555567bc4c9fdb53e8f63487f1400980275687d 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -998,8 +998,11 @@  struct MemoryListener {
      * active at that time.
      *
      * @listener: The #MemoryListener.
+     * @errp: pointer to Error*, to store an error if it happens.
+     *
+     * Return: true on success, else false setting @errp with error.
      */
-    void (*log_global_start)(MemoryListener *listener);
+    bool (*log_global_start)(MemoryListener *listener, Error **errp);
 
     /**
      * @log_global_stop:
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index f42621e6742552035122ea58092c91c3458338ff..0608ca99f5166fd6379ee674442484e805eff9c0 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -446,11 +446,12 @@  static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
                           int128_get64(section->size));
 }
 
-static void xen_log_global_start(MemoryListener *listener)
+static bool xen_log_global_start(MemoryListener *listener, Error **errp)
 {
     if (xen_enabled()) {
         xen_in_migration = true;
     }
+    return true;
 }
 
 static void xen_log_global_stop(MemoryListener *listener)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ff88c3f31ca660b3c0a790601100fdc6116192a0..800ba0aeac84b8dcc83b042bb70c37b4bf78d3f4 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1075,7 +1075,8 @@  out:
     return ret;
 }
 
-static void vfio_listener_log_global_start(MemoryListener *listener)
+static bool vfio_listener_log_global_start(MemoryListener *listener,
+                                           Error **errp)
 {
     VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
                                                  listener);
@@ -1092,6 +1093,7 @@  static void vfio_listener_log_global_start(MemoryListener *listener)
                      ret, strerror(-ret));
         vfio_set_migration_error(ret);
     }
+    return !ret;
 }
 
 static void vfio_listener_log_global_stop(MemoryListener *listener)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 2c9ac794680ea9b65eba6cc22e70cf141e90aa73..030b24db9270fc272024db3ff60a6cc449fba1ca 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1044,7 +1044,7 @@  check_dev_state:
     return r;
 }
 
-static void vhost_log_global_start(MemoryListener *listener)
+static bool vhost_log_global_start(MemoryListener *listener, Error **errp)
 {
     int r;
 
@@ -1052,6 +1052,7 @@  static void vhost_log_global_start(MemoryListener *listener)
     if (r < 0) {
         abort();
     }
+    return true;
 }
 
 static void vhost_log_global_stop(MemoryListener *listener)
diff --git a/system/memory.c b/system/memory.c
index a229a79988fce2aa3cb77e3a130db4c694e8cd49..3600e716149407c10a1f6bf8f0a81c2611cf15ba 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2914,9 +2914,27 @@  static unsigned int postponed_stop_flags;
 static VMChangeStateEntry *vmstate_change;
 static void memory_global_dirty_log_stop_postponed_run(void);
 
+/*
+ * Stop dirty logging on all listeners where it was previously enabled.
+ */
+static void memory_global_dirty_log_rollback(MemoryListener *listener,
+                                             unsigned int flags)
+{
+    global_dirty_tracking &= ~flags;
+    trace_global_dirty_changed(global_dirty_tracking);
+
+    while (listener) {
+        if (listener->log_global_stop) {
+            listener->log_global_stop(listener);
+        }
+        listener = QTAILQ_PREV(listener, link);
+    }
+}
+
 void memory_global_dirty_log_start(unsigned int flags)
 {
     unsigned int old_flags;
+    Error *local_err = NULL;
 
     assert(flags && !(flags & (~GLOBAL_DIRTY_MASK)));
 
@@ -2936,7 +2954,25 @@  void memory_global_dirty_log_start(unsigned int flags)
     trace_global_dirty_changed(global_dirty_tracking);
 
     if (!old_flags) {
-        MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward);
+        MemoryListener *listener;
+        bool ret = true;
+
+        QTAILQ_FOREACH(listener, &memory_listeners, link) {
+            if (listener->log_global_start) {
+                ret = listener->log_global_start(listener, &local_err);
+                if (!ret) {
+                    break;
+                }
+            }
+        }
+
+        if (!ret) {
+            memory_global_dirty_log_rollback(QTAILQ_PREV(listener, link),
+                                             flags);
+            error_report_err(local_err);
+            return;
+        }
+
         memory_region_transaction_begin();
         memory_region_update_pending = true;
         memory_region_transaction_commit();
@@ -3009,13 +3045,16 @@  static void listener_add_address_space(MemoryListener *listener,
 {
     FlatView *view;
     FlatRange *fr;
+    Error *local_err = NULL;
 
     if (listener->begin) {
         listener->begin(listener);
     }
     if (global_dirty_tracking) {
         if (listener->log_global_start) {
-            listener->log_global_start(listener);
+            if (!listener->log_global_start(listener, &local_err)) {
+                error_report_err(local_err);
+            }
         }
     }