diff mbox series

[v2,16/21] virtio-net: Use replay_schedule_bh_event for bhs that affect machine state

Message ID 20240813202329.1237572-17-alex.bennee@linaro.org
State New
Headers show
Series Various fixes and tweaks for 9.1-rc2/3 | expand

Commit Message

Alex Bennée Aug. 13, 2024, 8:23 p.m. UTC
From: Nicholas Piggin <npiggin@gmail.com>

The regular qemu_bh_schedule() calls result in non-deterministic
execution of the bh in record-replay mode, which causes replay failure.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 hw/net/virtio-net.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin Aug. 13, 2024, 8:48 p.m. UTC | #1
On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> From: Nicholas Piggin <npiggin@gmail.com>
> 
> The regular qemu_bh_schedule() calls result in non-deterministic
> execution of the bh in record-replay mode, which causes replay failure.
> 
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> ---
>  hw/net/virtio-net.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 08aa0b65e3..10ebaae5e2 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -40,6 +40,7 @@
>  #include "migration/misc.h"
>  #include "standard-headers/linux/ethtool.h"
>  #include "sysemu/sysemu.h"
> +#include "sysemu/replay.h"
>  #include "trace.h"
>  #include "monitor/qdev.h"
>  #include "monitor/monitor.h"
> @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>                  timer_mod(q->tx_timer,
>                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>              } else {
> -                qemu_bh_schedule(q->tx_bh);
> +                replay_bh_schedule_event(q->tx_bh);
>              }
>          } else {
>              if (q->tx_timer) {
> @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
>           */
>          virtio_queue_set_notification(q->tx_vq, 0);
>          if (q->tx_bh) {
> -            qemu_bh_schedule(q->tx_bh);
> +            replay_bh_schedule_event(q->tx_bh);
>          } else {
>              timer_mod(q->tx_timer,
>                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>          return;
>      }
>      virtio_queue_set_notification(vq, 0);
> -    qemu_bh_schedule(q->tx_bh);
> +    replay_bh_schedule_event(q->tx_bh);
>  }
>  
>  static void virtio_net_tx_timer(void *opaque)
> @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
>      /* If we flush a full burst of packets, assume there are
>       * more coming and immediately reschedule */
>      if (ret >= n->tx_burst) {
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>          q->tx_waiting = 1;
>          return;
>      }
> @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
>          return;
>      } else if (ret > 0) {
>          virtio_queue_set_notification(q->tx_vq, 0);
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>          q->tx_waiting = 1;
>      }
>  }
> -- 
> 2.39.2


Is this really the only way to fix this? I do not think
virtio has any business knowing about replay.
What does this API do, even? BH but not broken with replay?
Do we ever want replay broken? Why not fix qemu_bh_schedule?
And when we add another feature which we do not want to break
will we do foo_bar_replay_bh_schedule_event or what?
Nicholas Piggin Aug. 14, 2024, 6:05 a.m. UTC | #2
On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
> On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> > From: Nicholas Piggin <npiggin@gmail.com>
> > 
> > The regular qemu_bh_schedule() calls result in non-deterministic
> > execution of the bh in record-replay mode, which causes replay failure.
> > 
> > Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> > Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
> > Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> > ---
> >  hw/net/virtio-net.c | 11 ++++++-----
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 08aa0b65e3..10ebaae5e2 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -40,6 +40,7 @@
> >  #include "migration/misc.h"
> >  #include "standard-headers/linux/ethtool.h"
> >  #include "sysemu/sysemu.h"
> > +#include "sysemu/replay.h"
> >  #include "trace.h"
> >  #include "monitor/qdev.h"
> >  #include "monitor/monitor.h"
> > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
> >                  timer_mod(q->tx_timer,
> >                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> >              } else {
> > -                qemu_bh_schedule(q->tx_bh);
> > +                replay_bh_schedule_event(q->tx_bh);
> >              }
> >          } else {
> >              if (q->tx_timer) {
> > @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
> >           */
> >          virtio_queue_set_notification(q->tx_vq, 0);
> >          if (q->tx_bh) {
> > -            qemu_bh_schedule(q->tx_bh);
> > +            replay_bh_schedule_event(q->tx_bh);
> >          } else {
> >              timer_mod(q->tx_timer,
> >                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> >          return;
> >      }
> >      virtio_queue_set_notification(vq, 0);
> > -    qemu_bh_schedule(q->tx_bh);
> > +    replay_bh_schedule_event(q->tx_bh);
> >  }
> >  
> >  static void virtio_net_tx_timer(void *opaque)
> > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
> >      /* If we flush a full burst of packets, assume there are
> >       * more coming and immediately reschedule */
> >      if (ret >= n->tx_burst) {
> > -        qemu_bh_schedule(q->tx_bh);
> > +        replay_bh_schedule_event(q->tx_bh);
> >          q->tx_waiting = 1;
> >          return;
> >      }
> > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
> >          return;
> >      } else if (ret > 0) {
> >          virtio_queue_set_notification(q->tx_vq, 0);
> > -        qemu_bh_schedule(q->tx_bh);
> > +        replay_bh_schedule_event(q->tx_bh);
> >          q->tx_waiting = 1;
> >      }
> >  }
> > -- 
> > 2.39.2
>
>
> Is this really the only way to fix this? I do not think
> virtio has any business knowing about replay.
> What does this API do, even? BH but not broken with replay?
> Do we ever want replay broken? Why not fix qemu_bh_schedule?
> And when we add another feature which we do not want to break
> will we do foo_bar_replay_bh_schedule_event or what?

I agree with you. We need to do this (a couple of other hw
subsystems already do and likely some are still broken vs
replay and would need to be converted), but I think it's
mostly a case of bad naming. You're right the caller should
not know about replay at all, what it should be is whether
the event is for the target machine or the host harness,
same as timers are VIRTUAL / HOST.

So I think we just need to make a qemu_bh_schedule_<type>,
or qemu_bh_scheudle_event(... QEMU_EVENT_VIRTUAL/HOST/etc).
I had started on a conversion once but not completed it.
I could resurrect if there is agreement on the API?

Thanks,
Nick
Michael S. Tsirkin Aug. 14, 2024, 7:06 a.m. UTC | #3
On Wed, Aug 14, 2024 at 04:05:34PM +1000, Nicholas Piggin wrote:
> On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
> > On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> > > From: Nicholas Piggin <npiggin@gmail.com>
> > > 
> > > The regular qemu_bh_schedule() calls result in non-deterministic
> > > execution of the bh in record-replay mode, which causes replay failure.
> > > 
> > > Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> > > Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > > Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
> > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> > > ---
> > >  hw/net/virtio-net.c | 11 ++++++-----
> > >  1 file changed, 6 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > > index 08aa0b65e3..10ebaae5e2 100644
> > > --- a/hw/net/virtio-net.c
> > > +++ b/hw/net/virtio-net.c
> > > @@ -40,6 +40,7 @@
> > >  #include "migration/misc.h"
> > >  #include "standard-headers/linux/ethtool.h"
> > >  #include "sysemu/sysemu.h"
> > > +#include "sysemu/replay.h"
> > >  #include "trace.h"
> > >  #include "monitor/qdev.h"
> > >  #include "monitor/monitor.h"
> > > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
> > >                  timer_mod(q->tx_timer,
> > >                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> > >              } else {
> > > -                qemu_bh_schedule(q->tx_bh);
> > > +                replay_bh_schedule_event(q->tx_bh);
> > >              }
> > >          } else {
> > >              if (q->tx_timer) {
> > > @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
> > >           */
> > >          virtio_queue_set_notification(q->tx_vq, 0);
> > >          if (q->tx_bh) {
> > > -            qemu_bh_schedule(q->tx_bh);
> > > +            replay_bh_schedule_event(q->tx_bh);
> > >          } else {
> > >              timer_mod(q->tx_timer,
> > >                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> > > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> > >          return;
> > >      }
> > >      virtio_queue_set_notification(vq, 0);
> > > -    qemu_bh_schedule(q->tx_bh);
> > > +    replay_bh_schedule_event(q->tx_bh);
> > >  }
> > >  
> > >  static void virtio_net_tx_timer(void *opaque)
> > > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
> > >      /* If we flush a full burst of packets, assume there are
> > >       * more coming and immediately reschedule */
> > >      if (ret >= n->tx_burst) {
> > > -        qemu_bh_schedule(q->tx_bh);
> > > +        replay_bh_schedule_event(q->tx_bh);
> > >          q->tx_waiting = 1;
> > >          return;
> > >      }
> > > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
> > >          return;
> > >      } else if (ret > 0) {
> > >          virtio_queue_set_notification(q->tx_vq, 0);
> > > -        qemu_bh_schedule(q->tx_bh);
> > > +        replay_bh_schedule_event(q->tx_bh);
> > >          q->tx_waiting = 1;
> > >      }
> > >  }
> > > -- 
> > > 2.39.2
> >
> >
> > Is this really the only way to fix this? I do not think
> > virtio has any business knowing about replay.
> > What does this API do, even? BH but not broken with replay?
> > Do we ever want replay broken? Why not fix qemu_bh_schedule?
> > And when we add another feature which we do not want to break
> > will we do foo_bar_replay_bh_schedule_event or what?
> 
> I agree with you. We need to do this (a couple of other hw
> subsystems already do and likely some are still broken vs
> replay and would need to be converted), but I think it's
> mostly a case of bad naming. You're right the caller should
> not know about replay at all, what it should be is whether
> the event is for the target machine or the host harness,
> same as timers are VIRTUAL / HOST.
> So I think we just need to make a qemu_bh_schedule_<type>,
> or qemu_bh_scheudle_event(... QEMU_EVENT_VIRTUAL/HOST/etc).

Or just pass QEMUClockType?

> I had started on a conversion once but not completed it.
> I could resurrect if there is agreement on the API?
> 
> Thanks,
> Nick
Alex Bennée Aug. 14, 2024, 5:25 p.m. UTC | #4
"Michael S. Tsirkin" <mst@redhat.com> writes:

> On Wed, Aug 14, 2024 at 04:05:34PM +1000, Nicholas Piggin wrote:
>> On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
>> > On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
>> > > From: Nicholas Piggin <npiggin@gmail.com>
>> > > 
>> > > The regular qemu_bh_schedule() calls result in non-deterministic
>> > > execution of the bh in record-replay mode, which causes replay failure.
>> > > 
>> > > Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
>> > > Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> > > Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
>> > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> > > ---
>> > >  hw/net/virtio-net.c | 11 ++++++-----
>> > >  1 file changed, 6 insertions(+), 5 deletions(-)
>> > > 
>> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> > > index 08aa0b65e3..10ebaae5e2 100644
>> > > --- a/hw/net/virtio-net.c
>> > > +++ b/hw/net/virtio-net.c
>> > > @@ -40,6 +40,7 @@
>> > >  #include "migration/misc.h"
>> > >  #include "standard-headers/linux/ethtool.h"
>> > >  #include "sysemu/sysemu.h"
>> > > +#include "sysemu/replay.h"
>> > >  #include "trace.h"
>> > >  #include "monitor/qdev.h"
>> > >  #include "monitor/monitor.h"
>> > > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>> > >                  timer_mod(q->tx_timer,
>> > >                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>> > >              } else {
>> > > -                qemu_bh_schedule(q->tx_bh);
>> > > +                replay_bh_schedule_event(q->tx_bh);
>> > >              }
>> > >          } else {
>> > >              if (q->tx_timer) {
>> > > @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
>> > >           */
>> > >          virtio_queue_set_notification(q->tx_vq, 0);
>> > >          if (q->tx_bh) {
>> > > -            qemu_bh_schedule(q->tx_bh);
>> > > +            replay_bh_schedule_event(q->tx_bh);
>> > >          } else {
>> > >              timer_mod(q->tx_timer,
>> > >                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>> > > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>> > >          return;
>> > >      }
>> > >      virtio_queue_set_notification(vq, 0);
>> > > -    qemu_bh_schedule(q->tx_bh);
>> > > +    replay_bh_schedule_event(q->tx_bh);
>> > >  }
>> > >  
>> > >  static void virtio_net_tx_timer(void *opaque)
>> > > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
>> > >      /* If we flush a full burst of packets, assume there are
>> > >       * more coming and immediately reschedule */
>> > >      if (ret >= n->tx_burst) {
>> > > -        qemu_bh_schedule(q->tx_bh);
>> > > +        replay_bh_schedule_event(q->tx_bh);
>> > >          q->tx_waiting = 1;
>> > >          return;
>> > >      }
>> > > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
>> > >          return;
>> > >      } else if (ret > 0) {
>> > >          virtio_queue_set_notification(q->tx_vq, 0);
>> > > -        qemu_bh_schedule(q->tx_bh);
>> > > +        replay_bh_schedule_event(q->tx_bh);
>> > >          q->tx_waiting = 1;
>> > >      }
>> > >  }
>> > > -- 
>> > > 2.39.2
>> >
>> >
>> > Is this really the only way to fix this? I do not think
>> > virtio has any business knowing about replay.
>> > What does this API do, even? BH but not broken with replay?
>> > Do we ever want replay broken? Why not fix qemu_bh_schedule?
>> > And when we add another feature which we do not want to break
>> > will we do foo_bar_replay_bh_schedule_event or what?
>> 
>> I agree with you. We need to do this (a couple of other hw
>> subsystems already do and likely some are still broken vs
>> replay and would need to be converted), but I think it's
>> mostly a case of bad naming. You're right the caller should
>> not know about replay at all, what it should be is whether
>> the event is for the target machine or the host harness,
>> same as timers are VIRTUAL / HOST.
>> So I think we just need to make a qemu_bh_schedule_<type>,
>> or qemu_bh_scheudle_event(... QEMU_EVENT_VIRTUAL/HOST/etc).
>
> Or just pass QEMUClockType?

Is this wider re-factoring something that can wait for the next
developer cycle?

>> I had started on a conversion once but not completed it.
>> I could resurrect if there is agreement on the API?

I would certainly welcome it being cleaned up. The supported replay
devices are very piecemeal at the moment.

>> 
>> Thanks,
>> Nick
Nicholas Piggin Aug. 15, 2024, 7:12 a.m. UTC | #5
On Thu Aug 15, 2024 at 3:25 AM AEST, Alex Bennée wrote:
> "Michael S. Tsirkin" <mst@redhat.com> writes:
>
> > On Wed, Aug 14, 2024 at 04:05:34PM +1000, Nicholas Piggin wrote:
> >> On Wed Aug 14, 2024 at 6:48 AM AEST, Michael S. Tsirkin wrote:
> >> > On Tue, Aug 13, 2024 at 09:23:24PM +0100, Alex Bennée wrote:
> >> > > From: Nicholas Piggin <npiggin@gmail.com>
> >> > > 
> >> > > The regular qemu_bh_schedule() calls result in non-deterministic
> >> > > execution of the bh in record-replay mode, which causes replay failure.
> >> > > 
> >> > > Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> >> > > Reviewed-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> >> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >> > > Message-Id: <20240813050638.446172-9-npiggin@gmail.com>
> >> > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> >> > > ---
> >> > >  hw/net/virtio-net.c | 11 ++++++-----
> >> > >  1 file changed, 6 insertions(+), 5 deletions(-)
> >> > > 
> >> > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> >> > > index 08aa0b65e3..10ebaae5e2 100644
> >> > > --- a/hw/net/virtio-net.c
> >> > > +++ b/hw/net/virtio-net.c
> >> > > @@ -40,6 +40,7 @@
> >> > >  #include "migration/misc.h"
> >> > >  #include "standard-headers/linux/ethtool.h"
> >> > >  #include "sysemu/sysemu.h"
> >> > > +#include "sysemu/replay.h"
> >> > >  #include "trace.h"
> >> > >  #include "monitor/qdev.h"
> >> > >  #include "monitor/monitor.h"
> >> > > @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
> >> > >                  timer_mod(q->tx_timer,
> >> > >                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> >> > >              } else {
> >> > > -                qemu_bh_schedule(q->tx_bh);
> >> > > +                replay_bh_schedule_event(q->tx_bh);
> >> > >              }
> >> > >          } else {
> >> > >              if (q->tx_timer) {
> >> > > @@ -2672,7 +2673,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
> >> > >           */
> >> > >          virtio_queue_set_notification(q->tx_vq, 0);
> >> > >          if (q->tx_bh) {
> >> > > -            qemu_bh_schedule(q->tx_bh);
> >> > > +            replay_bh_schedule_event(q->tx_bh);
> >> > >          } else {
> >> > >              timer_mod(q->tx_timer,
> >> > >                        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
> >> > > @@ -2838,7 +2839,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> >> > >          return;
> >> > >      }
> >> > >      virtio_queue_set_notification(vq, 0);
> >> > > -    qemu_bh_schedule(q->tx_bh);
> >> > > +    replay_bh_schedule_event(q->tx_bh);
> >> > >  }
> >> > >  
> >> > >  static void virtio_net_tx_timer(void *opaque)
> >> > > @@ -2921,7 +2922,7 @@ static void virtio_net_tx_bh(void *opaque)
> >> > >      /* If we flush a full burst of packets, assume there are
> >> > >       * more coming and immediately reschedule */
> >> > >      if (ret >= n->tx_burst) {
> >> > > -        qemu_bh_schedule(q->tx_bh);
> >> > > +        replay_bh_schedule_event(q->tx_bh);
> >> > >          q->tx_waiting = 1;
> >> > >          return;
> >> > >      }
> >> > > @@ -2935,7 +2936,7 @@ static void virtio_net_tx_bh(void *opaque)
> >> > >          return;
> >> > >      } else if (ret > 0) {
> >> > >          virtio_queue_set_notification(q->tx_vq, 0);
> >> > > -        qemu_bh_schedule(q->tx_bh);
> >> > > +        replay_bh_schedule_event(q->tx_bh);
> >> > >          q->tx_waiting = 1;
> >> > >      }
> >> > >  }
> >> > > -- 
> >> > > 2.39.2
> >> >
> >> >
> >> > Is this really the only way to fix this? I do not think
> >> > virtio has any business knowing about replay.
> >> > What does this API do, even? BH but not broken with replay?
> >> > Do we ever want replay broken? Why not fix qemu_bh_schedule?
> >> > And when we add another feature which we do not want to break
> >> > will we do foo_bar_replay_bh_schedule_event or what?
> >> 
> >> I agree with you. We need to do this (a couple of other hw
> >> subsystems already do and likely some are still broken vs
> >> replay and would need to be converted), but I think it's
> >> mostly a case of bad naming. You're right the caller should
> >> not know about replay at all, what it should be is whether
> >> the event is for the target machine or the host harness,
> >> same as timers are VIRTUAL / HOST.
> >> So I think we just need to make a qemu_bh_schedule_<type>,
> >> or qemu_bh_scheudle_event(... QEMU_EVENT_VIRTUAL/HOST/etc).
> >
> > Or just pass QEMUClockType?

Could be a good idea. Although I'm not sure what to do with
all types, maybe we can restrict what is supported.

> Is this wider re-factoring something that can wait for the next
> developer cycle?

I would say so. It's not quite trivial to do nicely since
things are a bit tangled between util/async and replay.

> >> I had started on a conversion once but not completed it.
> >> I could resurrect if there is agreement on the API?
>
> I would certainly welcome it being cleaned up. The supported replay
> devices are very piecemeal at the moment.

I'll tidy up and post an RFC for how the new API might look.

Thanks,
Nick
Michael S. Tsirkin Aug. 15, 2024, 2:28 p.m. UTC | #6
On Thu, Aug 15, 2024 at 05:12:32PM +1000, Nicholas Piggin wrote:
> Could be a good idea. Although I'm not sure what to do with
> all types, maybe we can restrict what is supported.
> 
> > Is this wider re-factoring something that can wait for the next
> > developer cycle?
> 
> I would say so. It's not quite trivial to do nicely since
> things are a bit tangled between util/async and replay.
> 
> > >> I had started on a conversion once but not completed it.
> > >> I could resurrect if there is agreement on the API?
> >
> > I would certainly welcome it being cleaned up. The supported replay
> > devices are very piecemeal at the moment.
> 
> I'll tidy up and post an RFC for how the new API might look.
> 
> Thanks,
> Nick

Fundamentally it's virtio net, up to Jason. I don't like messy
APIs and people tend to get distracted and not fix them up
if one does not make this a blocker.
Nicholas Piggin Aug. 16, 2024, 2:26 a.m. UTC | #7
On Fri Aug 16, 2024 at 12:28 AM AEST, Michael S. Tsirkin wrote:
> On Thu, Aug 15, 2024 at 05:12:32PM +1000, Nicholas Piggin wrote:
> > Could be a good idea. Although I'm not sure what to do with
> > all types, maybe we can restrict what is supported.
> > 
> > > Is this wider re-factoring something that can wait for the next
> > > developer cycle?
> > 
> > I would say so. It's not quite trivial to do nicely since
> > things are a bit tangled between util/async and replay.
> > 
> > > >> I had started on a conversion once but not completed it.
> > > >> I could resurrect if there is agreement on the API?
> > >
> > > I would certainly welcome it being cleaned up. The supported replay
> > > devices are very piecemeal at the moment.
> > 
> > I'll tidy up and post an RFC for how the new API might look.
> > 
> > Thanks,
> > Nick
>
> Fundamentally it's virtio net, up to Jason. I don't like messy
> APIs and people tend to get distracted and not fix them up
> if one does not make this a blocker.

Happy for objections, but FYI Michael did find the proposed API
tweak nicer, so shall we get these minimal fixes in for 9.1 then
switch them for next release?

Thanks,
Nick
Jason Wang Aug. 16, 2024, 2:31 a.m. UTC | #8
On Thu, Aug 15, 2024 at 10:29 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Thu, Aug 15, 2024 at 05:12:32PM +1000, Nicholas Piggin wrote:
> > Could be a good idea. Although I'm not sure what to do with
> > all types, maybe we can restrict what is supported.
> >
> > > Is this wider re-factoring something that can wait for the next
> > > developer cycle?
> >
> > I would say so. It's not quite trivial to do nicely since
> > things are a bit tangled between util/async and replay.
> >
> > > >> I had started on a conversion once but not completed it.
> > > >> I could resurrect if there is agreement on the API?
> > >
> > > I would certainly welcome it being cleaned up. The supported replay
> > > devices are very piecemeal at the moment.
> >
> > I'll tidy up and post an RFC for how the new API might look.
> >
> > Thanks,
> > Nick
>
> Fundamentally it's virtio net, up to Jason.

It seems it has been in the pull request from Alex Bennée.

> I don't like messy
> APIs and people tend to get distracted and not fix them up
> if one does not make this a blocker.

+1

>
> --
> MST

Thanks

>
Alex Bennée Aug. 16, 2024, 12:58 p.m. UTC | #9
Jason Wang <jasowang@redhat.com> writes:

> On Thu, Aug 15, 2024 at 10:29 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>>
>> On Thu, Aug 15, 2024 at 05:12:32PM +1000, Nicholas Piggin wrote:
>> > Could be a good idea. Although I'm not sure what to do with
>> > all types, maybe we can restrict what is supported.
>> >
>> > > Is this wider re-factoring something that can wait for the next
>> > > developer cycle?
>> >
>> > I would say so. It's not quite trivial to do nicely since
>> > things are a bit tangled between util/async and replay.
>> >
>> > > >> I had started on a conversion once but not completed it.
>> > > >> I could resurrect if there is agreement on the API?
>> > >
>> > > I would certainly welcome it being cleaned up. The supported replay
>> > > devices are very piecemeal at the moment.
>> >
>> > I'll tidy up and post an RFC for how the new API might look.
>> >
>> > Thanks,
>> > Nick
>>
>> Fundamentally it's virtio net, up to Jason.
>
> It seems it has been in the pull request from Alex Bennée.

Sorry I'd suibmitted the PR before I saw mst's update.

>
>> I don't like messy
>> APIs and people tend to get distracted and not fix them up
>> if one does not make this a blocker.
>
> +1

do you want me to drop the series for 9.1 and leave it in its current
(broken) state? Nick has already posted a suggestion for the API update
but it seems a bit late in the cycle to include it in the release.

>
>>
>> --
>> MST
>
> Thanks
>
>>
diff mbox series

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 08aa0b65e3..10ebaae5e2 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -40,6 +40,7 @@ 
 #include "migration/misc.h"
 #include "standard-headers/linux/ethtool.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
 #include "trace.h"
 #include "monitor/qdev.h"
 #include "monitor/monitor.h"
@@ -417,7 +418,7 @@  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
                 timer_mod(q->tx_timer,
                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
             } else {
-                qemu_bh_schedule(q->tx_bh);
+                replay_bh_schedule_event(q->tx_bh);
             }
         } else {
             if (q->tx_timer) {
@@ -2672,7 +2673,7 @@  static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
          */
         virtio_queue_set_notification(q->tx_vq, 0);
         if (q->tx_bh) {
-            qemu_bh_schedule(q->tx_bh);
+            replay_bh_schedule_event(q->tx_bh);
         } else {
             timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
@@ -2838,7 +2839,7 @@  static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
         return;
     }
     virtio_queue_set_notification(vq, 0);
-    qemu_bh_schedule(q->tx_bh);
+    replay_bh_schedule_event(q->tx_bh);
 }
 
 static void virtio_net_tx_timer(void *opaque)
@@ -2921,7 +2922,7 @@  static void virtio_net_tx_bh(void *opaque)
     /* If we flush a full burst of packets, assume there are
      * more coming and immediately reschedule */
     if (ret >= n->tx_burst) {
-        qemu_bh_schedule(q->tx_bh);
+        replay_bh_schedule_event(q->tx_bh);
         q->tx_waiting = 1;
         return;
     }
@@ -2935,7 +2936,7 @@  static void virtio_net_tx_bh(void *opaque)
         return;
     } else if (ret > 0) {
         virtio_queue_set_notification(q->tx_vq, 0);
-        qemu_bh_schedule(q->tx_bh);
+        replay_bh_schedule_event(q->tx_bh);
         q->tx_waiting = 1;
     }
 }