diff mbox series

replay: improve determinism of virtio-net

Message ID 162125666020.1252655.9997723318921206001.stgit@pasha-ThinkPad-X280
State New
Headers show
Series replay: improve determinism of virtio-net | expand

Commit Message

Pavel Dovgalyuk May 17, 2021, 1:04 p.m. UTC
virtio-net device uses bottom halves for callbacks.
These callbacks should be deterministic, because they affect VM state.
This patch replaces BH invocations with corresponding replay functions,
making them deterministic in record/replay mode.
This patch also disables guest announce timers for record/replay,
because they break correct loadvm in deterministic mode.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
---
 hw/net/virtio-net.c |   13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

Comments

Pavel Dovgalyuk May 31, 2021, 4:46 a.m. UTC | #1
ping

On 17.05.2021 16:04, Pavel Dovgalyuk wrote:
> virtio-net device uses bottom halves for callbacks.
> These callbacks should be deterministic, because they affect VM state.
> This patch replaces BH invocations with corresponding replay functions,
> making them deterministic in record/replay mode.
> This patch also disables guest announce timers for record/replay,
> because they break correct loadvm in deterministic mode.
> 
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> ---
>   hw/net/virtio-net.c |   13 +++++++++----
>   1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 6b7e8dd04e..e876363236 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -44,6 +44,7 @@
>   #include "hw/pci/pci.h"
>   #include "net_rx_pkt.h"
>   #include "hw/virtio/vhost.h"
> +#include "sysemu/replay.h"
>   
>   #define VIRTIO_NET_VM_VERSION    11
>   
> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>                   timer_mod(q->tx_timer,
>                                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>               } else {
> -                qemu_bh_schedule(q->tx_bh);
> +                replay_bh_schedule_event(q->tx_bh);
>               }
>           } else {
>               if (q->tx_timer) {
> @@ -2546,7 +2547,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>           return;
>       }
>       virtio_queue_set_notification(vq, 0);
> -    qemu_bh_schedule(q->tx_bh);
> +    replay_bh_schedule_event(q->tx_bh);
>   }
>   
>   static void virtio_net_tx_timer(void *opaque)
> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>       /* If we flush a full burst of packets, assume there are
>        * more coming and immediately reschedule */
>       if (ret >= n->tx_burst) {
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>           q->tx_waiting = 1;
>           return;
>       }
> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>           return;
>       } else if (ret > 0) {
>           virtio_queue_set_notification(q->tx_vq, 0);
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>           q->tx_waiting = 1;
>       }
>   }
> @@ -3206,6 +3207,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>       }
>   
> +    if (replay_mode != REPLAY_MODE_NONE) {
> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
> +    }
> +
>       if (n->net_conf.duplex_str) {
>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>               n->net_conf.duplex = DUPLEX_HALF;
>
Jason Wang May 31, 2021, 4:55 a.m. UTC | #2
在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
> virtio-net device uses bottom halves for callbacks.
> These callbacks should be deterministic, because they affect VM state.
> This patch replaces BH invocations with corresponding replay functions,
> making them deterministic in record/replay mode.
> This patch also disables guest announce timers for record/replay,
> because they break correct loadvm in deterministic mode.


Virtio-net can be configured to work in tx timer mode. Do we need to 
care about that?


> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> ---
>   hw/net/virtio-net.c |   13 +++++++++----
>   1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 6b7e8dd04e..e876363236 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -44,6 +44,7 @@
>   #include "hw/pci/pci.h"
>   #include "net_rx_pkt.h"
>   #include "hw/virtio/vhost.h"
> +#include "sysemu/replay.h"
>   
>   #define VIRTIO_NET_VM_VERSION    11
>   
> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>                   timer_mod(q->tx_timer,
>                                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>               } else {
> -                qemu_bh_schedule(q->tx_bh);
> +                replay_bh_schedule_event(q->tx_bh);
>               }
>           } else {
>               if (q->tx_timer) {
> @@ -2546,7 +2547,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>           return;
>       }
>       virtio_queue_set_notification(vq, 0);
> -    qemu_bh_schedule(q->tx_bh);
> +    replay_bh_schedule_event(q->tx_bh);


Not familiar with replay but any chance to change qemu_bh_schedule() 
instead?

Thanks


>   }
>   
>   static void virtio_net_tx_timer(void *opaque)
> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>       /* If we flush a full burst of packets, assume there are
>        * more coming and immediately reschedule */
>       if (ret >= n->tx_burst) {
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>           q->tx_waiting = 1;
>           return;
>       }
> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>           return;
>       } else if (ret > 0) {
>           virtio_queue_set_notification(q->tx_vq, 0);
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>           q->tx_waiting = 1;
>       }
>   }
> @@ -3206,6 +3207,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>       }
>   
> +    if (replay_mode != REPLAY_MODE_NONE) {
> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
> +    }
> +
>       if (n->net_conf.duplex_str) {
>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>               n->net_conf.duplex = DUPLEX_HALF;
>
>
Pavel Dovgalyuk May 31, 2021, 6:35 a.m. UTC | #3
On 31.05.2021 07:55, Jason Wang wrote:
> 
> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
>> virtio-net device uses bottom halves for callbacks.
>> These callbacks should be deterministic, because they affect VM state.
>> This patch replaces BH invocations with corresponding replay functions,
>> making them deterministic in record/replay mode.
>> This patch also disables guest announce timers for record/replay,
>> because they break correct loadvm in deterministic mode.
> 
> 
> Virtio-net can be configured to work in tx timer mode. Do we need to 
> care about that?

What does it mean? This patch fixes interaction with TX timer. Is it 
related to that mode?

> 
> 
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> ---
>>   hw/net/virtio-net.c |   13 +++++++++----
>>   1 file changed, 9 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index 6b7e8dd04e..e876363236 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -44,6 +44,7 @@
>>   #include "hw/pci/pci.h"
>>   #include "net_rx_pkt.h"
>>   #include "hw/virtio/vhost.h"
>> +#include "sysemu/replay.h"
>>   #define VIRTIO_NET_VM_VERSION    11
>> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct 
>> VirtIODevice *vdev, uint8_t status)
>>                   timer_mod(q->tx_timer,
>>                                  qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) 
>> + n->tx_timeout);
>>               } else {
>> -                qemu_bh_schedule(q->tx_bh);
>> +                replay_bh_schedule_event(q->tx_bh);
>>               }
>>           } else {
>>               if (q->tx_timer) {
>> @@ -2546,7 +2547,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice 
>> *vdev, VirtQueue *vq)
>>           return;
>>       }
>>       virtio_queue_set_notification(vq, 0);
>> -    qemu_bh_schedule(q->tx_bh);
>> +    replay_bh_schedule_event(q->tx_bh);
> 
> 
> Not familiar with replay but any chance to change qemu_bh_schedule() 
> instead?

It would be better, but sometimes qemu_bh_schedule is used for the 
callbacks that are not related to the guest state change.

> 
> Thanks
> 
> 
>>   }
>>   static void virtio_net_tx_timer(void *opaque)
>> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>>       /* If we flush a full burst of packets, assume there are
>>        * more coming and immediately reschedule */
>>       if (ret >= n->tx_burst) {
>> -        qemu_bh_schedule(q->tx_bh);
>> +        replay_bh_schedule_event(q->tx_bh);
>>           q->tx_waiting = 1;
>>           return;
>>       }
>> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>>           return;
>>       } else if (ret > 0) {
>>           virtio_queue_set_notification(q->tx_vq, 0);
>> -        qemu_bh_schedule(q->tx_bh);
>> +        replay_bh_schedule_event(q->tx_bh);
>>           q->tx_waiting = 1;
>>       }
>>   }
>> @@ -3206,6 +3207,10 @@ static void 
>> virtio_net_device_realize(DeviceState *dev, Error **errp)
>>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>>       }
>> +    if (replay_mode != REPLAY_MODE_NONE) {
>> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
>> +    }
>> +
>>       if (n->net_conf.duplex_str) {
>>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>>               n->net_conf.duplex = DUPLEX_HALF;
>>
>>
>
Jason Wang May 31, 2021, 6:39 a.m. UTC | #4
在 2021/5/31 下午2:35, Pavel Dovgalyuk 写道:
> On 31.05.2021 07:55, Jason Wang wrote:
>>
>> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
>>> virtio-net device uses bottom halves for callbacks.
>>> These callbacks should be deterministic, because they affect VM state.
>>> This patch replaces BH invocations with corresponding replay functions,
>>> making them deterministic in record/replay mode.
>>> This patch also disables guest announce timers for record/replay,
>>> because they break correct loadvm in deterministic mode.
>>
>>
>> Virtio-net can be configured to work in tx timer mode. Do we need to 
>> care about that?
>
> What does it mean? This patch fixes interaction with TX timer. Is it 
> related to that mode?


I meant is the timer used by virtio_net_handle_tx_timer() safe consider 
you disable announce timer.

Thanks


>
>>
>>
>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>> ---
>>>   hw/net/virtio-net.c |   13 +++++++++----
>>>   1 file changed, 9 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>>> index 6b7e8dd04e..e876363236 100644
>>> --- a/hw/net/virtio-net.c
>>> +++ b/hw/net/virtio-net.c
>>> @@ -44,6 +44,7 @@
>>>   #include "hw/pci/pci.h"
>>>   #include "net_rx_pkt.h"
>>>   #include "hw/virtio/vhost.h"
>>> +#include "sysemu/replay.h"
>>>   #define VIRTIO_NET_VM_VERSION    11
>>> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct 
>>> VirtIODevice *vdev, uint8_t status)
>>>                   timer_mod(q->tx_timer,
>>> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>>>               } else {
>>> -                qemu_bh_schedule(q->tx_bh);
>>> +                replay_bh_schedule_event(q->tx_bh);
>>>               }
>>>           } else {
>>>               if (q->tx_timer) {
>>> @@ -2546,7 +2547,7 @@ static void 
>>> virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>>>           return;
>>>       }
>>>       virtio_queue_set_notification(vq, 0);
>>> -    qemu_bh_schedule(q->tx_bh);
>>> +    replay_bh_schedule_event(q->tx_bh);
>>
>>
>> Not familiar with replay but any chance to change qemu_bh_schedule() 
>> instead?
>
> It would be better, but sometimes qemu_bh_schedule is used for the 
> callbacks that are not related to the guest state change.
>
>>
>> Thanks
>>
>>
>>>   }
>>>   static void virtio_net_tx_timer(void *opaque)
>>> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>       /* If we flush a full burst of packets, assume there are
>>>        * more coming and immediately reschedule */
>>>       if (ret >= n->tx_burst) {
>>> -        qemu_bh_schedule(q->tx_bh);
>>> +        replay_bh_schedule_event(q->tx_bh);
>>>           q->tx_waiting = 1;
>>>           return;
>>>       }
>>> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>           return;
>>>       } else if (ret > 0) {
>>>           virtio_queue_set_notification(q->tx_vq, 0);
>>> -        qemu_bh_schedule(q->tx_bh);
>>> +        replay_bh_schedule_event(q->tx_bh);
>>>           q->tx_waiting = 1;
>>>       }
>>>   }
>>> @@ -3206,6 +3207,10 @@ static void 
>>> virtio_net_device_realize(DeviceState *dev, Error **errp)
>>>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>>>       }
>>> +    if (replay_mode != REPLAY_MODE_NONE) {
>>> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
>>> +    }
>>> +
>>>       if (n->net_conf.duplex_str) {
>>>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>>>               n->net_conf.duplex = DUPLEX_HALF;
>>>
>>>
>>
>
Pavel Dovgalyuk May 31, 2021, 8:47 a.m. UTC | #5
On 31.05.2021 09:39, Jason Wang wrote:
> 
> 在 2021/5/31 下午2:35, Pavel Dovgalyuk 写道:
>> On 31.05.2021 07:55, Jason Wang wrote:
>>>
>>> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
>>>> virtio-net device uses bottom halves for callbacks.
>>>> These callbacks should be deterministic, because they affect VM state.
>>>> This patch replaces BH invocations with corresponding replay functions,
>>>> making them deterministic in record/replay mode.
>>>> This patch also disables guest announce timers for record/replay,
>>>> because they break correct loadvm in deterministic mode.
>>>
>>>
>>> Virtio-net can be configured to work in tx timer mode. Do we need to 
>>> care about that?
>>
>> What does it mean? This patch fixes interaction with TX timer. Is it 
>> related to that mode?
> 
> 
> I meant is the timer used by virtio_net_handle_tx_timer() safe consider 
> you disable announce timer.

I'm not sure that tx_timer is ok. It uses virtual time, but is not saved 
in vmstate.

> 
> Thanks
> 
> 
>>
>>>
>>>
>>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>>> ---
>>>>   hw/net/virtio-net.c |   13 +++++++++----
>>>>   1 file changed, 9 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>>>> index 6b7e8dd04e..e876363236 100644
>>>> --- a/hw/net/virtio-net.c
>>>> +++ b/hw/net/virtio-net.c
>>>> @@ -44,6 +44,7 @@
>>>>   #include "hw/pci/pci.h"
>>>>   #include "net_rx_pkt.h"
>>>>   #include "hw/virtio/vhost.h"
>>>> +#include "sysemu/replay.h"
>>>>   #define VIRTIO_NET_VM_VERSION    11
>>>> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct 
>>>> VirtIODevice *vdev, uint8_t status)
>>>>                   timer_mod(q->tx_timer,
>>>> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>>>>               } else {
>>>> -                qemu_bh_schedule(q->tx_bh);
>>>> +                replay_bh_schedule_event(q->tx_bh);
>>>>               }
>>>>           } else {
>>>>               if (q->tx_timer) {
>>>> @@ -2546,7 +2547,7 @@ static void 
>>>> virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>>>>           return;
>>>>       }
>>>>       virtio_queue_set_notification(vq, 0);
>>>> -    qemu_bh_schedule(q->tx_bh);
>>>> +    replay_bh_schedule_event(q->tx_bh);
>>>
>>>
>>> Not familiar with replay but any chance to change qemu_bh_schedule() 
>>> instead?
>>
>> It would be better, but sometimes qemu_bh_schedule is used for the 
>> callbacks that are not related to the guest state change.
>>
>>>
>>> Thanks
>>>
>>>
>>>>   }
>>>>   static void virtio_net_tx_timer(void *opaque)
>>>> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>>       /* If we flush a full burst of packets, assume there are
>>>>        * more coming and immediately reschedule */
>>>>       if (ret >= n->tx_burst) {
>>>> -        qemu_bh_schedule(q->tx_bh);
>>>> +        replay_bh_schedule_event(q->tx_bh);
>>>>           q->tx_waiting = 1;
>>>>           return;
>>>>       }
>>>> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>>           return;
>>>>       } else if (ret > 0) {
>>>>           virtio_queue_set_notification(q->tx_vq, 0);
>>>> -        qemu_bh_schedule(q->tx_bh);
>>>> +        replay_bh_schedule_event(q->tx_bh);
>>>>           q->tx_waiting = 1;
>>>>       }
>>>>   }
>>>> @@ -3206,6 +3207,10 @@ static void 
>>>> virtio_net_device_realize(DeviceState *dev, Error **errp)
>>>>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>>>>       }
>>>> +    if (replay_mode != REPLAY_MODE_NONE) {
>>>> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
>>>> +    }
>>>> +
>>>>       if (n->net_conf.duplex_str) {
>>>>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>>>>               n->net_conf.duplex = DUPLEX_HALF;
>>>>
>>>>
>>>
>>
>
Pavel Dovgalyuk June 1, 2021, 10:33 a.m. UTC | #6
On 31.05.2021 09:39, Jason Wang wrote:
> 
> 在 2021/5/31 下午2:35, Pavel Dovgalyuk 写道:
>> On 31.05.2021 07:55, Jason Wang wrote:
>>>
>>> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
>>>> virtio-net device uses bottom halves for callbacks.
>>>> These callbacks should be deterministic, because they affect VM state.
>>>> This patch replaces BH invocations with corresponding replay functions,
>>>> making them deterministic in record/replay mode.
>>>> This patch also disables guest announce timers for record/replay,
>>>> because they break correct loadvm in deterministic mode.
>>>
>>>
>>> Virtio-net can be configured to work in tx timer mode. Do we need to 
>>> care about that?
>>
>> What does it mean? This patch fixes interaction with TX timer. Is it 
>> related to that mode?
> 
> 
> I meant is the timer used by virtio_net_handle_tx_timer() safe consider 
> you disable announce timer.

It is related to virtio queue. I looked through it and it looks safe to me.

> 
> Thanks
> 
> 
>>
>>>
>>>
>>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>>> ---
>>>>   hw/net/virtio-net.c |   13 +++++++++----
>>>>   1 file changed, 9 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>>>> index 6b7e8dd04e..e876363236 100644
>>>> --- a/hw/net/virtio-net.c
>>>> +++ b/hw/net/virtio-net.c
>>>> @@ -44,6 +44,7 @@
>>>>   #include "hw/pci/pci.h"
>>>>   #include "net_rx_pkt.h"
>>>>   #include "hw/virtio/vhost.h"
>>>> +#include "sysemu/replay.h"
>>>>   #define VIRTIO_NET_VM_VERSION    11
>>>> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct 
>>>> VirtIODevice *vdev, uint8_t status)
>>>>                   timer_mod(q->tx_timer,
>>>> qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>>>>               } else {
>>>> -                qemu_bh_schedule(q->tx_bh);
>>>> +                replay_bh_schedule_event(q->tx_bh);
>>>>               }
>>>>           } else {
>>>>               if (q->tx_timer) {
>>>> @@ -2546,7 +2547,7 @@ static void 
>>>> virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>>>>           return;
>>>>       }
>>>>       virtio_queue_set_notification(vq, 0);
>>>> -    qemu_bh_schedule(q->tx_bh);
>>>> +    replay_bh_schedule_event(q->tx_bh);
>>>
>>>
>>> Not familiar with replay but any chance to change qemu_bh_schedule() 
>>> instead?
>>
>> It would be better, but sometimes qemu_bh_schedule is used for the 
>> callbacks that are not related to the guest state change.
>>
>>>
>>> Thanks
>>>
>>>
>>>>   }
>>>>   static void virtio_net_tx_timer(void *opaque)
>>>> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>>       /* If we flush a full burst of packets, assume there are
>>>>        * more coming and immediately reschedule */
>>>>       if (ret >= n->tx_burst) {
>>>> -        qemu_bh_schedule(q->tx_bh);
>>>> +        replay_bh_schedule_event(q->tx_bh);
>>>>           q->tx_waiting = 1;
>>>>           return;
>>>>       }
>>>> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>>>>           return;
>>>>       } else if (ret > 0) {
>>>>           virtio_queue_set_notification(q->tx_vq, 0);
>>>> -        qemu_bh_schedule(q->tx_bh);
>>>> +        replay_bh_schedule_event(q->tx_bh);
>>>>           q->tx_waiting = 1;
>>>>       }
>>>>   }
>>>> @@ -3206,6 +3207,10 @@ static void 
>>>> virtio_net_device_realize(DeviceState *dev, Error **errp)
>>>>           n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>>>>       }
>>>> +    if (replay_mode != REPLAY_MODE_NONE) {
>>>> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
>>>> +    }
>>>> +
>>>>       if (n->net_conf.duplex_str) {
>>>>           if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>>>>               n->net_conf.duplex = DUPLEX_HALF;
>>>>
>>>>
>>>
>>
>
Michael S. Tsirkin July 2, 2021, 3:11 p.m. UTC | #7
On Mon, May 17, 2021 at 04:04:20PM +0300, Pavel Dovgalyuk wrote:
> virtio-net device uses bottom halves for callbacks.
> These callbacks should be deterministic, because they affect VM state.
> This patch replaces BH invocations with corresponding replay functions,
> making them deterministic in record/replay mode.
> This patch also disables guest announce timers for record/replay,
> because they break correct loadvm in deterministic mode.
> 
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>

Seems to make sense but clearly Jason's area.
Jason?


> ---
>  hw/net/virtio-net.c |   13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 6b7e8dd04e..e876363236 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -44,6 +44,7 @@
>  #include "hw/pci/pci.h"
>  #include "net_rx_pkt.h"
>  #include "hw/virtio/vhost.h"
> +#include "sysemu/replay.h"
>  
>  #define VIRTIO_NET_VM_VERSION    11
>  
> @@ -394,7 +395,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
>                  timer_mod(q->tx_timer,
>                                 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
>              } else {
> -                qemu_bh_schedule(q->tx_bh);
> +                replay_bh_schedule_event(q->tx_bh);
>              }
>          } else {
>              if (q->tx_timer) {
> @@ -2546,7 +2547,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>          return;
>      }
>      virtio_queue_set_notification(vq, 0);
> -    qemu_bh_schedule(q->tx_bh);
> +    replay_bh_schedule_event(q->tx_bh);
>  }
>  
>  static void virtio_net_tx_timer(void *opaque)
> @@ -2602,7 +2603,7 @@ static void virtio_net_tx_bh(void *opaque)
>      /* If we flush a full burst of packets, assume there are
>       * more coming and immediately reschedule */
>      if (ret >= n->tx_burst) {
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>          q->tx_waiting = 1;
>          return;
>      }
> @@ -2616,7 +2617,7 @@ static void virtio_net_tx_bh(void *opaque)
>          return;
>      } else if (ret > 0) {
>          virtio_queue_set_notification(q->tx_vq, 0);
> -        qemu_bh_schedule(q->tx_bh);
> +        replay_bh_schedule_event(q->tx_bh);
>          q->tx_waiting = 1;
>      }
>  }
> @@ -3206,6 +3207,10 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
>          n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
>      }
>  
> +    if (replay_mode != REPLAY_MODE_NONE) {
> +        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
> +    }
> +
>      if (n->net_conf.duplex_str) {
>          if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
>              n->net_conf.duplex = DUPLEX_HALF;
Philippe Mathieu-Daudé July 2, 2021, 3:22 p.m. UTC | #8
On 5/17/21 3:04 PM, Pavel Dovgalyuk wrote:
> virtio-net device uses bottom halves for callbacks.
> These callbacks should be deterministic, because they affect VM state.
> This patch replaces BH invocations with corresponding replay functions,
> making them deterministic in record/replay mode.

^ This is one change which I'm OK to give a R-b tag.

> This patch also disables guest announce timers for record/replay,
> because they break correct loadvm in deterministic mode.

^ This is another change, can you keep it separately? This
would help in case something need to be reverted.

> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> ---
>  hw/net/virtio-net.c |   13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
Alex Bennée Oct. 20, 2021, 1:40 p.m. UTC | #9
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> On 31.05.2021 07:55, Jason Wang wrote:
>> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
<snip>
>>> @@ -2546,7 +2547,7 @@ static void
>>> virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
>>>           return;
>>>       }
>>>       virtio_queue_set_notification(vq, 0);
>>> -    qemu_bh_schedule(q->tx_bh);
>>> +    replay_bh_schedule_event(q->tx_bh);
>> Not familiar with replay but any chance to change qemu_bh_schedule()
>> instead?
>
> It would be better, but sometimes qemu_bh_schedule is used for the
> callbacks that are not related to the guest state change.

Maybe that indicates we should expand the API:

  void qemu_bh_schedule(QEMUBH *bh, bool guest_state);

or maybe explicit functions:

  void qemu_bh_schedule(QEMUBH *bh);
  void qemu_bh_schedule_guest_update(QEMUBH *bh);

And document the cases with proper prototypes in main-loop.h.

While I was poking around I also saw qemu_bh_schedule_idle which made me
wonder what happens if a guest change is triggered by this. Would this
be impossible to make deterministic because we don't know when the bh
actually get activated?

My concern with just adding replay_bh_schedule_event in the appropriate
places is we end up with a patchwork of support for different devices
and make it very easy to break things.
Michael S. Tsirkin Oct. 20, 2021, 2:50 p.m. UTC | #10
On Wed, Oct 20, 2021 at 02:40:18PM +0100, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
> > On 31.05.2021 07:55, Jason Wang wrote:
> >> 在 2021/5/17 下午9:04, Pavel Dovgalyuk 写道:
> <snip>
> >>> @@ -2546,7 +2547,7 @@ static void
> >>> virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> >>>           return;
> >>>       }
> >>>       virtio_queue_set_notification(vq, 0);
> >>> -    qemu_bh_schedule(q->tx_bh);
> >>> +    replay_bh_schedule_event(q->tx_bh);
> >> Not familiar with replay but any chance to change qemu_bh_schedule()
> >> instead?
> >
> > It would be better, but sometimes qemu_bh_schedule is used for the
> > callbacks that are not related to the guest state change.
> 
> Maybe that indicates we should expand the API:
> 
>   void qemu_bh_schedule(QEMUBH *bh, bool guest_state);
> 
> or maybe explicit functions:
> 
>   void qemu_bh_schedule(QEMUBH *bh);
>   void qemu_bh_schedule_guest_update(QEMUBH *bh);
> 
> And document the cases with proper prototypes in main-loop.h.
> 
> While I was poking around I also saw qemu_bh_schedule_idle which made me
> wonder what happens if a guest change is triggered by this. Would this
> be impossible to make deterministic because we don't know when the bh
> actually get activated?
> 
> My concern with just adding replay_bh_schedule_event in the appropriate
> places is we end up with a patchwork of support for different devices
> and make it very easy to break things.

Right. In fact I think the default should be guest update,
and some kind of new API to be used for things that are
not guest visible.

We really need static analysis to enforce this kind of
constraint, too.


> -- 
> Alex Bennée
diff mbox series

Patch

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 6b7e8dd04e..e876363236 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -44,6 +44,7 @@ 
 #include "hw/pci/pci.h"
 #include "net_rx_pkt.h"
 #include "hw/virtio/vhost.h"
+#include "sysemu/replay.h"
 
 #define VIRTIO_NET_VM_VERSION    11
 
@@ -394,7 +395,7 @@  static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
                 timer_mod(q->tx_timer,
                                qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
             } else {
-                qemu_bh_schedule(q->tx_bh);
+                replay_bh_schedule_event(q->tx_bh);
             }
         } else {
             if (q->tx_timer) {
@@ -2546,7 +2547,7 @@  static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
         return;
     }
     virtio_queue_set_notification(vq, 0);
-    qemu_bh_schedule(q->tx_bh);
+    replay_bh_schedule_event(q->tx_bh);
 }
 
 static void virtio_net_tx_timer(void *opaque)
@@ -2602,7 +2603,7 @@  static void virtio_net_tx_bh(void *opaque)
     /* If we flush a full burst of packets, assume there are
      * more coming and immediately reschedule */
     if (ret >= n->tx_burst) {
-        qemu_bh_schedule(q->tx_bh);
+        replay_bh_schedule_event(q->tx_bh);
         q->tx_waiting = 1;
         return;
     }
@@ -2616,7 +2617,7 @@  static void virtio_net_tx_bh(void *opaque)
         return;
     } else if (ret > 0) {
         virtio_queue_set_notification(q->tx_vq, 0);
-        qemu_bh_schedule(q->tx_bh);
+        replay_bh_schedule_event(q->tx_bh);
         q->tx_waiting = 1;
     }
 }
@@ -3206,6 +3207,10 @@  static void virtio_net_device_realize(DeviceState *dev, Error **errp)
         n->host_features |= (1ULL << VIRTIO_NET_F_MTU);
     }
 
+    if (replay_mode != REPLAY_MODE_NONE) {
+        n->host_features &= ~(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE);
+    }
+
     if (n->net_conf.duplex_str) {
         if (strncmp(n->net_conf.duplex_str, "half", 5) == 0) {
             n->net_conf.duplex = DUPLEX_HALF;