diff mbox series

[v3,09/15] replay: implement replay-seek command

Message ID 159903459923.28509.4300111201059622860.stgit@pasha-ThinkPad-X280
State New
Headers show
Series Reverse debugging | expand

Commit Message

Pavel Dovgalyuk Sept. 2, 2020, 8:16 a.m. UTC
From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>

This patch adds the hmp/qmp commands replay_seek/replay-seek, which advance
the execution to the specified instruction count.
The command automatically loads the nearest snapshot and replays the execution
to reach the desired instruction count.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
 hmp-commands.hx           |   18 +++++++++
 include/monitor/hmp.h     |    1 
 qapi/replay.json          |   20 ++++++++++
 replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+)

Comments

Alex Bennée Sept. 7, 2020, 12:45 p.m. UTC | #1
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>
> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
> the execution to the specified instruction count.
> The command automatically loads nearest snapshot and replays the execution
> to find the desired instruction count.
>
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> Acked-by: Markus Armbruster <armbru@redhat.com>
> ---
>  hmp-commands.hx           |   18 +++++++++
>  include/monitor/hmp.h     |    1 
>  qapi/replay.json          |   20 ++++++++++
>  replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 131 insertions(+)
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index e8ce385879..4288274c4e 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1851,6 +1851,24 @@ SRST
>    The command is ignored when there are no replay breakpoints.
>  ERST
>  
> +    {
> +        .name       = "replay_seek",
> +        .args_type  = "icount:i",
> +        .params     = "icount",
> +        .help       = "replay execution to the specified instruction count",
> +        .cmd        = hmp_replay_seek,
> +    },
> +
> +SRST
> +``replay_seek`` *icount*
> +Automatically proceed to the instruction count *icount*, when
> +replaying the execution. The command automatically loads nearest
> +snapshot and replays the execution to find the desired instruction.
> +When there is no preceding snapshot or the execution is not replayed,
> +then the command fails.
> +*icount* for the reference may be observed with ``info replay`` command.
> +ERST
> +
>      {
>          .name       = "info",
>          .args_type  = "item:s?",


This seems to break the build:

  Warning, treated as error:
  /home/alex/lsrc/qemu.git/docs/../hmp-commands.hx:1863:Definition list ends without a blank line; unexpected unindent.
Alex Bennée Sept. 7, 2020, 12:58 p.m. UTC | #2
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>
> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
> the execution to the specified instruction count.
> The command automatically loads nearest snapshot and replays the execution
> to find the desired instruction count.

Should there be an initial snapshot created at instruction 0? Using a
separate monitor channel:

  (qemu) replay_break 190505
  replay_break 190505
  (qemu) c
  (qemu) info replay
  info replay
  Replaying execution 'record.out': instruction count = 190505
  (qemu) replay_seek 190000
  replay_seek 190000
  snapshotting is disabled

And then the guest dies with a SIGABRT:

  ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out -drive file=record.qcow2,if=none,snapshot,id=rr -monitor telnet:127.0.0.1:4444 -S
  *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
  Hello World! qemu_cortex_a53
  double free or corruption (out)
  fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)

>
> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
> Acked-by: Markus Armbruster <armbru@redhat.com>
> ---
>  hmp-commands.hx           |   18 +++++++++
>  include/monitor/hmp.h     |    1 
>  qapi/replay.json          |   20 ++++++++++
>  replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 131 insertions(+)
>
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index e8ce385879..4288274c4e 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1851,6 +1851,24 @@ SRST
>    The command is ignored when there are no replay breakpoints.
>  ERST
>  
> +    {
> +        .name       = "replay_seek",
> +        .args_type  = "icount:i",
> +        .params     = "icount",
> +        .help       = "replay execution to the specified instruction count",
> +        .cmd        = hmp_replay_seek,
> +    },
> +
> +SRST
> +``replay_seek`` *icount*
> +Automatically proceed to the instruction count *icount*, when
> +replaying the execution. The command automatically loads nearest
> +snapshot and replays the execution to find the desired instruction.
> +When there is no preceding snapshot or the execution is not replayed,
> +then the command fails.
> +*icount* for the reference may be observed with ``info replay`` command.
> +ERST
> +
>      {
>          .name       = "info",
>          .args_type  = "item:s?",
> diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
> index 21849bdda5..655eb81a4c 100644
> --- a/include/monitor/hmp.h
> +++ b/include/monitor/hmp.h
> @@ -133,5 +133,6 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict);
>  void hmp_info_replay(Monitor *mon, const QDict *qdict);
>  void hmp_replay_break(Monitor *mon, const QDict *qdict);
>  void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
> +void hmp_replay_seek(Monitor *mon, const QDict *qdict);
>  
>  #endif
> diff --git a/qapi/replay.json b/qapi/replay.json
> index 173ba76107..bfd83d7591 100644
> --- a/qapi/replay.json
> +++ b/qapi/replay.json
> @@ -99,3 +99,23 @@
>  #
>  ##
>  { 'command': 'replay-delete-break' }
> +
> +##
> +# @replay-seek:
> +#
> +# Automatically proceed to the instruction count @icount, when
> +# replaying the execution. The command automatically loads nearest
> +# snapshot and replays the execution to find the desired instruction.
> +# When there is no preceding snapshot or the execution is not replayed,
> +# then the command fails.
> +# icount for the reference may be obtained with @query-replay command.
> +#
> +# @icount: target instruction count
> +#
> +# Since: 5.2
> +#
> +# Example:
> +#
> +# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
> +##
> +{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
> diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
> index 86e19bb217..cfd0221692 100644
> --- a/replay/replay-debugging.c
> +++ b/replay/replay-debugging.c
> @@ -19,6 +19,8 @@
>  #include "qapi/qapi-commands-replay.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qemu/timer.h"
> +#include "block/snapshot.h"
> +#include "migration/snapshot.h"
>  
>  void hmp_info_replay(Monitor *mon, const QDict *qdict)
>  {
> @@ -127,3 +129,93 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
>          return;
>      }
>  }
> +
> +static char *replay_find_nearest_snapshot(int64_t icount,
> +                                          int64_t *snapshot_icount)
> +{
> +    BlockDriverState *bs;
> +    QEMUSnapshotInfo *sn_tab;
> +    QEMUSnapshotInfo *nearest = NULL;
> +    char *ret = NULL;
> +    int nb_sns, i;
> +    AioContext *aio_context;
> +
> +    *snapshot_icount = -1;
> +
> +    bs = bdrv_all_find_vmstate_bs();
> +    if (!bs) {
> +        goto fail;
> +    }
> +    aio_context = bdrv_get_aio_context(bs);
> +
> +    aio_context_acquire(aio_context);
> +    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
> +    aio_context_release(aio_context);
> +
> +    for (i = 0; i < nb_sns; i++) {
> +        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
> +            if (sn_tab[i].icount != -1ULL
> +                && sn_tab[i].icount <= icount
> +                && (!nearest || nearest->icount < sn_tab[i].icount)) {
> +                nearest = &sn_tab[i];
> +            }
> +        }
> +    }
> +    if (nearest) {
> +        ret = g_strdup(nearest->name);
> +        *snapshot_icount = nearest->icount;
> +    }
> +    g_free(sn_tab);
> +
> +fail:
> +    return ret;
> +}
> +
> +static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
> +{
> +    char *snapshot = NULL;
> +    int64_t snapshot_icount;
> +
> +    if (replay_mode != REPLAY_MODE_PLAY) {
> +        error_setg(errp, "replay must be enabled to seek");
> +        return;
> +    }
> +    if (!replay_snapshot) {
> +        error_setg(errp, "snapshotting is disabled");
> +        return;
> +    }
> +
> +    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
> +    if (snapshot) {
> +        if (icount < replay_get_current_icount()
> +            || replay_get_current_icount() < snapshot_icount) {
> +            vm_stop(RUN_STATE_RESTORE_VM);
> +            load_snapshot(snapshot, errp);
> +        }
> +        g_free(snapshot);
> +    }
> +    if (replay_get_current_icount() <= icount) {
> +        replay_break(icount, callback, NULL);
> +        vm_start();
> +    } else {
> +        error_setg(errp, "cannot seek to the specified instruction count");
> +    }
> +}
> +
> +void qmp_replay_seek(int64_t icount, Error **errp)
> +{
> +    replay_seek(icount, replay_stop_vm, errp);
> +}
> +
> +void hmp_replay_seek(Monitor *mon, const QDict *qdict)
> +{
> +    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
> +    Error *err = NULL;
> +
> +    qmp_replay_seek(icount, &err);
> +    if (err) {
> +        error_report_err(err);
> +        error_free(err);
> +        return;
> +    }
> +}
Pavel Dovgalyuk Sept. 7, 2020, 1:27 p.m. UTC | #3
On 07.09.2020 15:58, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>
>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>> the execution to the specified instruction count.
>> The command automatically loads nearest snapshot and replays the execution
>> to find the desired instruction count.
> 
> Should there be an initial snapshot created at instruction 0? Using a
> separate monitor channel:

Right, you can't go back to a prior state when there is no preceding
snapshot available.

> 
>    (qemu) replay_break 190505
>    replay_break 190505
>    (qemu) c
>    (qemu) info replay
>    info replay
>    Replaying execution 'record.out': instruction count = 190505
>    (qemu) replay_seek 190000
>    replay_seek 190000
>    snapshotting is disabled
> 
> And then the guest dies with a sigabort:

This could be a bug, thanks.

> 
>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out -drive file=record.qcow2,if=none,snapshot,id=rr -monitor telnet:127.0.0.1:4444 -S
>    *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
>    Hello World! qemu_cortex_a53
>    double free or corruption (out)
>    fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)
> 
>>
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> Acked-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>   hmp-commands.hx           |   18 +++++++++
>>   include/monitor/hmp.h     |    1
>>   qapi/replay.json          |   20 ++++++++++
>>   replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 131 insertions(+)
>>
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index e8ce385879..4288274c4e 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -1851,6 +1851,24 @@ SRST
>>     The command is ignored when there are no replay breakpoints.
>>   ERST
>>   
>> +    {
>> +        .name       = "replay_seek",
>> +        .args_type  = "icount:i",
>> +        .params     = "icount",
>> +        .help       = "replay execution to the specified instruction count",
>> +        .cmd        = hmp_replay_seek,
>> +    },
>> +
>> +SRST
>> +``replay_seek`` *icount*
>> +Automatically proceed to the instruction count *icount*, when
>> +replaying the execution. The command automatically loads nearest
>> +snapshot and replays the execution to find the desired instruction.
>> +When there is no preceding snapshot or the execution is not replayed,
>> +then the command fails.
>> +*icount* for the reference may be observed with ``info replay`` command.
>> +ERST
>> +
>>       {
>>           .name       = "info",
>>           .args_type  = "item:s?",
>> diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
>> index 21849bdda5..655eb81a4c 100644
>> --- a/include/monitor/hmp.h
>> +++ b/include/monitor/hmp.h
>> @@ -133,5 +133,6 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict);
>>   void hmp_info_replay(Monitor *mon, const QDict *qdict);
>>   void hmp_replay_break(Monitor *mon, const QDict *qdict);
>>   void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict);
>>   
>>   #endif
>> diff --git a/qapi/replay.json b/qapi/replay.json
>> index 173ba76107..bfd83d7591 100644
>> --- a/qapi/replay.json
>> +++ b/qapi/replay.json
>> @@ -99,3 +99,23 @@
>>   #
>>   ##
>>   { 'command': 'replay-delete-break' }
>> +
>> +##
>> +# @replay-seek:
>> +#
>> +# Automatically proceed to the instruction count @icount, when
>> +# replaying the execution. The command automatically loads nearest
>> +# snapshot and replays the execution to find the desired instruction.
>> +# When there is no preceding snapshot or the execution is not replayed,
>> +# then the command fails.
>> +# icount for the reference may be obtained with @query-replay command.
>> +#
>> +# @icount: target instruction count
>> +#
>> +# Since: 5.2
>> +#
>> +# Example:
>> +#
>> +# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
>> +##
>> +{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
>> diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
>> index 86e19bb217..cfd0221692 100644
>> --- a/replay/replay-debugging.c
>> +++ b/replay/replay-debugging.c
>> @@ -19,6 +19,8 @@
>>   #include "qapi/qapi-commands-replay.h"
>>   #include "qapi/qmp/qdict.h"
>>   #include "qemu/timer.h"
>> +#include "block/snapshot.h"
>> +#include "migration/snapshot.h"
>>   
>>   void hmp_info_replay(Monitor *mon, const QDict *qdict)
>>   {
>> @@ -127,3 +129,93 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
>>           return;
>>       }
>>   }
>> +
>> +static char *replay_find_nearest_snapshot(int64_t icount,
>> +                                          int64_t *snapshot_icount)
>> +{
>> +    BlockDriverState *bs;
>> +    QEMUSnapshotInfo *sn_tab;
>> +    QEMUSnapshotInfo *nearest = NULL;
>> +    char *ret = NULL;
>> +    int nb_sns, i;
>> +    AioContext *aio_context;
>> +
>> +    *snapshot_icount = -1;
>> +
>> +    bs = bdrv_all_find_vmstate_bs();
>> +    if (!bs) {
>> +        goto fail;
>> +    }
>> +    aio_context = bdrv_get_aio_context(bs);
>> +
>> +    aio_context_acquire(aio_context);
>> +    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
>> +    aio_context_release(aio_context);
>> +
>> +    for (i = 0; i < nb_sns; i++) {
>> +        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
>> +            if (sn_tab[i].icount != -1ULL
>> +                && sn_tab[i].icount <= icount
>> +                && (!nearest || nearest->icount < sn_tab[i].icount)) {
>> +                nearest = &sn_tab[i];
>> +            }
>> +        }
>> +    }
>> +    if (nearest) {
>> +        ret = g_strdup(nearest->name);
>> +        *snapshot_icount = nearest->icount;
>> +    }
>> +    g_free(sn_tab);
>> +
>> +fail:
>> +    return ret;
>> +}
>> +
>> +static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
>> +{
>> +    char *snapshot = NULL;
>> +    int64_t snapshot_icount;
>> +
>> +    if (replay_mode != REPLAY_MODE_PLAY) {
>> +        error_setg(errp, "replay must be enabled to seek");
>> +        return;
>> +    }
>> +    if (!replay_snapshot) {
>> +        error_setg(errp, "snapshotting is disabled");
>> +        return;
>> +    }
>> +
>> +    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
>> +    if (snapshot) {
>> +        if (icount < replay_get_current_icount()
>> +            || replay_get_current_icount() < snapshot_icount) {
>> +            vm_stop(RUN_STATE_RESTORE_VM);
>> +            load_snapshot(snapshot, errp);
>> +        }
>> +        g_free(snapshot);
>> +    }
>> +    if (replay_get_current_icount() <= icount) {
>> +        replay_break(icount, callback, NULL);
>> +        vm_start();
>> +    } else {
>> +        error_setg(errp, "cannot seek to the specified instruction count");
>> +    }
>> +}
>> +
>> +void qmp_replay_seek(int64_t icount, Error **errp)
>> +{
>> +    replay_seek(icount, replay_stop_vm, errp);
>> +}
>> +
>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict)
>> +{
>> +    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
>> +    Error *err = NULL;
>> +
>> +    qmp_replay_seek(icount, &err);
>> +    if (err) {
>> +        error_report_err(err);
>> +        error_free(err);
>> +        return;
>> +    }
>> +}
> 
>
Pavel Dovgalyuk Sept. 7, 2020, 1:32 p.m. UTC | #4
On 07.09.2020 15:45, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>
>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>> the execution to the specified instruction count.
>> The command automatically loads nearest snapshot and replays the execution
>> to find the desired instruction count.
>>
>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>> Acked-by: Markus Armbruster <armbru@redhat.com>
>> ---
>>   hmp-commands.hx           |   18 +++++++++
>>   include/monitor/hmp.h     |    1
>>   qapi/replay.json          |   20 ++++++++++
>>   replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 131 insertions(+)
>>
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index e8ce385879..4288274c4e 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -1851,6 +1851,24 @@ SRST
>>     The command is ignored when there are no replay breakpoints.
>>   ERST
>>   
>> +    {
>> +        .name       = "replay_seek",
>> +        .args_type  = "icount:i",
>> +        .params     = "icount",
>> +        .help       = "replay execution to the specified instruction count",
>> +        .cmd        = hmp_replay_seek,
>> +    },
>> +
>> +SRST
>> +``replay_seek`` *icount*
>> +Automatically proceed to the instruction count *icount*, when
>> +replaying the execution. The command automatically loads nearest
>> +snapshot and replays the execution to find the desired instruction.
>> +When there is no preceding snapshot or the execution is not replayed,
>> +then the command fails.
>> +*icount* for the reference may be observed with ``info replay`` command.
>> +ERST
>> +
>>       {
>>           .name       = "info",
>>           .args_type  = "item:s?",
> 
> 
> This seems to break the build:
> 
>    Warning, treated as error:
>    /home/alex/lsrc/qemu.git/docs/../hmp-commands.hx:1863:Definition list ends without a blank line; unexpected unindent.

Thanks, I've added an indent.


Pavel Dovgalyuk
Alex Bennée Sept. 7, 2020, 2:59 p.m. UTC | #5
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> On 07.09.2020 15:58, Alex Bennée wrote:
>> 
>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>> 
>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>
>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>> the execution to the specified instruction count.
>>> The command automatically loads nearest snapshot and replays the execution
>>> to find the desired instruction count.
>> 
>> Should there be an initial snapshot created at instruction 0? Using a
>> separate monitor channel:
>
> Right, you can't go to the prior state, when there is no preceding 
> snapshot available.

It seems creating an initial snapshot automatically would be more user
friendly? What can you do to trigger a snapshot, say for example on a
gdb connect?

>
>> 
>>    (qemu) replay_break 190505
>>    replay_break 190505
>>    (qemu) c
>>    (qemu) info replay
>>    info replay
>>    Replaying execution 'record.out': instruction count = 190505
>>    (qemu) replay_seek 190000
>>    replay_seek 190000
>>    snapshotting is disabled
>> 
>> And then the guest dies with a sigabort:
>
> This could be a bug, thanks.
>
>> 
>>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out -drive file=record.qcow2,if=none,snapshot,id=rr -monitor telnet:127.0.0.1:4444 -S
>>    *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
>>    Hello World! qemu_cortex_a53
>>    double free or corruption (out)
>>    fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)
>> 
>>>
>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>> ---
>>>   hmp-commands.hx           |   18 +++++++++
>>>   include/monitor/hmp.h     |    1
>>>   qapi/replay.json          |   20 ++++++++++
>>>   replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>>>   4 files changed, 131 insertions(+)
>>>
>>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>>> index e8ce385879..4288274c4e 100644
>>> --- a/hmp-commands.hx
>>> +++ b/hmp-commands.hx
>>> @@ -1851,6 +1851,24 @@ SRST
>>>     The command is ignored when there are no replay breakpoints.
>>>   ERST
>>>   
>>> +    {
>>> +        .name       = "replay_seek",
>>> +        .args_type  = "icount:i",
>>> +        .params     = "icount",
>>> +        .help       = "replay execution to the specified instruction count",
>>> +        .cmd        = hmp_replay_seek,
>>> +    },
>>> +
>>> +SRST
>>> +``replay_seek`` *icount*
>>> +Automatically proceed to the instruction count *icount*, when
>>> +replaying the execution. The command automatically loads nearest
>>> +snapshot and replays the execution to find the desired instruction.
>>> +When there is no preceding snapshot or the execution is not replayed,
>>> +then the command fails.
>>> +*icount* for the reference may be observed with ``info replay`` command.
>>> +ERST
>>> +
>>>       {
>>>           .name       = "info",
>>>           .args_type  = "item:s?",
>>> diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
>>> index 21849bdda5..655eb81a4c 100644
>>> --- a/include/monitor/hmp.h
>>> +++ b/include/monitor/hmp.h
>>> @@ -133,5 +133,6 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict);
>>>   void hmp_info_replay(Monitor *mon, const QDict *qdict);
>>>   void hmp_replay_break(Monitor *mon, const QDict *qdict);
>>>   void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict);
>>>   
>>>   #endif
>>> diff --git a/qapi/replay.json b/qapi/replay.json
>>> index 173ba76107..bfd83d7591 100644
>>> --- a/qapi/replay.json
>>> +++ b/qapi/replay.json
>>> @@ -99,3 +99,23 @@
>>>   #
>>>   ##
>>>   { 'command': 'replay-delete-break' }
>>> +
>>> +##
>>> +# @replay-seek:
>>> +#
>>> +# Automatically proceed to the instruction count @icount, when
>>> +# replaying the execution. The command automatically loads nearest
>>> +# snapshot and replays the execution to find the desired instruction.
>>> +# When there is no preceding snapshot or the execution is not replayed,
>>> +# then the command fails.
>>> +# icount for the reference may be obtained with @query-replay command.
>>> +#
>>> +# @icount: target instruction count
>>> +#
>>> +# Since: 5.2
>>> +#
>>> +# Example:
>>> +#
>>> +# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
>>> +##
>>> +{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
>>> diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
>>> index 86e19bb217..cfd0221692 100644
>>> --- a/replay/replay-debugging.c
>>> +++ b/replay/replay-debugging.c
>>> @@ -19,6 +19,8 @@
>>>   #include "qapi/qapi-commands-replay.h"
>>>   #include "qapi/qmp/qdict.h"
>>>   #include "qemu/timer.h"
>>> +#include "block/snapshot.h"
>>> +#include "migration/snapshot.h"
>>>   
>>>   void hmp_info_replay(Monitor *mon, const QDict *qdict)
>>>   {
>>> @@ -127,3 +129,93 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
>>>           return;
>>>       }
>>>   }
>>> +
>>> +static char *replay_find_nearest_snapshot(int64_t icount,
>>> +                                          int64_t *snapshot_icount)
>>> +{
>>> +    BlockDriverState *bs;
>>> +    QEMUSnapshotInfo *sn_tab;
>>> +    QEMUSnapshotInfo *nearest = NULL;
>>> +    char *ret = NULL;
>>> +    int nb_sns, i;
>>> +    AioContext *aio_context;
>>> +
>>> +    *snapshot_icount = -1;
>>> +
>>> +    bs = bdrv_all_find_vmstate_bs();
>>> +    if (!bs) {
>>> +        goto fail;
>>> +    }
>>> +    aio_context = bdrv_get_aio_context(bs);
>>> +
>>> +    aio_context_acquire(aio_context);
>>> +    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
>>> +    aio_context_release(aio_context);
>>> +
>>> +    for (i = 0; i < nb_sns; i++) {
>>> +        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
>>> +            if (sn_tab[i].icount != -1ULL
>>> +                && sn_tab[i].icount <= icount
>>> +                && (!nearest || nearest->icount < sn_tab[i].icount)) {
>>> +                nearest = &sn_tab[i];
>>> +            }
>>> +        }
>>> +    }
>>> +    if (nearest) {
>>> +        ret = g_strdup(nearest->name);
>>> +        *snapshot_icount = nearest->icount;
>>> +    }
>>> +    g_free(sn_tab);
>>> +
>>> +fail:
>>> +    return ret;
>>> +}
>>> +
>>> +static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
>>> +{
>>> +    char *snapshot = NULL;
>>> +    int64_t snapshot_icount;
>>> +
>>> +    if (replay_mode != REPLAY_MODE_PLAY) {
>>> +        error_setg(errp, "replay must be enabled to seek");
>>> +        return;
>>> +    }
>>> +    if (!replay_snapshot) {
>>> +        error_setg(errp, "snapshotting is disabled");
>>> +        return;
>>> +    }
>>> +
>>> +    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
>>> +    if (snapshot) {
>>> +        if (icount < replay_get_current_icount()
>>> +            || replay_get_current_icount() < snapshot_icount) {
>>> +            vm_stop(RUN_STATE_RESTORE_VM);
>>> +            load_snapshot(snapshot, errp);
>>> +        }
>>> +        g_free(snapshot);
>>> +    }
>>> +    if (replay_get_current_icount() <= icount) {
>>> +        replay_break(icount, callback, NULL);
>>> +        vm_start();
>>> +    } else {
>>> +        error_setg(errp, "cannot seek to the specified instruction count");
>>> +    }
>>> +}
>>> +
>>> +void qmp_replay_seek(int64_t icount, Error **errp)
>>> +{
>>> +    replay_seek(icount, replay_stop_vm, errp);
>>> +}
>>> +
>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict)
>>> +{
>>> +    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
>>> +    Error *err = NULL;
>>> +
>>> +    qmp_replay_seek(icount, &err);
>>> +    if (err) {
>>> +        error_report_err(err);
>>> +        error_free(err);
>>> +        return;
>>> +    }
>>> +}
>> 
>>
Pavel Dovgalyuk Sept. 7, 2020, 3:46 p.m. UTC | #6
On 07.09.2020 17:59, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>
>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>
>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>
>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>> the execution to the specified instruction count.
>>>> The command automatically loads nearest snapshot and replays the execution
>>>> to find the desired instruction count.
>>>
>>> Should there be an initial snapshot created at instruction 0? Using a
>>> separate monitor channel:
>>
>> Right, you can't go to the prior state, when there is no preceding
>> snapshot available.
> 
> It seems creating an initial snapshot automatically would be more user

Please take a look at the 'Snapshotting' section of docs/replay.txt.
Reverse debugging is meant to be run with a disk image (overlay)
and the rrsnapshot option of -icount, which allows creating an initial
VM snapshot.

> friendly? What can you do to trigger a snapshot, say for example on a
> gdb connect?

This makes sense when executing with a temporary overlay, thanks.

> 
>>
>>>
>>>     (qemu) replay_break 190505
>>>     replay_break 190505
>>>     (qemu) c
>>>     (qemu) info replay
>>>     info replay
>>>     Replaying execution 'record.out': instruction count = 190505
>>>     (qemu) replay_seek 190000
>>>     replay_seek 190000
>>>     snapshotting is disabled
>>>
>>> And then the guest dies with a sigabort:
>>
>> This could be a bug, thanks.
>>
>>>
>>>     ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out -drive file=record.qcow2,if=none,snapshot,id=rr -monitor telnet:127.0.0.1:4444 -S
>>>     *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
>>>     Hello World! qemu_cortex_a53
>>>     double free or corruption (out)
>>>     fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)
>>>
>>>>
>>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>> ---
>>>>    hmp-commands.hx           |   18 +++++++++
>>>>    include/monitor/hmp.h     |    1
>>>>    qapi/replay.json          |   20 ++++++++++
>>>>    replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>>>>    4 files changed, 131 insertions(+)
>>>>
>>>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>>>> index e8ce385879..4288274c4e 100644
>>>> --- a/hmp-commands.hx
>>>> +++ b/hmp-commands.hx
>>>> @@ -1851,6 +1851,24 @@ SRST
>>>>      The command is ignored when there are no replay breakpoints.
>>>>    ERST
>>>>    
>>>> +    {
>>>> +        .name       = "replay_seek",
>>>> +        .args_type  = "icount:i",
>>>> +        .params     = "icount",
>>>> +        .help       = "replay execution to the specified instruction count",
>>>> +        .cmd        = hmp_replay_seek,
>>>> +    },
>>>> +
>>>> +SRST
>>>> +``replay_seek`` *icount*
>>>> +Automatically proceed to the instruction count *icount*, when
>>>> +replaying the execution. The command automatically loads nearest
>>>> +snapshot and replays the execution to find the desired instruction.
>>>> +When there is no preceding snapshot or the execution is not replayed,
>>>> +then the command fails.
>>>> +*icount* for the reference may be observed with ``info replay`` command.
>>>> +ERST
>>>> +
>>>>        {
>>>>            .name       = "info",
>>>>            .args_type  = "item:s?",
>>>> diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
>>>> index 21849bdda5..655eb81a4c 100644
>>>> --- a/include/monitor/hmp.h
>>>> +++ b/include/monitor/hmp.h
>>>> @@ -133,5 +133,6 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict);
>>>>    void hmp_info_replay(Monitor *mon, const QDict *qdict);
>>>>    void hmp_replay_break(Monitor *mon, const QDict *qdict);
>>>>    void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
>>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict);
>>>>    
>>>>    #endif
>>>> diff --git a/qapi/replay.json b/qapi/replay.json
>>>> index 173ba76107..bfd83d7591 100644
>>>> --- a/qapi/replay.json
>>>> +++ b/qapi/replay.json
>>>> @@ -99,3 +99,23 @@
>>>>    #
>>>>    ##
>>>>    { 'command': 'replay-delete-break' }
>>>> +
>>>> +##
>>>> +# @replay-seek:
>>>> +#
>>>> +# Automatically proceed to the instruction count @icount, when
>>>> +# replaying the execution. The command automatically loads nearest
>>>> +# snapshot and replays the execution to find the desired instruction.
>>>> +# When there is no preceding snapshot or the execution is not replayed,
>>>> +# then the command fails.
>>>> +# icount for the reference may be obtained with @query-replay command.
>>>> +#
>>>> +# @icount: target instruction count
>>>> +#
>>>> +# Since: 5.2
>>>> +#
>>>> +# Example:
>>>> +#
>>>> +# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
>>>> +##
>>>> +{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
>>>> diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
>>>> index 86e19bb217..cfd0221692 100644
>>>> --- a/replay/replay-debugging.c
>>>> +++ b/replay/replay-debugging.c
>>>> @@ -19,6 +19,8 @@
>>>>    #include "qapi/qapi-commands-replay.h"
>>>>    #include "qapi/qmp/qdict.h"
>>>>    #include "qemu/timer.h"
>>>> +#include "block/snapshot.h"
>>>> +#include "migration/snapshot.h"
>>>>    
>>>>    void hmp_info_replay(Monitor *mon, const QDict *qdict)
>>>>    {
>>>> @@ -127,3 +129,93 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
>>>>            return;
>>>>        }
>>>>    }
>>>> +
>>>> +static char *replay_find_nearest_snapshot(int64_t icount,
>>>> +                                          int64_t *snapshot_icount)
>>>> +{
>>>> +    BlockDriverState *bs;
>>>> +    QEMUSnapshotInfo *sn_tab;
>>>> +    QEMUSnapshotInfo *nearest = NULL;
>>>> +    char *ret = NULL;
>>>> +    int nb_sns, i;
>>>> +    AioContext *aio_context;
>>>> +
>>>> +    *snapshot_icount = -1;
>>>> +
>>>> +    bs = bdrv_all_find_vmstate_bs();
>>>> +    if (!bs) {
>>>> +        goto fail;
>>>> +    }
>>>> +    aio_context = bdrv_get_aio_context(bs);
>>>> +
>>>> +    aio_context_acquire(aio_context);
>>>> +    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
>>>> +    aio_context_release(aio_context);
>>>> +
>>>> +    for (i = 0; i < nb_sns; i++) {
>>>> +        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
>>>> +            if (sn_tab[i].icount != -1ULL
>>>> +                && sn_tab[i].icount <= icount
>>>> +                && (!nearest || nearest->icount < sn_tab[i].icount)) {
>>>> +                nearest = &sn_tab[i];
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +    if (nearest) {
>>>> +        ret = g_strdup(nearest->name);
>>>> +        *snapshot_icount = nearest->icount;
>>>> +    }
>>>> +    g_free(sn_tab);
>>>> +
>>>> +fail:
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
>>>> +{
>>>> +    char *snapshot = NULL;
>>>> +    int64_t snapshot_icount;
>>>> +
>>>> +    if (replay_mode != REPLAY_MODE_PLAY) {
>>>> +        error_setg(errp, "replay must be enabled to seek");
>>>> +        return;
>>>> +    }
>>>> +    if (!replay_snapshot) {
>>>> +        error_setg(errp, "snapshotting is disabled");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
>>>> +    if (snapshot) {
>>>> +        if (icount < replay_get_current_icount()
>>>> +            || replay_get_current_icount() < snapshot_icount) {
>>>> +            vm_stop(RUN_STATE_RESTORE_VM);
>>>> +            load_snapshot(snapshot, errp);
>>>> +        }
>>>> +        g_free(snapshot);
>>>> +    }
>>>> +    if (replay_get_current_icount() <= icount) {
>>>> +        replay_break(icount, callback, NULL);
>>>> +        vm_start();
>>>> +    } else {
>>>> +        error_setg(errp, "cannot seek to the specified instruction count");
>>>> +    }
>>>> +}
>>>> +
>>>> +void qmp_replay_seek(int64_t icount, Error **errp)
>>>> +{
>>>> +    replay_seek(icount, replay_stop_vm, errp);
>>>> +}
>>>> +
>>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict)
>>>> +{
>>>> +    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
>>>> +    Error *err = NULL;
>>>> +
>>>> +    qmp_replay_seek(icount, &err);
>>>> +    if (err) {
>>>> +        error_report_err(err);
>>>> +        error_free(err);
>>>> +        return;
>>>> +    }
>>>> +}
>>>
>>>
> 
>
Alex Bennée Sept. 7, 2020, 4:25 p.m. UTC | #7
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> On 07.09.2020 17:59, Alex Bennée wrote:
>> 
>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>> 
>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>
>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>
>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>
>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>> the execution to the specified instruction count.
>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>> to find the desired instruction count.
>>>>
>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>> separate monitor channel:
>>>
>>> Right, you can't go to the prior state, when there is no preceding
>>> snapshot available.
>> 
>> It seems creating an initial snapshot automatically would be more user
>
> Please take a look at 'Snapshotting' section of docs/replay.txt.
> Reverse debugging is considered to be run with disk image (overlay)
> and rrsnapshot option of icount, which allows creating an initial
> VM snapshot.

Given that I'm using the block device purely for VM snapshots I think it
would be useful to document the minimal "no disk" approach - i.e. where
the disk is only used for record/replay.

However I'm still having trouble. I can record the trace with:

  ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
    -machine virt -kernel zephyr.elf -net none \
    -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
    -drive file=record.qcow2,if=none,id=rr \
    -monitor telnet:127.0.0.1:4444 -S

which shows:

  (qemu) info snapshots
  info snapshots
  List of snapshots present on all disks:
  ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
  --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0

but do I need a whole separate overlay in the replay case? I thought
supplying the snapshot option to the drive would prevent the replay from
overwriting what has been recorded, but with:

    -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
    -drive file=record.qcow2,if=none,id=rr,snapshot

but I get:

  (qemu) info snapshots
  info snapshots
  There is no snapshot available.

so if I drop the ,snapshot from the line I can at least see the snapshot
but continue doesn't seem to work:

  (qemu) info snapshots
  info snapshots
  List of snapshots present on all disks:
  ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
  --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
  (qemu) replay_break 190505
  replay_break 190505
  (qemu) c
  c
  (qemu) info replay
  info replay
  Replaying execution 'record.out': instruction count = 0
  (qemu)

If I manually loadvm then we get somewhere but replay_seek breaks:

  (qemu) loadvm rrstart
  loadvm rrstart
  (qemu) info replay
  info replay
  Replaying execution 'record.out': instruction count = 190505
  (qemu) replay_seek 190000
  replay_seek 190000
  snapshotting is disabled

with a crash:

  ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out
 -drive file=record.qcow2,if=none,id=rr -monitor telnet:127.0.0.1:4444 -S
*** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
Hello World! qemu_cortex_a53
free(): invalid pointer
fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)


>
>> friendly? What can you do to trigger a snapshot, say for example on a
>> gdb connect?
>
> This makes sense when executing with temporary overlay, thanks.
>
>> 
>>>
>>>>
>>>>     (qemu) replay_break 190505
>>>>     replay_break 190505
>>>>     (qemu) c
>>>>     (qemu) info replay
>>>>     info replay
>>>>     Replaying execution 'record.out': instruction count = 190505
>>>>     (qemu) replay_seek 190000
>>>>     replay_seek 190000
>>>>     snapshotting is disabled
>>>>
>>>> And then the guest dies with a sigabort:
>>>
>>> This could be a bug, thanks.
>>>
>>>>
>>>>     ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out -drive file=record.qcow2,if=none,snapshot,id=rr -monitor telnet:127.0.0.1:4444 -S
>>>>     *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
>>>>     Hello World! qemu_cortex_a53
>>>>     double free or corruption (out)
>>>>     fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)
>>>>
>>>>>
>>>>> Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
>>>>> Acked-by: Markus Armbruster <armbru@redhat.com>
>>>>> ---
>>>>>    hmp-commands.hx           |   18 +++++++++
>>>>>    include/monitor/hmp.h     |    1
>>>>>    qapi/replay.json          |   20 ++++++++++
>>>>>    replay/replay-debugging.c |   92 +++++++++++++++++++++++++++++++++++++++++++++
>>>>>    4 files changed, 131 insertions(+)
>>>>>
>>>>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>>>>> index e8ce385879..4288274c4e 100644
>>>>> --- a/hmp-commands.hx
>>>>> +++ b/hmp-commands.hx
>>>>> @@ -1851,6 +1851,24 @@ SRST
>>>>>      The command is ignored when there are no replay breakpoints.
>>>>>    ERST
>>>>>    
>>>>> +    {
>>>>> +        .name       = "replay_seek",
>>>>> +        .args_type  = "icount:i",
>>>>> +        .params     = "icount",
>>>>> +        .help       = "replay execution to the specified instruction count",
>>>>> +        .cmd        = hmp_replay_seek,
>>>>> +    },
>>>>> +
>>>>> +SRST
>>>>> +``replay_seek`` *icount*
>>>>> +Automatically proceed to the instruction count *icount*, when
>>>>> +replaying the execution. The command automatically loads nearest
>>>>> +snapshot and replays the execution to find the desired instruction.
>>>>> +When there is no preceding snapshot or the execution is not replayed,
>>>>> +then the command fails.
>>>>> +*icount* for the reference may be observed with ``info replay`` command.
>>>>> +ERST
>>>>> +
>>>>>        {
>>>>>            .name       = "info",
>>>>>            .args_type  = "item:s?",
>>>>> diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
>>>>> index 21849bdda5..655eb81a4c 100644
>>>>> --- a/include/monitor/hmp.h
>>>>> +++ b/include/monitor/hmp.h
>>>>> @@ -133,5 +133,6 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict);
>>>>>    void hmp_info_replay(Monitor *mon, const QDict *qdict);
>>>>>    void hmp_replay_break(Monitor *mon, const QDict *qdict);
>>>>>    void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
>>>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict);
>>>>>    
>>>>>    #endif
>>>>> diff --git a/qapi/replay.json b/qapi/replay.json
>>>>> index 173ba76107..bfd83d7591 100644
>>>>> --- a/qapi/replay.json
>>>>> +++ b/qapi/replay.json
>>>>> @@ -99,3 +99,23 @@
>>>>>    #
>>>>>    ##
>>>>>    { 'command': 'replay-delete-break' }
>>>>> +
>>>>> +##
>>>>> +# @replay-seek:
>>>>> +#
>>>>> +# Automatically proceed to the instruction count @icount, when
>>>>> +# replaying the execution. The command automatically loads nearest
>>>>> +# snapshot and replays the execution to find the desired instruction.
>>>>> +# When there is no preceding snapshot or the execution is not replayed,
>>>>> +# then the command fails.
>>>>> +# icount for the reference may be obtained with @query-replay command.
>>>>> +#
>>>>> +# @icount: target instruction count
>>>>> +#
>>>>> +# Since: 5.2
>>>>> +#
>>>>> +# Example:
>>>>> +#
>>>>> +# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
>>>>> +##
>>>>> +{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
>>>>> diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
>>>>> index 86e19bb217..cfd0221692 100644
>>>>> --- a/replay/replay-debugging.c
>>>>> +++ b/replay/replay-debugging.c
>>>>> @@ -19,6 +19,8 @@
>>>>>    #include "qapi/qapi-commands-replay.h"
>>>>>    #include "qapi/qmp/qdict.h"
>>>>>    #include "qemu/timer.h"
>>>>> +#include "block/snapshot.h"
>>>>> +#include "migration/snapshot.h"
>>>>>    
>>>>>    void hmp_info_replay(Monitor *mon, const QDict *qdict)
>>>>>    {
>>>>> @@ -127,3 +129,93 @@ void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
>>>>>            return;
>>>>>        }
>>>>>    }
>>>>> +
>>>>> +static char *replay_find_nearest_snapshot(int64_t icount,
>>>>> +                                          int64_t *snapshot_icount)
>>>>> +{
>>>>> +    BlockDriverState *bs;
>>>>> +    QEMUSnapshotInfo *sn_tab;
>>>>> +    QEMUSnapshotInfo *nearest = NULL;
>>>>> +    char *ret = NULL;
>>>>> +    int nb_sns, i;
>>>>> +    AioContext *aio_context;
>>>>> +
>>>>> +    *snapshot_icount = -1;
>>>>> +
>>>>> +    bs = bdrv_all_find_vmstate_bs();
>>>>> +    if (!bs) {
>>>>> +        goto fail;
>>>>> +    }
>>>>> +    aio_context = bdrv_get_aio_context(bs);
>>>>> +
>>>>> +    aio_context_acquire(aio_context);
>>>>> +    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
>>>>> +    aio_context_release(aio_context);
>>>>> +
>>>>> +    for (i = 0; i < nb_sns; i++) {
>>>>> +        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0) {
>>>>> +            if (sn_tab[i].icount != -1ULL
>>>>> +                && sn_tab[i].icount <= icount
>>>>> +                && (!nearest || nearest->icount < sn_tab[i].icount)) {
>>>>> +                nearest = &sn_tab[i];
>>>>> +            }
>>>>> +        }
>>>>> +    }
>>>>> +    if (nearest) {
>>>>> +        ret = g_strdup(nearest->name);
>>>>> +        *snapshot_icount = nearest->icount;
>>>>> +    }
>>>>> +    g_free(sn_tab);
>>>>> +
>>>>> +fail:
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
>>>>> +{
>>>>> +    char *snapshot = NULL;
>>>>> +    int64_t snapshot_icount;
>>>>> +
>>>>> +    if (replay_mode != REPLAY_MODE_PLAY) {
>>>>> +        error_setg(errp, "replay must be enabled to seek");
>>>>> +        return;
>>>>> +    }
>>>>> +    if (!replay_snapshot) {
>>>>> +        error_setg(errp, "snapshotting is disabled");
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
>>>>> +    if (snapshot) {
>>>>> +        if (icount < replay_get_current_icount()
>>>>> +            || replay_get_current_icount() < snapshot_icount) {
>>>>> +            vm_stop(RUN_STATE_RESTORE_VM);
>>>>> +            load_snapshot(snapshot, errp);
>>>>> +        }
>>>>> +        g_free(snapshot);
>>>>> +    }
>>>>> +    if (replay_get_current_icount() <= icount) {
>>>>> +        replay_break(icount, callback, NULL);
>>>>> +        vm_start();
>>>>> +    } else {
>>>>> +        error_setg(errp, "cannot seek to the specified instruction count");
>>>>> +    }
>>>>> +}
>>>>> +
>>>>> +void qmp_replay_seek(int64_t icount, Error **errp)
>>>>> +{
>>>>> +    replay_seek(icount, replay_stop_vm, errp);
>>>>> +}
>>>>> +
>>>>> +void hmp_replay_seek(Monitor *mon, const QDict *qdict)
>>>>> +{
>>>>> +    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
>>>>> +    Error *err = NULL;
>>>>> +
>>>>> +    qmp_replay_seek(icount, &err);
>>>>> +    if (err) {
>>>>> +        error_report_err(err);
>>>>> +        error_free(err);
>>>>> +        return;
>>>>> +    }
>>>>> +}
>>>>
>>>>
>> 
>>
Pavel Dovgalyuk Sept. 8, 2020, 7:44 a.m. UTC | #8
On 07.09.2020 19:25, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>
>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>
>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>
>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>
>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>
>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>> the execution to the specified instruction count.
>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>> to find the desired instruction count.
>>>>>
>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>> separate monitor channel:
>>>>
>>>> Right, you can't go to the prior state, when there is no preceding
>>>> snapshot available.
>>>
>>> It seems creating an initial snapshot automatically would be more user
>>
>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>> Reverse debugging is considered to be run with disk image (overlay)
>> and rrsnapshot option of icount, which allows creating an initial
>> VM snapshot.
> 
> Given that I'm using the block device purely for VM snapshots I think it
> would be useful to document the minimal "no disk" approach - i.e. where
> the disk is only used for record/replay.
> 
> However I'm still having trouble. I can record the trace with:
> 
>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>      -machine virt -kernel zephyr.elf -net none \
>      -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>      -drive file=record.qcow2,if=none,id=rr \
>      -monitor telnet:127.0.0.1:4444 -S

Can you provide your zephyr.elf image?

> 
> which shows:
> 
>    (qemu) info snapshots
>    info snapshots
>    List of snapshots present on all disks:
>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
> 
> but do I need a whole separate overlay in the replay case? I thought
> supplying snapshot to the drive would prevent the replay case
> overwriting what has been recorded but with:
> 
>      -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>      -drive file=record.qcow2,if=none,id=rr,snapshot

When you provide a qcow2 image (overlay or not) for snapshotting, you
don't need the 'snapshot' option on the drive.

> 
> but I get:
> 
>    (qemu) info snapshots
>    info snapshots
>    There is no snapshot available.
> 
> so if I drop the ,snapshot from the line I can at least see the snapshot
> but continue doesn't seem to work:
> 
>    (qemu) info snapshots
>    info snapshots
>    List of snapshots present on all disks:
>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>    (qemu) replay_break 190505
>    replay_break 190505
>    (qemu) c
>    c
>    (qemu) info replay
>    info replay
>    Replaying execution 'record.out': instruction count = 0

It seems that the replay hangs. Can you try removing '-S' from the record
command line?

>    (qemu)
> 
> If I manually loadvm then we get somewhere but replay_seek breaks:
> 
>    (qemu) loadvm rrstart
>    loadvm rrstart
>    (qemu) info replay
>    info replay
>    Replaying execution 'record.out': instruction count = 190505
>    (qemu) replay_seek 190000
>    replay_seek 190000
>    snapshotting is disabled
> 
> with a crash:
> 
>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out
>   -drive file=record.qcow2,if=none,id=rr -monitor telnet:127.0.0.1:4444 -S
> *** Booting Zephyr OS build zephyr-v2.3.0-1183-ge5628ad0faf3  ***
> Hello World! qemu_cortex_a53
> free(): invalid pointer
> fish: “./qemu-system-aarch64 -cpu cort…” terminated by signal SIGABRT (Abort)
> 
> 
>>
>>> friendly? What can you do to trigger a snapshot, say for example on a
>>> gdb connect?
>>
>> This makes sense when executing with temporary overlay, thanks.
>>
>>>
>>>>
>>>>>

Pavel Dovgalyuk
Alex Bennée Sept. 8, 2020, 9:13 a.m. UTC | #9
Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:

> On 07.09.2020 19:25, Alex Bennée wrote:
>> 
>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>> 
>>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>>
>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>
>>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>>
>>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>>
>>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>>
>>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>>> the execution to the specified instruction count.
>>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>>> to find the desired instruction count.
>>>>>>
>>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>>> separate monitor channel:
>>>>>
>>>>> Right, you can't go to the prior state, when there is no preceding
>>>>> snapshot available.
>>>>
>>>> It seems creating an initial snapshot automatically would be more user
>>>
>>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>>> Reverse debugging is considered to be run with disk image (overlay)
>>> and rrsnapshot option of icount, which allows creating an initial
>>> VM snapshot.
>> 
>> Given that I'm using the block device purely for VM snapshots I think it
>> would be useful to document the minimal "no disk" approach - i.e. where
>> the disk is only used for record/replay.
>> 
>> However I'm still having trouble. I can record the trace with:
>> 
>>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>>      -machine virt -kernel zephyr.elf -net none \
>>      -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>>      -drive file=record.qcow2,if=none,id=rr \
>>      -monitor telnet:127.0.0.1:4444 -S
>
> Can you provide your zephyr.elf image?
>
>> 
>> which shows:
>> 
>>    (qemu) info snapshots
>>    info snapshots
>>    List of snapshots present on all disks:
>>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>> 
>> but do I need a whole separate overlay in the replay case? I thought
>> supplying snapshot to the drive would prevent the replay case
>> overwriting what has been recorded but with:
>> 
>>      -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>>      -drive file=record.qcow2,if=none,id=rr,snapshot
>
> When you provide qcow2 (overlay or not) for snapshotting, you don't need 
> any 'snapshot' option on drive.
>
>> 
>> but I get:
>> 
>>    (qemu) info snapshots
>>    info snapshots
>>    There is no snapshot available.
>> 
>> so if I drop the ,snapshot from the line I can at least see the snapshot
>> but continue doesn't seem to work:
>> 
>>    (qemu) info snapshots
>>    info snapshots
>>    List of snapshots present on all disks:
>>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>    (qemu) replay_break 190505
>>    replay_break 190505
>>    (qemu) c
>>    c
>>    (qemu) info replay
>>    info replay
>>    Replaying execution 'record.out': instruction count = 0
>
> It seems, that replay hangs. Can you try removing '-S' in record command 
> line?

Removing '-S' from both the record and replay command lines doesn't make
any difference. It seems to need a loadvm to start things off.

I've sent you an image off list. Please let me know if you can replicate.
Pavel Dovgalyuk Sept. 8, 2020, 10:54 a.m. UTC | #10
On 07.09.2020 19:25, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>
>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>
>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>
>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>
>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>
>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>> the execution to the specified instruction count.
>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>> to find the desired instruction count.
>>>>>
>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>> separate monitor channel:
>>>>
>>>> Right, you can't go to the prior state, when there is no preceding
>>>> snapshot available.
>>>
>>> It seems creating an initial snapshot automatically would be more user
>>
>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>> Reverse debugging is considered to be run with disk image (overlay)
>> and rrsnapshot option of icount, which allows creating an initial
>> VM snapshot.
> 
> Given that I'm using the block device purely for VM snapshots I think it
> would be useful to document the minimal "no disk" approach - i.e. where
> the disk is only used for record/replay.
> 
> However I'm still having trouble. I can record the trace with:
> 
>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>      -machine virt -kernel zephyr.elf -net none \
>      -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>      -drive file=record.qcow2,if=none,id=rr \
>      -monitor telnet:127.0.0.1:4444 -S
> 
> which shows:
> 
>    (qemu) info snapshots
>    info snapshots
>    List of snapshots present on all disks:
>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
> 
> but do I need a whole separate overlay in the replay case? I thought
> supplying snapshot to the drive would prevent the replay case
> overwriting what has been recorded but with:
> 
>      -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>      -drive file=record.qcow2,if=none,id=rr,snapshot
> 
> but I get:
> 
>    (qemu) info snapshots
>    info snapshots
>    There is no snapshot available.
> 
> so if I drop the ,snapshot from the line I can at least see the snapshot
> but continue doesn't seem to work:
> 
>    (qemu) info snapshots
>    info snapshots
>    List of snapshots present on all disks:
>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>    (qemu) replay_break 190505
>    replay_break 190505
>    (qemu) c
>    c
>    (qemu) info replay
>    info replay
>    Replaying execution 'record.out': instruction count = 0
>    (qemu)
> 
> If I manually loadvm then we get somewhere but replay_seek breaks:
> 
>    (qemu) loadvm rrstart
>    loadvm rrstart
>    (qemu) info replay
>    info replay
>    Replaying execution 'record.out': instruction count = 190505
>    (qemu) replay_seek 190000
>    replay_seek 190000
>    snapshotting is disabled
> 
> with a crash:
> 
>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio -machine virt -kernel zephyr.elf -net none -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out
>   -drive file=record.qcow2,if=none,id=rr -monitor telnet:127.0.0.1:4444 -S

I missed that you omitted rrsnapshot from the replay command line.
The execution was recorded with an initial snapshot, so it should be
replayed with the rrsnapshot option too.


Pavel Dovgalyuk
Pavel Dovgalyuk Sept. 8, 2020, 10:57 a.m. UTC | #11
On 08.09.2020 12:13, Alex Bennée wrote:
> 
> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
> 
>> On 07.09.2020 19:25, Alex Bennée wrote:
>>>
>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>
>>>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>>>
>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>
>>>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>>>
>>>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>>>
>>>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>>>
>>>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>>>> the execution to the specified instruction count.
>>>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>>>> to find the desired instruction count.
>>>>>>>
>>>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>>>> separate monitor channel:
>>>>>>
>>>>>> Right, you can't go to the prior state, when there is no preceding
>>>>>> snapshot available.
>>>>>
>>>>> It seems creating an initial snapshot automatically would be more user
>>>>
>>>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>>>> Reverse debugging is considered to be run with disk image (overlay)
>>>> and rrsnapshot option of icount, which allows creating an initial
>>>> VM snapshot.
>>>
>>> Given that I'm using the block device purely for VM snapshots I think it
>>> would be useful to document the minimal "no disk" approach - i.e. where
>>> the disk is only used for record/replay.
>>>
>>> However I'm still having trouble. I can record the trace with:
>>>
>>>     ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>>>       -machine virt -kernel zephyr.elf -net none \
>>>       -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>>>       -drive file=record.qcow2,if=none,id=rr \
>>>       -monitor telnet:127.0.0.1:4444 -S
>>
>> Can you provide your zephyr.elf image?
>>
>>>
>>> which shows:
>>>
>>>     (qemu) info snapshots
>>>     info snapshots
>>>     List of snapshots present on all disks:
>>>     ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>     --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>>
>>> but do I need a whole separate overlay in the replay case? I thought
>>> supplying snapshot to the drive would prevent the replay case
>>> overwriting what has been recorded but with:
>>>
>>>       -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>>>       -drive file=record.qcow2,if=none,id=rr,snapshot
>>
>> When you provide qcow2 (overlay or not) for snapshotting, you don't need
>> any 'snapshot' option on drive.
>>
>>>
>>> but I get:
>>>
>>>     (qemu) info snapshots
>>>     info snapshots
>>>     There is no snapshot available.
>>>
>>> so if I drop the ,snapshot from the line I can at least see the snapshot
>>> but continue doesn't seem to work:
>>>
>>>     (qemu) info snapshots
>>>     info snapshots
>>>     List of snapshots present on all disks:
>>>     ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>     --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>>     (qemu) replay_break 190505
>>>     replay_break 190505
>>>     (qemu) c
>>>     c
>>>     (qemu) info replay
>>>     info replay
>>>     Replaying execution 'record.out': instruction count = 0
>>
>> It seems, that replay hangs. Can you try removing '-S' in record command
>> line?
> 
> That doesn't make any difference removing from both the record and
> replay cases. It seems to need a loadvm to start things off.
> 
> I've sent you an image off list. Please let me know if you can replicate.
> 

With rrsnapshot in the replay command line, reverse debugging of your image seems to be OK:

(gdb) set arch aarch64
The target architecture is assumed to be aarch64
(gdb) tar rem :1234
Remote debugging using :1234
warning: No executable has been specified and target does not support
determining executable automatically.  Try using the "file" command.
0x00000000400003f8 in ?? ()
(gdb) monitor info replay
Replaying execution 'record.out': instruction count = 0
(gdb) monitor replay_break 100000
(gdb) c
Continuing.

Program received signal SIGINT, Interrupt.
0x0000000040001690 in ?? ()
(gdb) monitor info replay
Replaying execution 'record.out': instruction count = 100000
(gdb) rsi
0x0000000040001670 in ?? ()
(gdb) monitor info replay
Replaying execution 'record.out': instruction count = 99999
(gdb)


Pavel Dovgalyuk
Alex Bennée Sept. 8, 2020, 11:10 a.m. UTC | #12
Alex Bennée <alex.bennee@linaro.org> writes:

> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>
>> On 07.09.2020 19:25, Alex Bennée wrote:
>>> 
>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>> 
>>>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>>>
>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>
>>>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>>>
>>>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>>>
>>>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>>>
>>>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>>>> the execution to the specified instruction count.
>>>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>>>> to find the desired instruction count.
>>>>>>>
>>>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>>>> separate monitor channel:
>>>>>>
>>>>>> Right, you can't go to the prior state, when there is no preceding
>>>>>> snapshot available.
>>>>>
>>>>> It seems creating an initial snapshot automatically would be more user
>>>>
>>>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>>>> Reverse debugging is considered to be run with disk image (overlay)
>>>> and rrsnapshot option of icount, which allows creating an initial
>>>> VM snapshot.
>>> 
>>> Given that I'm using the block device purely for VM snapshots I think it
>>> would be useful to document the minimal "no disk" approach - i.e. where
>>> the disk is only used for record/replay.
>>> 
>>> However I'm still having trouble. I can record the trace with:
>>> 
>>>    ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>>>      -machine virt -kernel zephyr.elf -net none \
>>>      -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>>>      -drive file=record.qcow2,if=none,id=rr \
>>>      -monitor telnet:127.0.0.1:4444 -S
>>
>> Can you provide your zephyr.elf image?
>>
>>> 
>>> which shows:
>>> 
>>>    (qemu) info snapshots
>>>    info snapshots
>>>    List of snapshots present on all disks:
>>>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>> 
>>> but do I need a whole separate overlay in the replay case? I thought
>>> supplying snapshot to the drive would prevent the replay case
>>> overwriting what has been recorded but with:
>>> 
>>>      -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>>>      -drive file=record.qcow2,if=none,id=rr,snapshot
>>
>> When you provide qcow2 (overlay or not) for snapshotting, you don't need 
>> any 'snapshot' option on drive.
>>
>>> 
>>> but I get:
>>> 
>>>    (qemu) info snapshots
>>>    info snapshots
>>>    There is no snapshot available.
>>> 
>>> so if I drop the ,snapshot from the line I can at least see the snapshot
>>> but continue doesn't seem to work:
>>> 
>>>    (qemu) info snapshots
>>>    info snapshots
>>>    List of snapshots present on all disks:
>>>    ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>    --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>>    (qemu) replay_break 190505
>>>    replay_break 190505
>>>    (qemu) c
>>>    c
>>>    (qemu) info replay
>>>    info replay
>>>    Replaying execution 'record.out': instruction count = 0
>>
>> It seems, that replay hangs. Can you try removing '-S' in record command 
>> line?
>
> That doesn't make any difference removing from both the record and
> replay cases. It seems to need a loadvm to start things off.
>
> I've sent you an image off list. Please let me know if you can
> replicate.

OK I can successfully use gdb to reverse debug the acceptance test (\o/)
so I suspect there are differences in the calling setup.

The first one is ensuring that rrsnapshot is set for both record and
replay. For this reason I think a more user friendly automatic snapshot
would be worth setting up when record/replay is being used.

-icount sleep=off definitely breaks things. Do we keep track of the
 icount bias as save and restore state?
Pavel Dovgalyuk Sept. 8, 2020, 12:15 p.m. UTC | #13
On 08.09.2020 14:10, Alex Bennée wrote:
> 
> Alex Bennée <alex.bennee@linaro.org> writes:
> 
>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>
>>> On 07.09.2020 19:25, Alex Bennée wrote:
>>>>
>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>
>>>>> On 07.09.2020 17:59, Alex Bennée wrote:
>>>>>>
>>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>>
>>>>>>> On 07.09.2020 15:58, Alex Bennée wrote:
>>>>>>>>
>>>>>>>> Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru> writes:
>>>>>>>>
>>>>>>>>> From: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru>
>>>>>>>>>
>>>>>>>>> This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
>>>>>>>>> the execution to the specified instruction count.
>>>>>>>>> The command automatically loads nearest snapshot and replays the execution
>>>>>>>>> to find the desired instruction count.
>>>>>>>>
>>>>>>>> Should there be an initial snapshot created at instruction 0? Using a
>>>>>>>> separate monitor channel:
>>>>>>>
>>>>>>> Right, you can't go to the prior state, when there is no preceding
>>>>>>> snapshot available.
>>>>>>
>>>>>> It seems creating an initial snapshot automatically would be more user
>>>>>
>>>>> Please take a look at 'Snapshotting' section of docs/replay.txt.
>>>>> Reverse debugging is considered to be run with disk image (overlay)
>>>>> and rrsnapshot option of icount, which allows creating an initial
>>>>> VM snapshot.
>>>>
>>>> Given that I'm using the block device purely for VM snapshots I think it
>>>> would be useful to document the minimal "no disk" approach - i.e. where
>>>> the disk is only used for record/replay.
>>>>
>>>> However I'm still having trouble. I can record the trace with:
>>>>
>>>>     ./qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
>>>>       -machine virt -kernel zephyr.elf -net none \
>>>>       -icount shift=6,align=off,sleep=off,rr=record,rrfile=record.out,rrsnapshot=rrstart  \
>>>>       -drive file=record.qcow2,if=none,id=rr \
>>>>       -monitor telnet:127.0.0.1:4444 -S
>>>
>>> Can you provide your zephyr.elf image?
>>>
>>>>
>>>> which shows:
>>>>
>>>>     (qemu) info snapshots
>>>>     info snapshots
>>>>     List of snapshots present on all disks:
>>>>     ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>>     --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>>>
>>>> but do I need a whole separate overlay in the replay case? I thought
>>>> supplying snapshot to the drive would prevent the replay case
>>>> overwriting what has been recorded but with:
>>>>
>>>>       -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out \
>>>>       -drive file=record.qcow2,if=none,id=rr,snapshot
>>>
>>> When you provide qcow2 (overlay or not) for snapshotting, you don't need
>>> any 'snapshot' option on drive.
>>>
>>>>
>>>> but I get:
>>>>
>>>>     (qemu) info snapshots
>>>>     info snapshots
>>>>     There is no snapshot available.
>>>>
>>>> so if I drop the ,snapshot from the line I can at least see the snapshot
>>>> but continue doesn't seem to work:
>>>>
>>>>     (qemu) info snapshots
>>>>     info snapshots
>>>>     List of snapshots present on all disks:
>>>>     ID        TAG               VM SIZE                DATE     VM CLOCK     ICOUNT
>>>>     --        rrstart           653 KiB 2020-09-07 17:12:42 00:00:00.000          0
>>>>     (qemu) replay_break 190505
>>>>     replay_break 190505
>>>>     (qemu) c
>>>>     c
>>>>     (qemu) info replay
>>>>     info replay
>>>>     Replaying execution 'record.out': instruction count = 0
>>>
>>> It seems, that replay hangs. Can you try removing '-S' in record command
>>> line?
>>
>> That doesn't make any difference removing from both the record and
>> replay cases. It seems to need a loadvm to start things off.
>>
>> I've sent you an image off list. Please let me know if you can
>> replicate.
> 
> OK I can successfully use gdb to reverse debug the acceptance test (\o/)
> so I suspect there are differences in the calling setup.
> 
> The first one is ensuring that rrsnapshot is set for both record and
> replay. For this reason I think a more user friendly automatic snapshot
> would be worth setting up when record/replay is being used.
> 
> -icount sleep=off definitely breaks things. Do we keep track of the

It was ok for me:
qemu-system-aarch64 -cpu cortex-a53 -display none -serial stdio \
  -machine virt -kernel zephyr-64.elf -net none \
  -icount shift=6,align=off,sleep=off,rr=replay,rrfile=record.out,rrsnapshot=rrstart \
  -drive file=record.qcow2,if=none,id=rr -s -S

>   icount bias as save and restore state?
> 

I don't know anything about sleep, but qemu_icount_bias is saved in 
vmstate when icount is enabled.

However, I noticed a strange condition at cpus.c:855
Shouldn't we check !sleep here instead of !icount_sleep?

     } else if (!icount_sleep) {
         error_setg(errp, "shift=auto and sleep=off are incompatible");
         return;
     }

     icount_sleep = sleep;
diff mbox series

Patch

diff --git a/hmp-commands.hx b/hmp-commands.hx
index e8ce385879..4288274c4e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1851,6 +1851,24 @@  SRST
   The command is ignored when there are no replay breakpoints.
 ERST
 
+    {
+        .name       = "replay_seek",
+        .args_type  = "icount:i",
+        .params     = "icount",
+        .help       = "replay execution to the specified instruction count",
+        .cmd        = hmp_replay_seek,
+    },
+
+SRST
+``replay_seek`` *icount*
+Automatically proceed to the instruction count *icount* when
+replaying the execution. The command automatically loads the nearest
+preceding snapshot and replays the execution up to the desired instruction.
+The command fails when there is no preceding snapshot or when the
+execution is not being replayed.
+A reference *icount* may be observed with the ``info replay`` command.
+ERST
+
     {
         .name       = "info",
         .args_type  = "item:s?",
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index 21849bdda5..655eb81a4c 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -133,5 +133,6 @@  void hmp_info_sev(Monitor *mon, const QDict *qdict);
 void hmp_info_replay(Monitor *mon, const QDict *qdict);
 void hmp_replay_break(Monitor *mon, const QDict *qdict);
 void hmp_replay_delete_break(Monitor *mon, const QDict *qdict);
+void hmp_replay_seek(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi/replay.json b/qapi/replay.json
index 173ba76107..bfd83d7591 100644
--- a/qapi/replay.json
+++ b/qapi/replay.json
@@ -99,3 +99,23 @@ 
 #
 ##
 { 'command': 'replay-delete-break' }
+
+##
+# @replay-seek:
+#
+# Automatically proceed to the instruction count @icount when
+# replaying the execution. The command automatically loads the nearest
+# preceding snapshot and replays the execution up to the desired instruction.
+# The command fails when there is no preceding snapshot or when the
+# execution is not being replayed.
+# A reference icount may be obtained with the @query-replay command.
+#
+# @icount: target instruction count
+#
+# Since: 5.2
+#
+# Example:
+#
+# -> { "execute": "replay-seek", "data": { "icount": 220414 } }
+##
+{ 'command': 'replay-seek', 'data': { 'icount': 'int' } }
diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
index 86e19bb217..cfd0221692 100644
--- a/replay/replay-debugging.c
+++ b/replay/replay-debugging.c
@@ -19,6 +19,8 @@ 
 #include "qapi/qapi-commands-replay.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/timer.h"
+#include "block/snapshot.h"
+#include "migration/snapshot.h"
 
 void hmp_info_replay(Monitor *mon, const QDict *qdict)
 {
@@ -127,3 +129,93 @@  void hmp_replay_delete_break(Monitor *mon, const QDict *qdict)
         return;
     }
 }
+
+/*
+ * Find the snapshot with the largest icount <= @icount.  Returns its name
+ * (caller g_free()s) and sets *snapshot_icount; NULL and -1 if none found.
+ */
+static char *replay_find_nearest_snapshot(int64_t icount,
+                                          int64_t *snapshot_icount)
+{
+    BlockDriverState *bs;
+    QEMUSnapshotInfo *sn_tab = NULL; /* freed even if listing fails */
+    QEMUSnapshotInfo *nearest = NULL;
+    char *ret = NULL;
+    int nb_sns, i;
+    AioContext *aio_context;
+
+    *snapshot_icount = -1;
+
+    bs = bdrv_all_find_vmstate_bs();
+    if (!bs) {
+        return NULL;
+    }
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+    aio_context_release(aio_context);
+
+    for (i = 0; i < nb_sns; i++) {
+        if (bdrv_all_find_snapshot(sn_tab[i].name, &bs) == 0
+            && sn_tab[i].icount != -1ULL
+            && sn_tab[i].icount <= icount
+            && (!nearest || nearest->icount < sn_tab[i].icount)) {
+            nearest = &sn_tab[i];
+        }
+    }
+    if (nearest) {
+        ret = g_strdup(nearest->name);
+        *snapshot_icount = nearest->icount;
+    }
+    g_free(sn_tab);
+    return ret;
+}
+
+/* Seek to @icount: reload the nearest snapshot if needed, then replay to it */
+static void replay_seek(int64_t icount, QEMUTimerCB callback, Error **errp)
+{
+    char *snapshot;
+    int64_t snapshot_icount;
+    int ret = 0;
+
+    if (replay_mode != REPLAY_MODE_PLAY) {
+        error_setg(errp, "replay must be enabled to seek");
+        return;
+    }
+    if (!replay_snapshot) {
+        error_setg(errp, "snapshotting is disabled");
+        return;
+    }
+    snapshot = replay_find_nearest_snapshot(icount, &snapshot_icount);
+    if (snapshot && (icount < replay_get_current_icount()
+                     || replay_get_current_icount() < snapshot_icount)) {
+        vm_stop(RUN_STATE_RESTORE_VM);
+        ret = load_snapshot(snapshot, errp);
+    }
+    g_free(snapshot);
+    /* A failed load already set errp: neither resume nor set it again */
+    if (ret >= 0 && replay_get_current_icount() <= icount) {
+        replay_break(icount, callback, NULL);
+        vm_start();
+    } else if (ret >= 0) {
+        error_setg(errp, "cannot seek to the specified instruction count");
+    }
+}
+
+/* QMP replay-seek: seek to @icount with replay_stop_vm as the callback */
+void qmp_replay_seek(int64_t icount, Error **errp)
+{
+    replay_seek(icount, replay_stop_vm, errp);
+}
+
+/* HMP replay_seek: read "icount" (-1 if absent), seek, report any error */
+void hmp_replay_seek(Monitor *mon, const QDict *qdict)
+{
+    int64_t icount = qdict_get_try_int(qdict, "icount", -1LL);
+    Error *err = NULL;
+
+    qmp_replay_seek(icount, &err);
+    if (err) {
+        error_report_err(err); /* reports and frees err */
+    }
+}