diff mbox series

[v2] contrib/plugins: Add a plugin to generate basic block vectors

Message ID 20240815-bb-v2-1-6222ee98297b@daynix.com
State New
Headers show
Series [v2] contrib/plugins: Add a plugin to generate basic block vectors | expand

Commit Message

Akihiko Odaki Aug. 15, 2024, 3:04 a.m. UTC
SimPoint[1] is a widely used tool to find the ideal microarchitecture
simulation points, so Valgrind[2] and Pin[3] support generating basic
block vectors for use with it. Let's add a corresponding plugin to
QEMU too.

Note that this plugin has a different goal from tests/plugin/bb.c.

This plugin creates a vector for each constant interval instead of
counting the execution of basic blocks for the entire run, and so is
able to describe the change of execution behavior. Its output is also
syntactically simple and better suited for parsing, while the output of
tests/plugin/bb.c is more human-readable.

[1] https://cseweb.ucsd.edu/~calder/simpoint/
[2] https://valgrind.org/docs/manual/bbv-manual.html
[3] https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html

Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
Changes in v2:
- Merged files variable into the global scoreboard.
- Added a lock for bbs.
- Added a summary to contrib/plugins/bbv.c.
- Rebased.
- Link to v1: https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
---
 docs/about/emulation.rst |  30 +++++++++
 contrib/plugins/bbv.c    | 158 +++++++++++++++++++++++++++++++++++++++++++++++
 contrib/plugins/Makefile |   1 +
 3 files changed, 189 insertions(+)


---
base-commit: 31669121a01a14732f57c49400bc239cf9fd505f
change-id: 20240618-bb-93387ddf765b

Best regards,

Comments

Pierrick Bouvier Aug. 15, 2024, 5:48 a.m. UTC | #1
On 8/14/24 20:04, Akihiko Odaki wrote:
> SimPoint is a widely used tool to find the ideal microarchitecture
> simulation points so Valgrind[2] and Pin[3] support generating basic
> block vectors for use with them. Let's add a corresponding plugin to
> QEMU too.
> 
> Note that this plugin has a different goal with tests/plugin/bb.c.
> 
> This plugin creates a vector for each constant interval instead of
> counting the execution of basic blocks for the entire run and able to
> describe the change of execution behavior. Its output is also
> syntactically simple and better suited for parsing, while the output of
> tests/plugin/bb.c is more human-readable.
> 
> [1] https://cseweb.ucsd.edu/~calder/simpoint/
> [2] https://valgrind.org/docs/manual/bbv-manual.html
> [3] https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html
> 
> Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> Changes in v2:
> - Merged files variable into the global scoreboard.
> - Added a lock for bbs.
> - Added a summary to contrib/plugins/bbv.c.
> - Rebased.
> - Link to v1: https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
> ---
>   docs/about/emulation.rst |  30 +++++++++
>   contrib/plugins/bbv.c    | 158 +++++++++++++++++++++++++++++++++++++++++++++++
>   contrib/plugins/Makefile |   1 +
>   3 files changed, 189 insertions(+)
> 
> diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
> index c03033e4e956..72d7846ab6f8 100644
> --- a/docs/about/emulation.rst
> +++ b/docs/about/emulation.rst
> @@ -381,6 +381,36 @@ run::
>     160          1      0
>     135          1      0
>   
> +Basic Block Vectors
> +...................
> +
> +``contrib/plugins/bbv.c``
> +
> +The bbv plugin allows you to generate basic block vectors for use with the
> +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
> +
> +.. list-table:: Basic block vectors arguments
> +  :widths: 20 80
> +  :header-rows: 1
> +
> +  * - Option
> +    - Description
> +  * - interval=N
> +    - The interval to generate a basic block vector specified by the number of
> +      instructions (Default: N = 100000000)
> +  * - outfile=PATH
> +    - The path to output files.
> +      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
> +
> +Example::
> +
> +  $ qemu-aarch64 \
> +    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
> +    tests/tcg/aarch64-linux-user/sha1
> +  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
> +  $ du sha1.0.bb
> +  23128   sha1.0.bb
> +
>   Hot Blocks
>   ..........
>   
> diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
> new file mode 100644
> index 000000000000..41139f423fe2
> --- /dev/null
> +++ b/contrib/plugins/bbv.c
> @@ -0,0 +1,158 @@
> +/*
> + * Generate basic block vectors for use with the SimPoint analysis tool.
> + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <stdio.h>
> +#include <glib.h>
> +
> +#include <qemu-plugin.h>
> +
> +typedef struct Bb {
> +    struct qemu_plugin_scoreboard *count;
> +    unsigned int index;
> +} Bb;
> +
> +typedef struct Vcpu {
> +    uint64_t count;
> +    FILE *file;
> +} Vcpu;
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +static GHashTable *bbs;
> +static GRWLock bbs_lock;
> +static char *filename;
> +static struct qemu_plugin_scoreboard *vcpus;
> +static uint64_t interval = 100000000;
> +
> +static void plugin_exit(qemu_plugin_id_t id, void *p)
> +{
> +    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
> +        fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file);
> +    }
> +
> +    g_hash_table_unref(bbs);
> +    g_free(filename);
> +    qemu_plugin_scoreboard_free(vcpus);
> +}
> +
> +static void free_bb(void *data)
> +{
> +    qemu_plugin_scoreboard_free(((Bb *)data)->count);
> +    g_free(data);
> +}
> +
> +static qemu_plugin_u64 count_u64(void)
> +{
> +    return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
> +}
> +
> +static qemu_plugin_u64 bb_count_u64(Bb *bb)
> +{
> +    return qemu_plugin_scoreboard_u64(bb->count);
> +}
> +
> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
> +{
> +    g_autofree gchar *vcpu_filename = NULL;
> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
> +
> +    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
> +    vcpu->file = fopen(vcpu_filename, "w");
> +}
> +
> +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
> +{
> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
> +    GHashTableIter iter;
> +    void *value;
> +
> +    if (!vcpu->file) {
> +        return;
> +    }
> +
> +    vcpu->count -= interval;
> +
> +    fputc('T', vcpu->file);
> +
> +    g_rw_lock_reader_lock(&bbs_lock);
> +    g_hash_table_iter_init(&iter, bbs);
> +
> +    while (g_hash_table_iter_next(&iter, NULL, &value)) {
> +        Bb *bb = value;
> +        uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb), vcpu_index);
> +
> +        if (!bb_count) {
> +            continue;
> +        }
> +
> +        fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count);
> +        qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0);
> +    }
> +
> +    g_rw_lock_reader_unlock(&bbs_lock);
> +    fputc('\n', vcpu->file);
> +}
> +
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
> +    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
> +    Bb *bb = g_hash_table_lookup(bbs, &vaddr);

Missing a read_lock for this access.

> +
> +    if (!bb) {
> +        uint64_t *key = g_new(uint64_t, 1);
> +
> +        *key = vaddr;
> +        bb = g_new(Bb, 1);
> +        bb->count = qemu_plugin_scoreboard_new(sizeof(uint64_t));
> +        bb->index = g_hash_table_size(bbs);
> +        g_rw_lock_writer_lock(&bbs_lock);
> +        g_hash_table_insert(bbs, key, bb);
> +        g_rw_lock_writer_unlock(&bbs_lock);
> +    }
> +
> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, count_u64(), n_insns);
> +
> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count_u64(bb), n_insns);
> +
> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
> +        tb, vcpu_interval_exec, QEMU_PLUGIN_CB_NO_REGS,
> +        QEMU_PLUGIN_COND_GE, count_u64(), interval, NULL);
> +}
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> +                                           const qemu_info_t *info,
> +                                           int argc, char **argv)
> +{
> +    for (int i = 0; i < argc; i++) {
> +        char *opt = argv[i];
> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> +        if (g_strcmp0(tokens[0], "interval") == 0) {
> +            interval = g_ascii_strtoull(tokens[1], NULL, 10);
> +        } else if (g_strcmp0(tokens[0], "outfile") == 0) {
> +            filename = tokens[1];
> +            tokens[1] = NULL;
> +        } else {
> +            fprintf(stderr, "option parsing failed: %s\n", opt);
> +            return -1;
> +        }
> +    }
> +
> +    if (!filename) {
> +        fputs("outfile unspecified\n", stderr);
> +        return -1;
> +    }
> +
> +    bbs = g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, free_bb);
> +    vcpus = qemu_plugin_scoreboard_new(sizeof(Vcpu));
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +
> +    return 0;
> +}
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index edf256cd9d11..6936591b1022 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -13,6 +13,7 @@ TOP_SRC_PATH = $(SRC_PATH)/../..
>   VPATH += $(SRC_PATH)
>   
>   NAMES :=
> +NAMES += bbv
>   NAMES += execlog
>   NAMES += hotblocks
>   NAMES += hotpages
> 
> ---
> base-commit: 31669121a01a14732f57c49400bc239cf9fd505f
> change-id: 20240618-bb-93387ddf765b
> 
> Best regards,

Otherwise,
Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Pierrick Bouvier Aug. 15, 2024, 5:50 a.m. UTC | #2
On 8/14/24 22:48, Pierrick Bouvier wrote:
> On 8/14/24 20:04, Akihiko Odaki wrote:
>> SimPoint is a widely used tool to find the ideal microarchitecture
>> simulation points so Valgrind[2] and Pin[3] support generating basic
>> block vectors for use with them. Let's add a corresponding plugin to
>> QEMU too.
>>
>> Note that this plugin has a different goal with tests/plugin/bb.c.
>>
>> This plugin creates a vector for each constant interval instead of
>> counting the execution of basic blocks for the entire run and able to
>> describe the change of execution behavior. Its output is also
>> syntactically simple and better suited for parsing, while the output of
>> tests/plugin/bb.c is more human-readable.
>>
>> [1] https://cseweb.ucsd.edu/~calder/simpoint/
>> [2] https://valgrind.org/docs/manual/bbv-manual.html
>> [3] https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html
>>
>> Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>> Changes in v2:
>> - Merged files variable into the global scoreboard.
>> - Added a lock for bbs.
>> - Added a summary to contrib/plugins/bbv.c.
>> - Rebased.
>> - Link to v1: https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
>> ---
>>    docs/about/emulation.rst |  30 +++++++++
>>    contrib/plugins/bbv.c    | 158 +++++++++++++++++++++++++++++++++++++++++++++++
>>    contrib/plugins/Makefile |   1 +
>>    3 files changed, 189 insertions(+)
>>
>> diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
>> index c03033e4e956..72d7846ab6f8 100644
>> --- a/docs/about/emulation.rst
>> +++ b/docs/about/emulation.rst
>> @@ -381,6 +381,36 @@ run::
>>      160          1      0
>>      135          1      0
>>    
>> +Basic Block Vectors
>> +...................
>> +
>> +``contrib/plugins/bbv.c``
>> +
>> +The bbv plugin allows you to generate basic block vectors for use with the
>> +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
>> +
>> +.. list-table:: Basic block vectors arguments
>> +  :widths: 20 80
>> +  :header-rows: 1
>> +
>> +  * - Option
>> +    - Description
>> +  * - interval=N
>> +    - The interval to generate a basic block vector specified by the number of
>> +      instructions (Default: N = 100000000)
>> +  * - outfile=PATH
>> +    - The path to output files.
>> +      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
>> +
>> +Example::
>> +
>> +  $ qemu-aarch64 \
>> +    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
>> +    tests/tcg/aarch64-linux-user/sha1
>> +  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
>> +  $ du sha1.0.bb
>> +  23128   sha1.0.bb
>> +
>>    Hot Blocks
>>    ..........
>>    
>> diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
>> new file mode 100644
>> index 000000000000..41139f423fe2
>> --- /dev/null
>> +++ b/contrib/plugins/bbv.c
>> @@ -0,0 +1,158 @@
>> +/*
>> + * Generate basic block vectors for use with the SimPoint analysis tool.
>> + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <stdio.h>
>> +#include <glib.h>
>> +
>> +#include <qemu-plugin.h>
>> +
>> +typedef struct Bb {
>> +    struct qemu_plugin_scoreboard *count;
>> +    unsigned int index;
>> +} Bb;
>> +
>> +typedef struct Vcpu {
>> +    uint64_t count;
>> +    FILE *file;
>> +} Vcpu;
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>> +static GHashTable *bbs;
>> +static GRWLock bbs_lock;
>> +static char *filename;
>> +static struct qemu_plugin_scoreboard *vcpus;
>> +static uint64_t interval = 100000000;
>> +
>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>> +{
>> +    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
>> +        fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file);
>> +    }
>> +
>> +    g_hash_table_unref(bbs);
>> +    g_free(filename);
>> +    qemu_plugin_scoreboard_free(vcpus);
>> +}
>> +
>> +static void free_bb(void *data)
>> +{
>> +    qemu_plugin_scoreboard_free(((Bb *)data)->count);
>> +    g_free(data);
>> +}
>> +
>> +static qemu_plugin_u64 count_u64(void)
>> +{
>> +    return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
>> +}
>> +
>> +static qemu_plugin_u64 bb_count_u64(Bb *bb)
>> +{
>> +    return qemu_plugin_scoreboard_u64(bb->count);
>> +}
>> +
>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
>> +{
>> +    g_autofree gchar *vcpu_filename = NULL;
>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>> +
>> +    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
>> +    vcpu->file = fopen(vcpu_filename, "w");
>> +}
>> +
>> +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
>> +{
>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>> +    GHashTableIter iter;
>> +    void *value;
>> +
>> +    if (!vcpu->file) {
>> +        return;
>> +    }
>> +
>> +    vcpu->count -= interval;
>> +
>> +    fputc('T', vcpu->file);
>> +
>> +    g_rw_lock_reader_lock(&bbs_lock);
>> +    g_hash_table_iter_init(&iter, bbs);
>> +
>> +    while (g_hash_table_iter_next(&iter, NULL, &value)) {
>> +        Bb *bb = value;
>> +        uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb), vcpu_index);
>> +
>> +        if (!bb_count) {
>> +            continue;
>> +        }
>> +
>> +        fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count);
>> +        qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0);
>> +    }
>> +
>> +    g_rw_lock_reader_unlock(&bbs_lock);
>> +    fputc('\n', vcpu->file);
>> +}
>> +
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> +{
>> +    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
>> +    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
>> +    Bb *bb = g_hash_table_lookup(bbs, &vaddr);
> 
> Missing a read_lock for this access.
> 
>> +
>> +    if (!bb) {
>> +        uint64_t *key = g_new(uint64_t, 1);
>> +
>> +        *key = vaddr;
>> +        bb = g_new(Bb, 1);
>> +        bb->count = qemu_plugin_scoreboard_new(sizeof(uint64_t));
>> +        bb->index = g_hash_table_size(bbs);

Querying the size should also be protected by the writer_lock (or by
another read_lock, but that would be less efficient).

>> +        g_rw_lock_writer_lock(&bbs_lock);
>> +        g_hash_table_insert(bbs, key, bb);
>> +        g_rw_lock_writer_unlock(&bbs_lock);
>> +    }
>> +
>> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, count_u64(), n_insns);
>> +
>> +    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>> +        tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count_u64(bb), n_insns);
>> +
>> +    qemu_plugin_register_vcpu_tb_exec_cond_cb(
>> +        tb, vcpu_interval_exec, QEMU_PLUGIN_CB_NO_REGS,
>> +        QEMU_PLUGIN_COND_GE, count_u64(), interval, NULL);
>> +}
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>> +                                           const qemu_info_t *info,
>> +                                           int argc, char **argv)
>> +{
>> +    for (int i = 0; i < argc; i++) {
>> +        char *opt = argv[i];
>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>> +        if (g_strcmp0(tokens[0], "interval") == 0) {
>> +            interval = g_ascii_strtoull(tokens[1], NULL, 10);
>> +        } else if (g_strcmp0(tokens[0], "outfile") == 0) {
>> +            filename = tokens[1];
>> +            tokens[1] = NULL;
>> +        } else {
>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    if (!filename) {
>> +        fputs("outfile unspecified\n", stderr);
>> +        return -1;
>> +    }
>> +
>> +    bbs = g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, free_bb);
>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(Vcpu));
>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>> +
>> +    return 0;
>> +}
>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>> index edf256cd9d11..6936591b1022 100644
>> --- a/contrib/plugins/Makefile
>> +++ b/contrib/plugins/Makefile
>> @@ -13,6 +13,7 @@ TOP_SRC_PATH = $(SRC_PATH)/../..
>>    VPATH += $(SRC_PATH)
>>    
>>    NAMES :=
>> +NAMES += bbv
>>    NAMES += execlog
>>    NAMES += hotblocks
>>    NAMES += hotpages
>>
>> ---
>> base-commit: 31669121a01a14732f57c49400bc239cf9fd505f
>> change-id: 20240618-bb-93387ddf765b
>>
>> Best regards,
> 
> Otherwise,
> Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Akihiko Odaki Aug. 16, 2024, 5:13 a.m. UTC | #3
On 2024/08/15 14:48, Pierrick Bouvier wrote:
> On 8/14/24 20:04, Akihiko Odaki wrote:
>> SimPoint is a widely used tool to find the ideal microarchitecture
>> simulation points so Valgrind[2] and Pin[3] support generating basic
>> block vectors for use with them. Let's add a corresponding plugin to
>> QEMU too.
>>
>> Note that this plugin has a different goal with tests/plugin/bb.c.
>>
>> This plugin creates a vector for each constant interval instead of
>> counting the execution of basic blocks for the entire run and able to
>> describe the change of execution behavior. Its output is also
>> syntactically simple and better suited for parsing, while the output of
>> tests/plugin/bb.c is more human-readable.
>>
>> [1] https://cseweb.ucsd.edu/~calder/simpoint/
>> [2] https://valgrind.org/docs/manual/bbv-manual.html
>> [3] 
>> https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html
>>
>> Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>> Changes in v2:
>> - Merged files variable into the global scoreboard.
>> - Added a lock for bbs.
>> - Added a summary to contrib/plugins/bbv.c.
>> - Rebased.
>> - Link to v1: 
>> https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
>> ---
>>   docs/about/emulation.rst |  30 +++++++++
>>   contrib/plugins/bbv.c    | 158 
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>   contrib/plugins/Makefile |   1 +
>>   3 files changed, 189 insertions(+)
>>
>> diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
>> index c03033e4e956..72d7846ab6f8 100644
>> --- a/docs/about/emulation.rst
>> +++ b/docs/about/emulation.rst
>> @@ -381,6 +381,36 @@ run::
>>     160          1      0
>>     135          1      0
>> +Basic Block Vectors
>> +...................
>> +
>> +``contrib/plugins/bbv.c``
>> +
>> +The bbv plugin allows you to generate basic block vectors for use 
>> with the
>> +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
>> +
>> +.. list-table:: Basic block vectors arguments
>> +  :widths: 20 80
>> +  :header-rows: 1
>> +
>> +  * - Option
>> +    - Description
>> +  * - interval=N
>> +    - The interval to generate a basic block vector specified by the 
>> number of
>> +      instructions (Default: N = 100000000)
>> +  * - outfile=PATH
>> +    - The path to output files.
>> +      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
>> +
>> +Example::
>> +
>> +  $ qemu-aarch64 \
>> +    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
>> +    tests/tcg/aarch64-linux-user/sha1
>> +  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
>> +  $ du sha1.0.bb
>> +  23128   sha1.0.bb
>> +
>>   Hot Blocks
>>   ..........
>> diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
>> new file mode 100644
>> index 000000000000..41139f423fe2
>> --- /dev/null
>> +++ b/contrib/plugins/bbv.c
>> @@ -0,0 +1,158 @@
>> +/*
>> + * Generate basic block vectors for use with the SimPoint analysis tool.
>> + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <stdio.h>
>> +#include <glib.h>
>> +
>> +#include <qemu-plugin.h>
>> +
>> +typedef struct Bb {
>> +    struct qemu_plugin_scoreboard *count;
>> +    unsigned int index;
>> +} Bb;
>> +
>> +typedef struct Vcpu {
>> +    uint64_t count;
>> +    FILE *file;
>> +} Vcpu;
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>> +static GHashTable *bbs;
>> +static GRWLock bbs_lock;
>> +static char *filename;
>> +static struct qemu_plugin_scoreboard *vcpus;
>> +static uint64_t interval = 100000000;
>> +
>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>> +{
>> +    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
>> +        fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file);
>> +    }
>> +
>> +    g_hash_table_unref(bbs);
>> +    g_free(filename);
>> +    qemu_plugin_scoreboard_free(vcpus);
>> +}
>> +
>> +static void free_bb(void *data)
>> +{
>> +    qemu_plugin_scoreboard_free(((Bb *)data)->count);
>> +    g_free(data);
>> +}
>> +
>> +static qemu_plugin_u64 count_u64(void)
>> +{
>> +    return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
>> +}
>> +
>> +static qemu_plugin_u64 bb_count_u64(Bb *bb)
>> +{
>> +    return qemu_plugin_scoreboard_u64(bb->count);
>> +}
>> +
>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
>> +{
>> +    g_autofree gchar *vcpu_filename = NULL;
>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>> +
>> +    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
>> +    vcpu->file = fopen(vcpu_filename, "w");
>> +}
>> +
>> +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
>> +{
>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>> +    GHashTableIter iter;
>> +    void *value;
>> +
>> +    if (!vcpu->file) {
>> +        return;
>> +    }
>> +
>> +    vcpu->count -= interval;
>> +
>> +    fputc('T', vcpu->file);
>> +
>> +    g_rw_lock_reader_lock(&bbs_lock);
>> +    g_hash_table_iter_init(&iter, bbs);
>> +
>> +    while (g_hash_table_iter_next(&iter, NULL, &value)) {
>> +        Bb *bb = value;
>> +        uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb), 
>> vcpu_index);
>> +
>> +        if (!bb_count) {
>> +            continue;
>> +        }
>> +
>> +        fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count);
>> +        qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0);
>> +    }
>> +
>> +    g_rw_lock_reader_unlock(&bbs_lock);
>> +    fputc('\n', vcpu->file);
>> +}
>> +
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb 
>> *tb)
>> +{
>> +    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
>> +    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
>> +    Bb *bb = g_hash_table_lookup(bbs, &vaddr);
> 
> Missing a read_lock for this access.

I expect vcpu_tb_trans() will not be executed concurrently.
Akihiko Odaki Aug. 16, 2024, 5:28 a.m. UTC | #4
On 2024/08/16 14:13, Akihiko Odaki wrote:
> On 2024/08/15 14:48, Pierrick Bouvier wrote:
>> On 8/14/24 20:04, Akihiko Odaki wrote:
>>> SimPoint is a widely used tool to find the ideal microarchitecture
>>> simulation points so Valgrind[2] and Pin[3] support generating basic
>>> block vectors for use with them. Let's add a corresponding plugin to
>>> QEMU too.
>>>
>>> Note that this plugin has a different goal with tests/plugin/bb.c.
>>>
>>> This plugin creates a vector for each constant interval instead of
>>> counting the execution of basic blocks for the entire run and able to
>>> describe the change of execution behavior. Its output is also
>>> syntactically simple and better suited for parsing, while the output of
>>> tests/plugin/bb.c is more human-readable.
>>>
>>> [1] https://cseweb.ucsd.edu/~calder/simpoint/
>>> [2] https://valgrind.org/docs/manual/bbv-manual.html
>>> [3] 
>>> https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html
>>>
>>> Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
>>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>>> ---
>>> Changes in v2:
>>> - Merged files variable into the global scoreboard.
>>> - Added a lock for bbs.
>>> - Added a summary to contrib/plugins/bbv.c.
>>> - Rebased.
>>> - Link to v1: 
>>> https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
>>> ---
>>>   docs/about/emulation.rst |  30 +++++++++
>>>   contrib/plugins/bbv.c    | 158 
>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>   contrib/plugins/Makefile |   1 +
>>>   3 files changed, 189 insertions(+)
>>>
>>> diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
>>> index c03033e4e956..72d7846ab6f8 100644
>>> --- a/docs/about/emulation.rst
>>> +++ b/docs/about/emulation.rst
>>> @@ -381,6 +381,36 @@ run::
>>>     160          1      0
>>>     135          1      0
>>> +Basic Block Vectors
>>> +...................
>>> +
>>> +``contrib/plugins/bbv.c``
>>> +
>>> +The bbv plugin allows you to generate basic block vectors for use 
>>> with the
>>> +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
>>> +
>>> +.. list-table:: Basic block vectors arguments
>>> +  :widths: 20 80
>>> +  :header-rows: 1
>>> +
>>> +  * - Option
>>> +    - Description
>>> +  * - interval=N
>>> +    - The interval to generate a basic block vector specified by the 
>>> number of
>>> +      instructions (Default: N = 100000000)
>>> +  * - outfile=PATH
>>> +    - The path to output files.
>>> +      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
>>> +
>>> +Example::
>>> +
>>> +  $ qemu-aarch64 \
>>> +    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
>>> +    tests/tcg/aarch64-linux-user/sha1
>>> +  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
>>> +  $ du sha1.0.bb
>>> +  23128   sha1.0.bb
>>> +
>>>   Hot Blocks
>>>   ..........
>>> diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
>>> new file mode 100644
>>> index 000000000000..41139f423fe2
>>> --- /dev/null
>>> +++ b/contrib/plugins/bbv.c
>>> @@ -0,0 +1,158 @@
>>> +/*
>>> + * Generate basic block vectors for use with the SimPoint analysis 
>>> tool.
>>> + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include <stdio.h>
>>> +#include <glib.h>
>>> +
>>> +#include <qemu-plugin.h>
>>> +
>>> +typedef struct Bb {
>>> +    struct qemu_plugin_scoreboard *count;
>>> +    unsigned int index;
>>> +} Bb;
>>> +
>>> +typedef struct Vcpu {
>>> +    uint64_t count;
>>> +    FILE *file;
>>> +} Vcpu;
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>> +static GHashTable *bbs;
>>> +static GRWLock bbs_lock;
>>> +static char *filename;
>>> +static struct qemu_plugin_scoreboard *vcpus;
>>> +static uint64_t interval = 100000000;
>>> +
>>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>>> +{
>>> +    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
>>> +        fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file);
>>> +    }
>>> +
>>> +    g_hash_table_unref(bbs);
>>> +    g_free(filename);
>>> +    qemu_plugin_scoreboard_free(vcpus);
>>> +}
>>> +
>>> +static void free_bb(void *data)
>>> +{
>>> +    qemu_plugin_scoreboard_free(((Bb *)data)->count);
>>> +    g_free(data);
>>> +}
>>> +
>>> +static qemu_plugin_u64 count_u64(void)
>>> +{
>>> +    return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
>>> +}
>>> +
>>> +static qemu_plugin_u64 bb_count_u64(Bb *bb)
>>> +{
>>> +    return qemu_plugin_scoreboard_u64(bb->count);
>>> +}
>>> +
>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
>>> +{
>>> +    g_autofree gchar *vcpu_filename = NULL;
>>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>>> +
>>> +    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
>>> +    vcpu->file = fopen(vcpu_filename, "w");
>>> +}
>>> +
>>> +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
>>> +{
>>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>>> +    GHashTableIter iter;
>>> +    void *value;
>>> +
>>> +    if (!vcpu->file) {
>>> +        return;
>>> +    }
>>> +
>>> +    vcpu->count -= interval;
>>> +
>>> +    fputc('T', vcpu->file);
>>> +
>>> +    g_rw_lock_reader_lock(&bbs_lock);
>>> +    g_hash_table_iter_init(&iter, bbs);
>>> +
>>> +    while (g_hash_table_iter_next(&iter, NULL, &value)) {
>>> +        Bb *bb = value;
>>> +        uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb), 
>>> vcpu_index);
>>> +
>>> +        if (!bb_count) {
>>> +            continue;
>>> +        }
>>> +
>>> +        fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count);
>>> +        qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0);
>>> +    }
>>> +
>>> +    g_rw_lock_reader_unlock(&bbs_lock);
>>> +    fputc('\n', vcpu->file);
>>> +}
>>> +
>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb 
>>> *tb)
>>> +{
>>> +    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
>>> +    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
>>> +    Bb *bb = g_hash_table_lookup(bbs, &vaddr);
>>
>> Missing a read_lock for this access.
> 
> I expect vcpu_tb_trans() will not be executed concurrently.

No, I was wrong: QEMU doesn't lock here unless it is user-mode
emulation. I'll add the read lock.

Regards,
Akihiko Odaki
Pierrick Bouvier Aug. 16, 2024, 3:42 p.m. UTC | #5
On 8/15/24 22:28, Akihiko Odaki wrote:
> On 2024/08/16 14:13, Akihiko Odaki wrote:
>> On 2024/08/15 14:48, Pierrick Bouvier wrote:
>>> On 8/14/24 20:04, Akihiko Odaki wrote:
>>>> SimPoint is a widely used tool to find the ideal microarchitecture
>>>> simulation points so Valgrind[2] and Pin[3] support generating basic
>>>> block vectors for use with them. Let's add a corresponding plugin to
>>>> QEMU too.
>>>>
>>>> Note that this plugin has a different goal with tests/plugin/bb.c.
>>>>
>>>> This plugin creates a vector for each constant interval instead of
>>>> counting the execution of basic blocks for the entire run and able to
>>>> describe the change of execution behavior. Its output is also
>>>> syntactically simple and better suited for parsing, while the output of
>>>> tests/plugin/bb.c is more human-readable.
>>>>
>>>> [1] https://cseweb.ucsd.edu/~calder/simpoint/
>>>> [2] https://valgrind.org/docs/manual/bbv-manual.html
>>>> [3]
>>>> https://www.intel.com/content/www/us/en/developer/articles/tool/pin-a-dynamic-binary-instrumentation-tool.html
>>>>
>>>> Signed-off-by: Yotaro Nada <yotaro.nada@gmail.com>
>>>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>>>> ---
>>>> Changes in v2:
>>>> - Merged files variable into the global scoreboard.
>>>> - Added a lock for bbs.
>>>> - Added a summary to contrib/plugins/bbv.c.
>>>> - Rebased.
>>>> - Link to v1:
>>>> https://lore.kernel.org/r/20240813-bb-v1-1-effbb77daebf@daynix.com
>>>> ---
>>>>    docs/about/emulation.rst |  30 +++++++++
>>>>    contrib/plugins/bbv.c    | 158
>>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>>    contrib/plugins/Makefile |   1 +
>>>>    3 files changed, 189 insertions(+)
>>>>
>>>> diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
>>>> index c03033e4e956..72d7846ab6f8 100644
>>>> --- a/docs/about/emulation.rst
>>>> +++ b/docs/about/emulation.rst
>>>> @@ -381,6 +381,36 @@ run::
>>>>      160          1      0
>>>>      135          1      0
>>>> +Basic Block Vectors
>>>> +...................
>>>> +
>>>> +``contrib/plugins/bbv.c``
>>>> +
>>>> +The bbv plugin allows you to generate basic block vectors for use
>>>> with the
>>>> +`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
>>>> +
>>>> +.. list-table:: Basic block vectors arguments
>>>> +  :widths: 20 80
>>>> +  :header-rows: 1
>>>> +
>>>> +  * - Option
>>>> +    - Description
>>>> +  * - interval=N
>>>> +    - The interval to generate a basic block vector specified by the
>>>> number of
>>>> +      instructions (Default: N = 100000000)
>>>> +  * - outfile=PATH
>>>> +    - The path to output files.
>>>> +      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
>>>> +
>>>> +Example::
>>>> +
>>>> +  $ qemu-aarch64 \
>>>> +    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
>>>> +    tests/tcg/aarch64-linux-user/sha1
>>>> +  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
>>>> +  $ du sha1.0.bb
>>>> +  23128   sha1.0.bb
>>>> +
>>>>    Hot Blocks
>>>>    ..........
>>>> diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
>>>> new file mode 100644
>>>> index 000000000000..41139f423fe2
>>>> --- /dev/null
>>>> +++ b/contrib/plugins/bbv.c
>>>> @@ -0,0 +1,158 @@
>>>> +/*
>>>> + * Generate basic block vectors for use with the SimPoint analysis
>>>> tool.
>>>> + * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
>>>> + *
>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <glib.h>
>>>> +
>>>> +#include <qemu-plugin.h>
>>>> +
>>>> +typedef struct Bb {
>>>> +    struct qemu_plugin_scoreboard *count;
>>>> +    unsigned int index;
>>>> +} Bb;
>>>> +
>>>> +typedef struct Vcpu {
>>>> +    uint64_t count;
>>>> +    FILE *file;
>>>> +} Vcpu;
>>>> +
>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>>> +static GHashTable *bbs;
>>>> +static GRWLock bbs_lock;
>>>> +static char *filename;
>>>> +static struct qemu_plugin_scoreboard *vcpus;
>>>> +static uint64_t interval = 100000000;
>>>> +
>>>> +static void plugin_exit(qemu_plugin_id_t id, void *p)
>>>> +{
>>>> +    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
>>>> +        fclose(((Vcpu *)qemu_plugin_scoreboard_find(vcpus, i))->file);
>>>> +    }
>>>> +
>>>> +    g_hash_table_unref(bbs);
>>>> +    g_free(filename);
>>>> +    qemu_plugin_scoreboard_free(vcpus);
>>>> +}
>>>> +
>>>> +static void free_bb(void *data)
>>>> +{
>>>> +    qemu_plugin_scoreboard_free(((Bb *)data)->count);
>>>> +    g_free(data);
>>>> +}
>>>> +
>>>> +static qemu_plugin_u64 count_u64(void)
>>>> +{
>>>> +    return qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
>>>> +}
>>>> +
>>>> +static qemu_plugin_u64 bb_count_u64(Bb *bb)
>>>> +{
>>>> +    return qemu_plugin_scoreboard_u64(bb->count);
>>>> +}
>>>> +
>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
>>>> +{
>>>> +    g_autofree gchar *vcpu_filename = NULL;
>>>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>>>> +
>>>> +    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
>>>> +    vcpu->file = fopen(vcpu_filename, "w");
>>>> +}
>>>> +
>>>> +static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
>>>> +{
>>>> +    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
>>>> +    GHashTableIter iter;
>>>> +    void *value;
>>>> +
>>>> +    if (!vcpu->file) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    vcpu->count -= interval;
>>>> +
>>>> +    fputc('T', vcpu->file);
>>>> +
>>>> +    g_rw_lock_reader_lock(&bbs_lock);
>>>> +    g_hash_table_iter_init(&iter, bbs);
>>>> +
>>>> +    while (g_hash_table_iter_next(&iter, NULL, &value)) {
>>>> +        Bb *bb = value;
>>>> +        uint64_t bb_count = qemu_plugin_u64_get(bb_count_u64(bb),
>>>> vcpu_index);
>>>> +
>>>> +        if (!bb_count) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        fprintf(vcpu->file, ":%u:%" PRIu64 " ", bb->index, bb_count);
>>>> +        qemu_plugin_u64_set(bb_count_u64(bb), vcpu_index, 0);
>>>> +    }
>>>> +
>>>> +    g_rw_lock_reader_unlock(&bbs_lock);
>>>> +    fputc('\n', vcpu->file);
>>>> +}
>>>> +
>>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb
>>>> *tb)
>>>> +{
>>>> +    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
>>>> +    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
>>>> +    Bb *bb = g_hash_table_lookup(bbs, &vaddr);
>>>
>>> Missing a read_lock for this access.
>>
>> I expect vcpu_tb_trans() will not be executed concurrently.
> 
> No, QEMU doesn't lock unless it is userspace emulation. I'll add the
> read lock.
>

User mode uses a shared code cache while system mode uses a code cache 
per vcpu (thus, translation can happen concurrently).
https://www.qemu.org/docs/master/devel/multi-thread-tcg.html#global-tcg-state

> Regards,
> Akihiko Odaki
Richard Henderson Aug. 16, 2024, 10:12 p.m. UTC | #6
On 8/17/24 01:42, Pierrick Bouvier wrote:
> User mode uses a shared code cache while system mode uses a code cache per vcpu (thus, 
> translation can happen concurrently).

Not 100% accurate.  The system mode code cache is global, but is partitioned so that each 
partition has a different lock, so that multiple vcpus can translate concurrently.


r~
diff mbox series

Patch

diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst
index c03033e4e956..72d7846ab6f8 100644
--- a/docs/about/emulation.rst
+++ b/docs/about/emulation.rst
@@ -381,6 +381,36 @@  run::
   160          1      0
   135          1      0
 
+Basic Block Vectors
+...................
+
+``contrib/plugins/bbv.c``
+
+The bbv plugin allows you to generate basic block vectors for use with the
+`SimPoint <https://cseweb.ucsd.edu/~calder/simpoint/>`__ analysis tool.
+
+.. list-table:: Basic block vectors arguments
+  :widths: 20 80
+  :header-rows: 1
+
+  * - Option
+    - Description
+  * - interval=N
+    - The interval between generated basic block vectors, specified as a
+      number of executed instructions (Default: N = 100000000)
+  * - outfile=PATH
+    - The path to output files.
+      It will be suffixed with ``.N.bb`` where ``N`` is a vCPU index.
+
+Example::
+
+  $ qemu-aarch64 \
+    -plugin contrib/plugins/libbbv.so,interval=100,outfile=sha1 \
+    tests/tcg/aarch64-linux-user/sha1
+  SHA1=15dd99a1991e0b3826fede3deffc1feba42278e6
+  $ du sha1.0.bb
+  23128   sha1.0.bb
+
 Hot Blocks
 ..........
 
diff --git a/contrib/plugins/bbv.c b/contrib/plugins/bbv.c
new file mode 100644
index 000000000000..41139f423fe2
--- /dev/null
+++ b/contrib/plugins/bbv.c
@@ -0,0 +1,158 @@ 
+/*
+ * Generate basic block vectors for use with the SimPoint analysis tool.
+ * SimPoint: https://cseweb.ucsd.edu/~calder/simpoint/
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+/* One basic block, stored as a value in the bbs hash table. */
+typedef struct Bb {
+    /* Scoreboard of uint64_t slots; n_insns is added on each execution. */
+    struct qemu_plugin_scoreboard *count;
+    /* Size of bbs when this block was inserted; printed in the output. */
+    unsigned int index;
+} Bb;
+
+/* Per-vCPU state, stored as an entry of the vcpus scoreboard. */
+typedef struct Vcpu {
+    /* Instruction counter; reduced by interval in vcpu_interval_exec(). */
+    uint64_t count;
+    /* Output stream opened in vcpu_init(); NULL if fopen() failed. */
+    FILE *file;
+} Vcpu;
+
+/* Exported plugin API version symbol. */
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+/* Maps a block's start vaddr (uint64_t key) to its Bb. */
+static GHashTable *bbs;
+/* Guards bbs: vcpu_interval_exec() reads, vcpu_tb_trans() writes. */
+static GRWLock bbs_lock;
+/* Output path prefix taken from the outfile= option. */
+static char *filename;
+/* Scoreboard of Vcpu entries. */
+static struct qemu_plugin_scoreboard *vcpus;
+/* Number of instructions per vector; overridden by the interval= option. */
+static uint64_t interval = 100000000;
+
+/*
+ * Exit callback: close every per-vCPU output file and release the
+ * plugin's global state (block table, output path, vCPU scoreboard).
+ *
+ * @id: plugin id (unused)
+ * @p: user data registered with the callback (unused)
+ */
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+    for (int i = 0; i < qemu_plugin_num_vcpus(); i++) {
+        Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, i);
+
+        /* The file is NULL if fopen() failed in vcpu_init(). */
+        if (vcpu->file) {
+            fclose(vcpu->file);
+        }
+    }
+
+    g_hash_table_unref(bbs);
+    g_free(filename);
+    qemu_plugin_scoreboard_free(vcpus);
+}
+
+/* Value destructor for bbs: release a Bb together with its scoreboard. */
+static void free_bb(void *data)
+{
+    Bb *bb = data;
+
+    qemu_plugin_scoreboard_free(bb->count);
+    g_free(bb);
+}
+
+/* Return the u64 accessor for the count field of the vcpus scoreboard. */
+static qemu_plugin_u64 count_u64(void)
+{
+    qemu_plugin_u64 insn_count =
+        qemu_plugin_scoreboard_u64_in_struct(vcpus, Vcpu, count);
+
+    return insn_count;
+}
+
+/* Return the u64 accessor for the execution counter scoreboard of @bb. */
+static qemu_plugin_u64 bb_count_u64(Bb *bb)
+{
+    qemu_plugin_u64 exec_count = qemu_plugin_scoreboard_u64(bb->count);
+
+    return exec_count;
+}
+
+/*
+ * vCPU init callback: open "<outfile>.<vcpu_index>.bb" for writing and
+ * store the stream in this vCPU's scoreboard entry.
+ *
+ * If the file cannot be opened, the error is reported on stderr and the
+ * entry's file stays NULL, which makes vcpu_interval_exec() skip output
+ * for this vCPU.
+ *
+ * @id: plugin id (unused)
+ * @vcpu_index: index of the vCPU being initialized
+ */
+static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
+{
+    g_autofree gchar *vcpu_filename = NULL;
+    Vcpu *vcpu = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
+
+    vcpu_filename = g_strdup_printf("%s.%u.bb", filename, vcpu_index);
+    vcpu->file = fopen(vcpu_filename, "w");
+    if (!vcpu->file) {
+        /* Do not lose the vector silently; say which file failed. */
+        perror(vcpu_filename);
+    }
+}
+
+/*
+ * Conditional TB-exec callback, registered in vcpu_tb_trans() to fire
+ * once the vCPU's instruction counter reaches the interval.
+ *
+ * Writes one line to the vCPU's output file: 'T', then ":index:count "
+ * for every block with a non-zero counter on this vCPU, then a newline.
+ * Each reported counter is reset to zero and the instruction counter is
+ * reduced by the interval. Nothing happens if the vCPU has no file.
+ *
+ * @vcpu_index: index of the vCPU that crossed the interval
+ * @udata: callback user data (unused)
+ */
+static void vcpu_interval_exec(unsigned int vcpu_index, void *udata)
+{
+    Vcpu *state = qemu_plugin_scoreboard_find(vcpus, vcpu_index);
+    FILE *out = state->file;
+    GHashTableIter it;
+    void *entry;
+
+    if (out == NULL) {
+        return;
+    }
+
+    state->count -= interval;
+
+    fputc('T', out);
+
+    /* Hold the table stable against inserts while it is walked. */
+    g_rw_lock_reader_lock(&bbs_lock);
+
+    for (g_hash_table_iter_init(&it, bbs);
+         g_hash_table_iter_next(&it, NULL, &entry); ) {
+        Bb *bb = entry;
+        qemu_plugin_u64 slot = bb_count_u64(bb);
+        uint64_t executed = qemu_plugin_u64_get(slot, vcpu_index);
+
+        if (executed != 0) {
+            fprintf(out, ":%u:%" PRIu64 " ", bb->index, executed);
+            qemu_plugin_u64_set(slot, vcpu_index, 0);
+        }
+    }
+
+    g_rw_lock_reader_unlock(&bbs_lock);
+    fputc('\n', out);
+}
+
+/*
+ * TB translation callback: find or create the Bb for the block starting
+ * at the TB's vaddr, then instrument the TB so that every execution
+ *   - adds the TB's instruction count to the vCPU's instruction counter,
+ *   - adds the same amount to the block's per-vCPU counter, and
+ *   - calls vcpu_interval_exec() once the vCPU counter is >= interval.
+ *
+ * @id: plugin id (unused)
+ * @tb: translation block being translated
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    uint64_t n_insns = qemu_plugin_tb_n_insns(tb);
+    uint64_t vaddr = qemu_plugin_tb_vaddr(tb);
+    Bb *bb;
+
+    /*
+     * Several vCPUs can translate concurrently, so the lookup, the index
+     * assignment and the insertion are done under one writer lock.
+     * Otherwise a lookup could race with an insert, and two threads
+     * could insert the same vaddr or hand out the same index.
+     */
+    g_rw_lock_writer_lock(&bbs_lock);
+    bb = g_hash_table_lookup(bbs, &vaddr);
+    if (!bb) {
+        uint64_t *key = g_new(uint64_t, 1);
+
+        *key = vaddr;
+        bb = g_new(Bb, 1);
+        bb->count = qemu_plugin_scoreboard_new(sizeof(uint64_t));
+        bb->index = g_hash_table_size(bbs);
+        g_hash_table_insert(bbs, key, bb);
+    }
+    g_rw_lock_writer_unlock(&bbs_lock);
+
+    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+        tb, QEMU_PLUGIN_INLINE_ADD_U64, count_u64(), n_insns);
+
+    qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+        tb, QEMU_PLUGIN_INLINE_ADD_U64, bb_count_u64(bb), n_insns);
+
+    qemu_plugin_register_vcpu_tb_exec_cond_cb(
+        tb, vcpu_interval_exec, QEMU_PLUGIN_CB_NO_REGS,
+        QEMU_PLUGIN_COND_GE, count_u64(), interval, NULL);
+}
+
+/*
+ * Plugin entry point: parse the options, create the global state and
+ * register the exit, vCPU-init and TB-translation callbacks.
+ *
+ * Options (each given as "name=value"):
+ *   interval=N    instructions per vector; must be a non-zero decimal
+ *   outfile=PATH  output path prefix; required
+ *
+ * @id: plugin id used to register callbacks
+ * @info: emulation information (unused)
+ * @argc: number of option strings
+ * @argv: option strings
+ *
+ * Returns 0 on success, -1 if an option is unknown or invalid or if
+ * outfile is missing.
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info,
+                                           int argc, char **argv)
+{
+    for (int i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+        if (g_strcmp0(tokens[0], "interval") == 0) {
+            /*
+             * tokens[1] is NULL when no "=value" was given. A zero
+             * interval is rejected too: the ">= interval" condition
+             * registered in vcpu_tb_trans() would then always hold.
+             */
+            interval = tokens[1] ? g_ascii_strtoull(tokens[1], NULL, 10) : 0;
+            if (!interval) {
+                fprintf(stderr, "invalid interval: %s\n", opt);
+                return -1;
+            }
+        } else if (g_strcmp0(tokens[0], "outfile") == 0) {
+            /* Free the path of an earlier outfile= so it is not leaked. */
+            g_free(filename);
+            /* Take ownership of the value so g_auto does not free it. */
+            filename = tokens[1];
+            tokens[1] = NULL;
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    if (!filename) {
+        fputs("outfile unspecified\n", stderr);
+        return -1;
+    }
+
+    bbs = g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, free_bb);
+    vcpus = qemu_plugin_scoreboard_new(sizeof(Vcpu));
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+
+    return 0;
+}
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index edf256cd9d11..6936591b1022 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -13,6 +13,7 @@  TOP_SRC_PATH = $(SRC_PATH)/../..
 VPATH += $(SRC_PATH)
 
 NAMES :=
+NAMES += bbv
 NAMES += execlog
 NAMES += hotblocks
 NAMES += hotpages