Message ID | 20210618091101.2802534-2-erdnaxe@crans.org |
---|---|
State | New |
Headers | show |
Series | execlog TCG plugin to log instructions | expand |
Alexandre Iooss <erdnaxe@crans.org> writes: > Log instruction execution and memory access to a file. > This plugin can be used for reverse engineering or for side-channel analysis > using QEMU. > > Signed-off-by: Alexandre Iooss <erdnaxe@crans.org> > --- > MAINTAINERS | 1 + > contrib/plugins/Makefile | 1 + > contrib/plugins/execlog.c | 123 ++++++++++++++++++++++++++++++++++++++ > 3 files changed, 125 insertions(+) > create mode 100644 contrib/plugins/execlog.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index 7d9cd29042..65942d5802 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -2974,6 +2974,7 @@ F: include/tcg/ > > TCG Plugins > M: Alex Bennée <alex.bennee@linaro.org> > +R: Alexandre Iooss <erdnaxe@crans.org> > S: Maintained > F: docs/devel/tcg-plugins.rst > F: plugins/ > diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile > index b9d7935e5e..51093acd17 100644 > --- a/contrib/plugins/Makefile > +++ b/contrib/plugins/Makefile > @@ -13,6 +13,7 @@ include $(BUILD_DIR)/config-host.mak > VPATH += $(SRC_PATH)/contrib/plugins > > NAMES := > +NAMES += execlog > NAMES += hotblocks > NAMES += hotpages > NAMES += howvec > diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c > new file mode 100644 > index 0000000000..995c4477f9 > --- /dev/null > +++ b/contrib/plugins/execlog.c > @@ -0,0 +1,123 @@ > +/* > + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org> > + * > + * Log instruction execution with memory access. > + * > + * License: GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. 
> + */ > +#include <glib.h> > +#include <inttypes.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > + > +#include <qemu-plugin.h> > + > +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; > + > +/* Store last executed instruction on each vCPU */ > +GString **last_exec; > + > +/** > + * Add memory read or write information to current instruction log > + */ > +static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info, > + uint64_t vaddr, void *udata) > +{ > + if (qemu_plugin_mem_is_store(info)) { > + g_string_append(last_exec[cpu_index], ", store"); > + } else { > + g_string_append(last_exec[cpu_index], ", load"); > + } > + > + /* If full system emulation log physical address and device name > */ The comment and logic implies that we might be running in user-mode but... > + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr); > + if (hwaddr) { > + uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr); > + const char *name = qemu_plugin_hwaddr_device_name(hwaddr); > + g_string_append_printf(last_exec[cpu_index], ", 0x%08"PRIx64", %s", > + addr, name); > + } else { > + g_string_append_printf(last_exec[cpu_index], ", 0x%08"PRIx64, vaddr); > + } > +} > + > +/** > + * Log instruction execution > + */ > +static void vcpu_insn_exec(unsigned int cpu_index, void *udata) > +{ > + /* Print previous instruction in cache */ > + if (last_exec[cpu_index]->str) { > + qemu_plugin_outs(last_exec[cpu_index]->str); > + qemu_plugin_outs("\n"); > + } > + > + /* Store new instruction in cache */ > + /* vcpu_mem will add memory access information to last_exec */ > + g_string_printf(last_exec[cpu_index], "%u, ", cpu_index); > + g_string_append(last_exec[cpu_index], (char *)udata); > +} > + > +/** > + * On translation block new translation > + * > + * QEMU convert code by translation block (TB). By hooking here we can then hook > + * a callback on each instruction and memory access. 
> + */ > +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) > +{ > + struct qemu_plugin_insn *insn; > + uint64_t insn_vaddr; > + uint32_t insn_opcode; > + char *insn_disas; > + > + size_t n = qemu_plugin_tb_n_insns(tb); > + for (size_t i = 0; i < n; i++) { > + /* > + * `insn` is shared between translations in QEMU, copy needed data here. > + * `output` is never freed as it might be used multiple times during > + * the emulation lifetime. > + * We only consider the first 32 bits of the instruction, this may be > + * a limitation for CISC architectures. > + */ > + insn = qemu_plugin_tb_get_insn(tb, i); > + insn_vaddr = qemu_plugin_insn_vaddr(insn); > + insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); > + insn_disas = qemu_plugin_insn_disas(insn); > + char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"", > + insn_vaddr, insn_opcode, insn_disas); > + > + /* Register callback on memory read or write */ > + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, > + QEMU_PLUGIN_CB_NO_REGS, > + QEMU_PLUGIN_MEM_RW, NULL); > + > + /* Register callback on instruction */ > + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, > + QEMU_PLUGIN_CB_NO_REGS, output); > + } > +} > + > +/** > + * Install the plugin > + */ > +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, > + const qemu_info_t *info, int argc, > + char **argv) > +{ > + int i; > + > + /* Initialize instruction cache for each vCPU */ > + last_exec = calloc(info->system.max_vcpus, sizeof(GString *)); > + for (i = 0; i < info->system.max_vcpus; i++) { > + last_exec[i] = g_string_new(NULL); > + } We only allocate last_exec for system.max_vcpus here. You need to check the system_emulation bool before using that information and error out if it's not system emulation. > + > + /* Register translation block callback */ > + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); > + > + return 0; > +} Otherwise: Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
On 6/22/21 10:37 AM, Alex Bennée wrote: > We only allocate last_exec for system.max_vcpus here. You need to check > the system_emulation bool before using that information and error out if > it's not system emulation. My bad, I did not test user mode emulation after converting last_exec to an array. Should I consider only one vCPU in user mode emulation? -- Alexandre
Alexandre IOOSS <erdnaxe@crans.org> writes: > [[PGP Signed Part:Undecided]] > On 6/22/21 10:37 AM, Alex Bennée wrote: >> We only allocate last_exec for system.max_vcpus here. You need to check >> the system_emulation bool before using that information and error out if >> it's not system emulation. > > My bad, I did not test user mode emulation after converting last_exec > to an array. Should I consider only one vCPU in user mode emulation? It's up to you. The cpu_index is essentially unbounded for linux-user, so you could either dynamically allocate new entries as they come up or simply refuse to load the plugin when not running in system emulation. If you attempt to run multi-threaded programs with a single entry in the array, you will get weird interleaving issues. > > -- Alexandre > > [[End of PGP Signed Part]]
diff --git a/MAINTAINERS b/MAINTAINERS index 7d9cd29042..65942d5802 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2974,6 +2974,7 @@ F: include/tcg/ TCG Plugins M: Alex Bennée <alex.bennee@linaro.org> +R: Alexandre Iooss <erdnaxe@crans.org> S: Maintained F: docs/devel/tcg-plugins.rst F: plugins/ diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index b9d7935e5e..51093acd17 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -13,6 +13,7 @@ include $(BUILD_DIR)/config-host.mak VPATH += $(SRC_PATH)/contrib/plugins NAMES := +NAMES += execlog NAMES += hotblocks NAMES += hotpages NAMES += howvec diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c new file mode 100644 index 0000000000..995c4477f9 --- /dev/null +++ b/contrib/plugins/execlog.c @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org> + * + * Log instruction execution with memory access. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ +#include <glib.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <qemu-plugin.h> + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +/* Store last executed instruction on each vCPU */ +GString **last_exec; + +/** + * Add memory read or write information to current instruction log + */ +static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info, + uint64_t vaddr, void *udata) +{ + if (qemu_plugin_mem_is_store(info)) { + g_string_append(last_exec[cpu_index], ", store"); + } else { + g_string_append(last_exec[cpu_index], ", load"); + } + + /* If full system emulation log physical address and device name */ + struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr); + if (hwaddr) { + uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr); + const char *name = qemu_plugin_hwaddr_device_name(hwaddr); + g_string_append_printf(last_exec[cpu_index], ", 0x%08"PRIx64", %s", + addr, name); + } else { + g_string_append_printf(last_exec[cpu_index], ", 0x%08"PRIx64, vaddr); + } +} + +/** + * Log instruction execution + */ +static void vcpu_insn_exec(unsigned int cpu_index, void *udata) +{ + /* Print previous instruction in cache */ + if (last_exec[cpu_index]->str) { + qemu_plugin_outs(last_exec[cpu_index]->str); + qemu_plugin_outs("\n"); + } + + /* Store new instruction in cache */ + /* vcpu_mem will add memory access information to last_exec */ + g_string_printf(last_exec[cpu_index], "%u, ", cpu_index); + g_string_append(last_exec[cpu_index], (char *)udata); +} + +/** + * On translation block new translation + * + * QEMU convert code by translation block (TB). By hooking here we can then hook + * a callback on each instruction and memory access. 
+ */ +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ + struct qemu_plugin_insn *insn; + uint64_t insn_vaddr; + uint32_t insn_opcode; + char *insn_disas; + + size_t n = qemu_plugin_tb_n_insns(tb); + for (size_t i = 0; i < n; i++) { + /* + * `insn` is shared between translations in QEMU, copy needed data here. + * `output` is never freed as it might be used multiple times during + * the emulation lifetime. + * We only consider the first 32 bits of the instruction, this may be + * a limitation for CISC architectures. + */ + insn = qemu_plugin_tb_get_insn(tb, i); + insn_vaddr = qemu_plugin_insn_vaddr(insn); + insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn)); + insn_disas = qemu_plugin_insn_disas(insn); + char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"", + insn_vaddr, insn_opcode, insn_disas); + + /* Register callback on memory read or write */ + qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem, + QEMU_PLUGIN_CB_NO_REGS, + QEMU_PLUGIN_MEM_RW, NULL); + + /* Register callback on instruction */ + qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec, + QEMU_PLUGIN_CB_NO_REGS, output); + } +} + +/** + * Install the plugin + */ +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, + const qemu_info_t *info, int argc, + char **argv) +{ + int i; + + /* Initialize instruction cache for each vCPU */ + last_exec = calloc(info->system.max_vcpus, sizeof(GString *)); + for (i = 0; i < info->system.max_vcpus; i++) { + last_exec[i] = g_string_new(NULL); + } + + /* Register translation block callback */ + qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); + + return 0; +}
Log instruction execution and memory access to a file. This plugin can be used for reverse engineering or for side-channel analysis using QEMU. Signed-off-by: Alexandre Iooss <erdnaxe@crans.org> --- MAINTAINERS | 1 + contrib/plugins/Makefile | 1 + contrib/plugins/execlog.c | 123 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 contrib/plugins/execlog.c