Message ID | 1444894224-9542-3-git-send-email-den@openvz.org |
---|---|
State | New |
Headers | show |
"Denis V. Lunev" <den@openvz.org> writes: > From: Pavel Butsykin <pbutsykin@virtuozzo.com> > > This log would be very welcome for long-term diagnostics of the system > in the production. This log is at least necessary to understand what > has been happened on the system and to identify issues at higher-level > subsystems (libvirt, etc). > > Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com> > Signed-off-by: Denis V. Lunev <den@openvz.org> > CC: Markus Armbruster <armbru@redhat.com> > CC: Luiz Capitulino <lcapitulino@redhat.com> > CC: Eric Blake <eblake@redhat.com> > CC: Peter Maydell <peter.maydell@linaro.org> > --- > include/qemu/log.h | 1 + > monitor.c | 18 ++++++++++++++++++ > qemu-log.c | 1 + > 3 files changed, 20 insertions(+) > > diff --git a/include/qemu/log.h b/include/qemu/log.h > index 57b8c96..ba86606 100644 > --- a/include/qemu/log.h > +++ b/include/qemu/log.h > @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) > #define LOG_UNIMP (1 << 10) > #define LOG_GUEST_ERROR (1 << 11) > #define CPU_LOG_MMU (1 << 12) > +#define LOG_CMD (1 << 13) > > /* Returns true if a bit is set in the current loglevel mask > */ > diff --git a/monitor.c b/monitor.c > index 4f1ba2f..71810bb 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -434,6 +434,12 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data, > } > > monitor_json_emitter(mon, QOBJECT(qmp)); > + if (qemu_loglevel_mask(LOG_CMD)) { > + QString *output_json = qobject_to_json_pretty(QOBJECT(qmp)); > + qemu_log("qmp response:\n%s\n", output_json->string); > + QDECREF(output_json); > + } > + Uh, _pretty makes this multiple lines. Is this appropriate for the log file? Finding log message boundaries goes from regular to context-free... Nitpick: we generally spell it QMP. > QDECREF(qmp); > } > > @@ -474,6 +480,11 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp) > evstate->last, > now); > > + if (qemu_loglevel_mask(LOG_CMD)) { > + QString *output_json = qobject_to_json_pretty(QOBJECT(data)); > + qemu_log("qmp event:\n%s\n", output_json->string); > + QDECREF(output_json); > + } This does *not* log QMP events (the ones put on the wire), it logs *QAPI* events, which may or may not put on the wire due to rate limiting. Are you sure we don't want to rate-limit log messages? If you really want to log QAPI events rather than QMP events, you get to explain why in the commit message, and you get to document the difference between the log and the actual wire. But I suspect you merely put this in the wrong place :) > /* Rate limit of 0 indicates no throttling */ > qemu_mutex_lock(&monitor_lock); > if (!evstate->rate) { Not logged: the QMP greeting. If you want to log QMP, log all of QMP. You can either put the logging of the transmit direction in monitor_json_emitter(), or you put it in each of its callers. > @@ -2848,6 +2859,8 @@ static void handle_hmp_command(Monitor *mon, const char *cmdline) > QDict *qdict; > const mon_cmd_t *cmd; > > + qemu_log_mask(LOG_CMD, "hmp request:\n%s\n", cmdline); > + > cmd = monitor_parse_command(mon, &cmdline, mon->cmd_table); > if (!cmd) { > return; > @@ -3823,6 +3836,11 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) > goto err_out; > } > > + if (qemu_loglevel_mask(LOG_CMD)) { > + QString *output_json = output_json = qobject_to_json_pretty(obj); > + qemu_log("qmp request:\n%s\n", output_json->string); > + QDECREF(output_json); > + } > input = qmp_check_input_obj(obj, &local_err); > if (!input) { > qobject_decref(obj); Nitpick: QMP calls this a command, not a request. > diff --git a/qemu-log.c b/qemu-log.c > index e6d2b3f..ba7b34c 100644 > --- a/qemu-log.c > +++ b/qemu-log.c > @@ -98,6 +98,7 @@ const QEMULogItem qemu_log_items[] = { > { LOG_GUEST_ERROR, "guest_errors", > "log when the guest OS does something invalid (eg accessing a\n" > "non-existent register)" }, > + { LOG_CMD, "cmd", "log the hmp/qmp commands execution" }, > { 0, NULL, NULL }, > }; I can't quite see the use case for logging HMP, but since you wrote it, I assume you got one. Regardless, separate flags and patches for QMP and HMP, please.
On 16.10.2015 10:34, Markus Armbruster wrote: > "Denis V. Lunev" <den@openvz.org> writes: > >> From: Pavel Butsykin <pbutsykin@virtuozzo.com> >> >> This log would be very welcome for long-term diagnostics of the system >> in the production. This log is at least necessary to understand what >> has been happened on the system and to identify issues at higher-level >> subsystems (libvirt, etc). >> >> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com> >> Signed-off-by: Denis V. Lunev <den@openvz.org> >> CC: Markus Armbruster <armbru@redhat.com> >> CC: Luiz Capitulino <lcapitulino@redhat.com> >> CC: Eric Blake <eblake@redhat.com> >> CC: Peter Maydell <peter.maydell@linaro.org> >> --- >> include/qemu/log.h | 1 + >> monitor.c | 18 ++++++++++++++++++ >> qemu-log.c | 1 + >> 3 files changed, 20 insertions(+) >> >> diff --git a/include/qemu/log.h b/include/qemu/log.h >> index 57b8c96..ba86606 100644 >> --- a/include/qemu/log.h >> +++ b/include/qemu/log.h >> @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) >> #define LOG_UNIMP (1 << 10) >> #define LOG_GUEST_ERROR (1 << 11) >> #define CPU_LOG_MMU (1 << 12) >> +#define LOG_CMD (1 << 13) >> >> /* Returns true if a bit is set in the current loglevel mask >> */ >> diff --git a/monitor.c b/monitor.c >> index 4f1ba2f..71810bb 100644 >> --- a/monitor.c >> +++ b/monitor.c >> @@ -434,6 +434,12 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data, >> } >> >> monitor_json_emitter(mon, QOBJECT(qmp)); >> + if (qemu_loglevel_mask(LOG_CMD)) { >> + QString *output_json = qobject_to_json_pretty(QOBJECT(qmp)); >> + qemu_log("qmp response:\n%s\n", output_json->string); >> + QDECREF(output_json); >> + } >> + > > Uh, _pretty makes this multiple lines. Is this appropriate for the log > file? Finding log message boundaries goes from regular to > context-free... > I think so much more convenient to read the log, for example: qmp requested: { "execute": "query-block" } qmp responsed: { "return": [ { "io-status": "ok", "device": "drive-virtio-disk0", "locked": false, "removable": false, "inserted": { "iops_rd": 0, "detect_zeroes": "off", "image": { "snapshots": [ { "vm-clock-nsec": 235120743, "name": "snap4", "date-sec": 1444402741, "date-nsec": 246561000, "vm-clock-sec": 42, "id": "2", "vm-state-size": 247252333 }, ... if we just use qobject_to_json(), it will be a long string, which may be difficult to understand. > Nitpick: we generally spell it QMP. > Ok. >> QDECREF(qmp); >> } >> >> @@ -474,6 +480,11 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp) >> evstate->last, >> now); >> >> + if (qemu_loglevel_mask(LOG_CMD)) { >> + QString *output_json = qobject_to_json_pretty(QOBJECT(data)); >> + qemu_log("qmp event:\n%s\n", output_json->string); >> + QDECREF(output_json); >> + } > > This does *not* log QMP events (the ones put on the wire), it logs > *QAPI* events, which may or may not put on the wire due to rate > limiting. > > Are you sure we don't want to rate-limit log messages? > > If you really want to log QAPI events rather than QMP events, you get to > explain why in the commit message, and you get to document the > difference between the log and the actual wire. > > But I suspect you merely put this in the wrong place :) > >> /* Rate limit of 0 indicates no throttling */ >> qemu_mutex_lock(&monitor_lock); >> if (!evstate->rate) { > > Not logged: the QMP greeting. If you want to log QMP, log all of QMP. > > You can either put the logging of the transmit direction in > monitor_json_emitter(), or you put it in each of its callers. > I agree, just missed this point. In this case, we are interested in logging QAPI and QMP events. For QAPI events point logging can be put in monitor_qapi_event_emit(), it should solve the problem with rate-limit. For QMP(and monitor_json_emitter) more suitable put it in each of its callers, because it is not known what is this call(it the request, response or event). >> @@ -2848,6 +2859,8 @@ static void handle_hmp_command(Monitor *mon, const char *cmdline) >> QDict *qdict; >> const mon_cmd_t *cmd; >> >> + qemu_log_mask(LOG_CMD, "hmp request:\n%s\n", cmdline); >> + >> cmd = monitor_parse_command(mon, &cmdline, mon->cmd_table); >> if (!cmd) { >> return; >> @@ -3823,6 +3836,11 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) >> goto err_out; >> } >> >> + if (qemu_loglevel_mask(LOG_CMD)) { >> + QString *output_json = output_json = qobject_to_json_pretty(obj); >> + qemu_log("qmp request:\n%s\n", output_json->string); >> + QDECREF(output_json); >> + } >> input = qmp_check_input_obj(obj, &local_err); >> if (!input) { >> qobject_decref(obj); > > Nitpick: QMP calls this a command, not a request. > It is understood that the request came to execute the command. >> diff --git a/qemu-log.c b/qemu-log.c >> index e6d2b3f..ba7b34c 100644 >> --- a/qemu-log.c >> +++ b/qemu-log.c >> @@ -98,6 +98,7 @@ const QEMULogItem qemu_log_items[] = { >> { LOG_GUEST_ERROR, "guest_errors", >> "log when the guest OS does something invalid (eg accessing a\n" >> "non-existent register)" }, >> + { LOG_CMD, "cmd", "log the hmp/qmp commands execution" }, >> { 0, NULL, NULL }, >> }; > > I can't quite see the use case for logging HMP, but since you wrote it, > I assume you got one. > > Regardless, separate flags and patches for QMP and HMP, please. > Ok.
We have a couple of related features, and I'd like to get some clarity on how exactly they should be used. 1. Tracing Documented in docs/tracing.txt. Each trace event can be individually controlled with -trace and with monitor command trace-event. Wildcards work. Monitor command "info trace-event" shows their status. Supports a wealth of tracing backends: nop (compiles out tracing completely), stderr (prints trace to stderr), "simple", "ftrace", "dtrace", ... Range from "trivially easy" to "complex power tool". 2. Ad hoc printing We have 108 files with some #define DPRINTF(), and probably some more sailing under different flags. The ones that aren't useless should probably be tracepoints. 3. "qemu/log.h" logging Sports a "mask", where each bit enables a certain set of log messages. Control the mask with "-d item,..." Try "-d help" to see acceptable items. Logs to stderr by default, can be redirected with "-D <logfile>". Odd for a log file: no timestamp, but proposed "[PATCH 3/3] log: adds a timestamp to each log entry" adds one. Log message format is unclear. There are no locks beyond the one provided by stdio, and log entries appear to be build with multiple calls in places. I suspect logging from more than one thread can mess up the log. Currently defined items are: const QEMULogItem qemu_log_items[] = { { CPU_LOG_TB_OUT_ASM, "out_asm", "show generated host assembly code for each compiled TB" }, { CPU_LOG_TB_IN_ASM, "in_asm", "show target assembly code for each compiled TB" }, { CPU_LOG_TB_OP, "op", "show micro ops for each compiled TB" }, { CPU_LOG_TB_OP_OPT, "op_opt", "show micro ops (x86 only: before eflags optimization) and\n" "after liveness analysis" }, { CPU_LOG_INT, "int", "show interrupts/exceptions in short format" }, { CPU_LOG_EXEC, "exec", "show trace before each executed TB (lots of logs)" }, { CPU_LOG_TB_CPU, "cpu", "show CPU state before block translation" }, { CPU_LOG_MMU, "mmu", "log MMU-related activities" }, { CPU_LOG_PCALL, "pcall", "x86 only: show protected mode far calls/returns/exceptions" }, { CPU_LOG_RESET, "cpu_reset", "show CPU state before CPU resets" }, { CPU_LOG_IOPORT, "ioport", "show all i/o ports accesses" }, { LOG_UNIMP, "unimp", "log unimplemented functionality" }, { LOG_GUEST_ERROR, "guest_errors", "log when the guest OS does something invalid (eg accessing a\n" "non-existent register)" }, { 0, NULL, NULL }, }; Looks like this is basically TCG with a couple of random LOG_UNIMP and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose QEMU log in its current state. Should some of these items be tracepoints instead? Do we want a general purpose QEMU log? If yes, should this TCG-ish log become the general purpose QEMU log? I'm asking because "[PATCH 2/3] log: report hmp/qmp command and qmp event" proposes to log QMP traffic here. I understand the need for logging QMP traffic, but I'm not sure it fits here. 4. Error messages with timestamps "-msg timestamp" adds timestamps to the error messages that use the proper error reporting interfaces. I suspect this is basically a poor work-around for not having a log file.
Pavel Butsykin <pbutsykin@odin.com> writes: > On 16.10.2015 10:34, Markus Armbruster wrote: >> "Denis V. Lunev" <den@openvz.org> writes: >> >>> From: Pavel Butsykin <pbutsykin@virtuozzo.com> >>> >>> This log would be very welcome for long-term diagnostics of the system >>> in the production. This log is at least necessary to understand what >>> has been happened on the system and to identify issues at higher-level >>> subsystems (libvirt, etc). >>> >>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com> >>> Signed-off-by: Denis V. Lunev <den@openvz.org> >>> CC: Markus Armbruster <armbru@redhat.com> >>> CC: Luiz Capitulino <lcapitulino@redhat.com> >>> CC: Eric Blake <eblake@redhat.com> >>> CC: Peter Maydell <peter.maydell@linaro.org> >>> --- >>> include/qemu/log.h | 1 + >>> monitor.c | 18 ++++++++++++++++++ >>> qemu-log.c | 1 + >>> 3 files changed, 20 insertions(+) >>> >>> diff --git a/include/qemu/log.h b/include/qemu/log.h >>> index 57b8c96..ba86606 100644 >>> --- a/include/qemu/log.h >>> +++ b/include/qemu/log.h >>> @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) >>> #define LOG_UNIMP (1 << 10) >>> #define LOG_GUEST_ERROR (1 << 11) >>> #define CPU_LOG_MMU (1 << 12) >>> +#define LOG_CMD (1 << 13) >>> >>> /* Returns true if a bit is set in the current loglevel mask >>> */ >>> diff --git a/monitor.c b/monitor.c >>> index 4f1ba2f..71810bb 100644 >>> --- a/monitor.c >>> +++ b/monitor.c >>> @@ -434,6 +434,12 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data, >>> } >>> >>> monitor_json_emitter(mon, QOBJECT(qmp)); >>> + if (qemu_loglevel_mask(LOG_CMD)) { >>> + QString *output_json = qobject_to_json_pretty(QOBJECT(qmp)); >>> + qemu_log("qmp response:\n%s\n", output_json->string); >>> + QDECREF(output_json); >>> + } >>> + >> >> Uh, _pretty makes this multiple lines. Is this appropriate for the log >> file? Finding log message boundaries goes from regular to >> context-free... >> > I think so much more convenient to read the log, for example: > qmp requested: > { > "execute": "query-block" > } > qmp responsed: > { > "return": [ > { > "io-status": "ok", > "device": "drive-virtio-disk0", > "locked": false, > "removable": false, > "inserted": { > "iops_rd": 0, > "detect_zeroes": "off", > "image": { > "snapshots": [ > { > "vm-clock-nsec": 235120743, > "name": "snap4", > "date-sec": 1444402741, > "date-nsec": 246561000, > "vm-clock-sec": 42, > "id": "2", > "vm-state-size": 247252333 > }, > ... > > if we just use qobject_to_json(), it will be a long string, which may > be difficult to understand. Log files should be utterly trivial to parse. That means a rigid format. The traditional format is one line per entry, prefixed with a timestamp. Extracting JSON from such lines and feeding it to a pretty-printer is bothersome, but it's at least feasible. Reliably finding log message boundaries in a log file without rules on message format generally isn't. I'm not sure how the log.h interface is currently used. Are there any rules, or is it a "barf whatever text you like to the log file" thing? If there are rules, we should stick to them. If there are none, I'd seriously consider giving up on this log file and create a new one that's actually parseable. See also "What's the intended use of log.h logging?" >> Nitpick: we generally spell it QMP. >> > Ok. > >>> QDECREF(qmp); >>> } >>> >>> @@ -474,6 +480,11 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp) >>> evstate->last, >>> now); >>> >>> + if (qemu_loglevel_mask(LOG_CMD)) { >>> + QString *output_json = qobject_to_json_pretty(QOBJECT(data)); >>> + qemu_log("qmp event:\n%s\n", output_json->string); >>> + QDECREF(output_json); >>> + } >> >> This does *not* log QMP events (the ones put on the wire), it logs >> *QAPI* events, which may or may not put on the wire due to rate >> limiting. >> >> Are you sure we don't want to rate-limit log messages? >> >> If you really want to log QAPI events rather than QMP events, you get to >> explain why in the commit message, and you get to document the >> difference between the log and the actual wire. >> >> But I suspect you merely put this in the wrong place :) >> >>> /* Rate limit of 0 indicates no throttling */ >>> qemu_mutex_lock(&monitor_lock); >>> if (!evstate->rate) { >> >> Not logged: the QMP greeting. If you want to log QMP, log all of QMP. >> >> You can either put the logging of the transmit direction in >> monitor_json_emitter(), or you put it in each of its callers. >> > I agree, just missed this point. In this case, we are interested in > logging QAPI and QMP events. For QAPI events point logging can be put > in monitor_qapi_event_emit(), it should solve the problem with > rate-limit. For QMP(and monitor_json_emitter) more suitable put it in > each of its callers, because it is not known what is this call(it the > request, response or event). > >>> @@ -2848,6 +2859,8 @@ static void handle_hmp_command(Monitor *mon, const char *cmdline) >>> QDict *qdict; >>> const mon_cmd_t *cmd; >>> >>> + qemu_log_mask(LOG_CMD, "hmp request:\n%s\n", cmdline); >>> + >>> cmd = monitor_parse_command(mon, &cmdline, mon->cmd_table); >>> if (!cmd) { >>> return; >>> @@ -3823,6 +3836,11 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) >>> goto err_out; >>> } >>> >>> + if (qemu_loglevel_mask(LOG_CMD)) { >>> + QString *output_json = output_json = qobject_to_json_pretty(obj); >>> + qemu_log("qmp request:\n%s\n", output_json->string); >>> + QDECREF(output_json); >>> + } >>> input = qmp_check_input_obj(obj, &local_err); >>> if (!input) { >>> qobject_decref(obj); >> >> Nitpick: QMP calls this a command, not a request. >> > It is understood that the request came to execute the command. It's just terminology, but I like *consistent* terminology. docs/qmp-spec.txt calls it a command, not a request. >>> diff --git a/qemu-log.c b/qemu-log.c >>> index e6d2b3f..ba7b34c 100644 >>> --- a/qemu-log.c >>> +++ b/qemu-log.c >>> @@ -98,6 +98,7 @@ const QEMULogItem qemu_log_items[] = { >>> { LOG_GUEST_ERROR, "guest_errors", >>> "log when the guest OS does something invalid (eg accessing a\n" >>> "non-existent register)" }, >>> + { LOG_CMD, "cmd", "log the hmp/qmp commands execution" }, >>> { 0, NULL, NULL }, >>> }; >> >> I can't quite see the use case for logging HMP, but since you wrote it, >> I assume you got one. >> >> Regardless, separate flags and patches for QMP and HMP, please. >> > Ok.
On 16/10/2015 14:33, Markus Armbruster wrote: > Looks like this is basically TCG with a couple of random LOG_UNIMP > and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose > QEMU log in its current state. I think these could become error_report. Some others (e.g. LOG_IOPORT) can be removed. LOG_MMU seems to be mostly a ppc thing, could also become a tracepoint. Likewise for LOG_PCALL and perhaps LOG_INT. However, we should at the same time add some functionality to enable tracepoints more easily, and perhaps enable the stderr tracing backend by default upstream (downstream can enable stderr+stap; or if they care a lot about performance disable stderr in favor of stap only). Paolo
On 16 October 2015 at 13:33, Markus Armbruster <armbru@redhat.com> wrote: > We have a couple of related features, and I'd like to get some clarity > on how exactly they should be used. > > 1. Tracing > > Documented in docs/tracing.txt. > > Each trace event can be individually controlled with -trace and with > monitor command trace-event. Wildcards work. Monitor command "info > trace-event" shows their status. > > Supports a wealth of tracing backends: nop (compiles out tracing > completely), stderr (prints trace to stderr), "simple", "ftrace", > "dtrace", ... Range from "trivially easy" to "complex power tool". The major problem with this is it is a compile time choice (especially the choice of backend), and our default backend is 'nop'. > 2. Ad hoc printing > > We have 108 files with some #define DPRINTF(), and probably some more > sailing under different flags. The ones that aren't useless should > probably be tracepoints. > > 3. "qemu/log.h" logging > > Sports a "mask", where each bit enables a certain set of log > messages. Control the mask with "-d item,..." Try "-d help" to see > acceptable items. > > Logs to stderr by default, can be redirected with "-D <logfile>". This, though it is a bit ad-hoc, always exists, always behaves the same way, and doesn't require anybody to rebuild QEMU to enable tracing. I think having a more consistent approach to logging would be great, though. thanks -- PMM
On 16 October 2015 at 13:48, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > On 16/10/2015 14:33, Markus Armbruster wrote: >> Looks like this is basically TCG with a couple of random LOG_UNIMP >> and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose >> QEMU log in its current state. > > I think these could become error_report. No; it's important not to print these unless the user really asked for them (especially the GUEST_ERROR) kind. Otherwise it's (potentially quite a lot of) unnecessary noise. > Some others (e.g. LOG_IOPORT) can be removed. > > LOG_MMU seems to be mostly a ppc thing, could also become a tracepoint. > Likewise for LOG_PCALL and perhaps LOG_INT. It's also very useful to be able to enable whole *classes* of tracing (like "tell me whenever my guest OS does something dumb"); does the tracepoint code have any support for this? thanks -- PMM
On 16/10/2015 14:54, Peter Maydell wrote: > On 16 October 2015 at 13:48, Paolo Bonzini <pbonzini@redhat.com> wrote: >> >> >> On 16/10/2015 14:33, Markus Armbruster wrote: >>> Looks like this is basically TCG with a couple of random LOG_UNIMP >>> and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose >>> QEMU log in its current state. >> >> I think these could become error_report. > > No; it's important not to print these unless the user really > asked for them (especially the GUEST_ERROR) kind. Otherwise it's > (potentially quite a lot of) unnecessary noise. I guess it depends then. If the unimplemented feature is in all likelihood a showstopper (e.g. setend) it should be unconditionally enabled, I think. >> Some others (e.g. LOG_IOPORT) can be removed. >> >> LOG_MMU seems to be mostly a ppc thing, could also become a tracepoint. >> Likewise for LOG_PCALL and perhaps LOG_INT. > > It's also very useful to be able to enable whole *classes* of > tracing (like "tell me whenever my guest OS does something dumb"); > does the tracepoint code have any support for this? That's part of what I mentioned in my message ("add some functionality to enable tracepoints more easily"). It would be great to have something like "-d trace:scsi_*" on the command line, integrated with qemu-log. So perhaps the place of qemu-log is as a replacement for the stderr tracing backend? Paolo
On Fri, Oct 16, 2015 at 01:54:40PM +0100, Peter Maydell wrote: > On 16 October 2015 at 13:48, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > > > > On 16/10/2015 14:33, Markus Armbruster wrote: > >> Looks like this is basically TCG with a couple of random LOG_UNIMP > >> and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose > >> QEMU log in its current state. > > > > I think these could become error_report. > > No; it's important not to print these unless the user really > asked for them (especially the GUEST_ERROR) kind. Otherwise it's > (potentially quite a lot of) unnecessary noise. > > > Some others (e.g. LOG_IOPORT) can be removed. > > > > LOG_MMU seems to be mostly a ppc thing, could also become a tracepoint. > > Likewise for LOG_PCALL and perhaps LOG_INT. > > It's also very useful to be able to enable whole *classes* of > tracing (like "tell me whenever my guest OS does something dumb"); > does the tracepoint code have any support for this? You can use globs in tracepoint names you want to enable. So if the tracepoints have structured / well considerd name prefixes, you can enable classses at once. Regards, Daniel
On 16 October 2015 at 14:26, Daniel P. Berrange <berrange@redhat.com> wrote: >> It's also very useful to be able to enable whole *classes* of >> tracing (like "tell me whenever my guest OS does something dumb"); >> does the tracepoint code have any support for this? > > You can use globs in tracepoint names you want to enable. So if the > tracepoints have structured / well considerd name prefixes, you can > enable classses at once. ...but in practice we don't really do that very much, and we don't have documented lists of classes which we support either. Guessing what might be a plausible glob of tracepoint names to enable seems like a definite step back from the formalized logging categories we have currently. -- PMM
On 16/10/2015 15:29, Peter Maydell wrote: >> > You can use globs in tracepoint names you want to enable. So if the >> > tracepoints have structured / well considerd name prefixes, you can >> > enable classses at once. > ...but in practice we don't really do that very much, and we > don't have documented lists of classes which we support either. > Guessing what might be a plausible glob of tracepoint names to > enable seems like a definite step back from the formalized > logging categories we have currently. We could add "-d trace:help" to list valid tracepoint names. Paolo
On 16 October 2015 at 14:30, Paolo Bonzini <pbonzini@redhat.com> wrote: > > > On 16/10/2015 15:29, Peter Maydell wrote: >>> > You can use globs in tracepoint names you want to enable. So if the >>> > tracepoints have structured / well considerd name prefixes, you can >>> > enable classses at once. >> ...but in practice we don't really do that very much, and we >> don't have documented lists of classes which we support either. >> Guessing what might be a plausible glob of tracepoint names to >> enable seems like a definite step back from the formalized >> logging categories we have currently. > > We could add "-d trace:help" to list valid tracepoint names. I think people are mostly going to care about categories (eg "enable tracing for device X", or "enable tracing for all unimplemented features", or "enable a medium level of debug tracing for device Y". If we want to get there we need to define standards for naming tracepoints and enforce them somehow (subsystem_category_level_tracepointname ?) thanks -- PMM
On 10/16/2015 04:00 PM, Paolo Bonzini wrote: > > On 16/10/2015 14:54, Peter Maydell wrote: >> On 16 October 2015 at 13:48, Paolo Bonzini <pbonzini@redhat.com> wrote: >>> >>> On 16/10/2015 14:33, Markus Armbruster wrote: >>>> Looks like this is basically TCG with a couple of random LOG_UNIMP >>>> and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose >>>> QEMU log in its current state. >>> I think these could become error_report. >> No; it's important not to print these unless the user really >> asked for them (especially the GUEST_ERROR) kind. Otherwise it's >> (potentially quite a lot of) unnecessary noise. > I guess it depends then. If the unimplemented feature is in all > likelihood a showstopper (e.g. setend) it should be unconditionally > enabled, I think. > >>> Some others (e.g. LOG_IOPORT) can be removed. >>> >>> LOG_MMU seems to be mostly a ppc thing, could also become a tracepoint. >>> Likewise for LOG_PCALL and perhaps LOG_INT. >> It's also very useful to be able to enable whole *classes* of >> tracing (like "tell me whenever my guest OS does something dumb"); >> does the tracepoint code have any support for this? > That's part of what I mentioned in my message ("add some functionality > to enable tracepoints more easily"). It would be great to have > something like "-d trace:scsi_*" on the command line, integrated with > qemu-log. > > So perhaps the place of qemu-log is as a replacement for the stderr > tracing backend? > > Paolo The motivation for the patchset is simple. These messages will be quite useful to understand how things are going. We do not think about automatic parsing of that logs at all. They should be human-readable for us to understand the problem. In general, we should have "normal" locking engine. Here I mean a non-blocking facility, which should allow to place arbitrary log message preserving order of that message in which events have been happen. In general, this is possible through lockless ring-buffer and a flush thread. Yes, messages could be lost if this is noted to the log. I am a bit old-fashioned with this approach, sorry. This information is at least necessary to guess what operations were performed with a VM from outside using conventional interfaces. Thus my approach to the topic is simple. I'd like to put minimal information at the moment using current API for 2.5 and expect this merged if possible, For 2.6 we will replace the engine with one I have described above. From my point of view this work should not start new sub-system but slowly evolve current one. All bits here a a bit technical without much usage details, but this is something which should be discussed. With an effective backend (which does not contradict to the current implementation) more could be allowed to be logged. Tracepoints, in general, are designated for the debugging. Logging is something for post-ship analysis from the customer report and speaking about [commercially] supported product running in _production_ you will not be able either to access customer's host or even customer's network. Den
On 16/10/2015 15:36, Peter Maydell wrote: >> We could add "-d trace:help" to list valid tracepoint names. > > I think people are mostly going to care about categories > (eg "enable tracing for device X", or "enable tracing for > all unimplemented features", or "enable a medium level of > debug tracing for device Y". I hadn't thought about levels, but honestly when I use KVM tracing (which is often measured in tens of megabytes per run) I don't care. I just get a .xz file from the person who reported the bug, with all the events, and use less/grep on it. > If we want to get there we need to define standards for > naming tracepoints and enforce them somehow > (subsystem_category_level_tracepointname ?) We're surprisingly good at naming tracepoints. Only 13 files do not have a common prefix for their tracepoints, conflicts are rare, and only a few files could arguably improve their naming: hw/usb/desc.c usb_desc, usb_set, usb_clear !! hw/vfio/platform.c vfio_platform (except for one) !! migration/savevm.c savevm, qemu_loadvm, vmstate, qemu_announce_self_iter !! target-s390x/mmu_helper.c (none) !! target-sparc/int32_helper.c int_helper !! target-sparc/int64_helper.c int_helper !! target-sparc/mmu_helper.c mmu_helper !! target-sparc/win_helper.c win_helper !! xen-hvm.c handle_ioreq, cpu_ioreq, xen !! Globbing should suffice, IMHO. Full list of prefixes under my signature. Paolo audio/alsaaudio.c alsa audio/ossaudio.c oss balloon.c virtio_balloon block/backup.c backup block.c bdrv block/commit.c commit blockdev.c qmp_block block/mirror.c mirror block/qcow2-cache.c qcow2 block/qcow2-cluster.c qcow2 block/qcow2.c qcow2 block/qed.c qed block/qed-l2-cache.c qed block/qed-table.c qed block/raw-posix.c paio_submit block/stream.c stream cpu-exec.c exec_tb crypto/tlscredsanon.c qcrypto_tls_creds_anon crypto/tlscreds.c qcrypto_tls_creds crypto/tlscredsx509.c qcrypto_tls_creds_x509 crypto/tlssession.c qcrypto_tls_session dma-helpers.c dma hw/9pfs/virtio-9p.c v9fs hw/acpi/memory_hotplug.c mhp_acpi hw/arm/virt-acpi-build.c virt_acpi hw/audio/cs4231.c cs4231 hw/audio/milkymist-ac97.c milkymist_ac97 hw/block/dataplane/virtio-blk.c virtio_blk_data_plane hw/block/hd-geometry.c hd_geometry hw/block/virtio-blk.c virtio_blk hw/char/escc.c escc hw/char/grlib_apbuart.c grlib_apbuart hw/char/lm32_juart.c lm32_juart hw/char/lm32_uart.c lm32_uart hw/char/milkymist-uart.c milkymist_uart hw/char/virtio-console.c virtio_console hw/char/virtio-serial-bus.c virtio_serial hw/display/g364fb.c g364fb hw/display/jazz_led.c jazz_led hw/display/milkymist-tmu2.c milkymist_tmu2 hw/display/milkymist-vgafb.c milkymist_vgafb_memory hw/display/qxl.c qxl hw/display/qxl-render.c qxl_render hw/display/virtio-gpu.c virtio_gpu hw/display/vmware_vga.c vmware hw/display/xenfb.c xenfb hw/dma/i8257.c i8257 hw/dma/rc4030.c rc4030, jazzio hw/dma/sparc32_dma.c sparc32, espdma, ledma hw/dma/sun4m_iommu.c sun4m_iommu hw/i386/pc.c mhp_pc_dimm hw/i386/xen/xen_platform.c xen hw/i386/xen/xen_pvdevice.c xen_pv_mmio hw/input/milkymist-softusb.c milkymist_softusb hw/input/ps2.c ps2 hw/intc/apic.c apic hw/intc/apic_common.c cpu, apic hw/intc/grlib_irqmp.c grlib_irqmp hw/intc/lm32_pic.c lm32_pic hw/intc/s390_flic_kvm.c flic hw/intc/slavio_intctl.c slavio hw/intc/xics.c xics hw/isa/pc87312.c pc87312 hw/misc/eccmemctl.c ecc hw/misc/milkymist-hpdmc.c milkymist_hpdmc_memory hw/misc/milkymist-pfpu.c milkymist_pfpu hw/misc/slavio_misc.c slavio hw/net/lance.c lance hw/net/milkymist-minimac2.c milkymist_minimac2 hw/net/mipsnet.c mipsnet hw/net/opencores_eth.c open_eth hw/net/pcnet.c pcnet hw/net/pcnet-pci.c pcnet hw/nvram/ds1225y.c nvram hw/nvram/fw_cfg.c fw_cfg hw/pci/pci.c pci_update_mappings hw/pci/pci_host.c pci_cfg hw/ppc/ppc.c ppc hw/ppc/spapr.c spapr_cas hw/ppc/spapr_hcall.c spapr_cas hw/ppc/spapr_iommu.c spapr_iommu hw/ppc/spapr_pci.c spapr_pci hw/s390x/css.c css hw/s390x/virtio-ccw.c virtio_ccw hw/scsi/esp.c esp hw/scsi/esp-pci.c esp_pci hw/scsi/megasas.c megasas hw/scsi/scsi-bus.c scsi_req, scsi hw/scsi/vmw_pvscsi.c pvscsi hw/sd/milkymist-memcard.c milkymist_memcard_memory hw/sparc/leon3.c leon3 hw/sparc/sun4m.c sun4m_cpu hw/timer/grlib_gptimer.c grlib_gptimer hw/timer/lm32_timer.c lm32_timer hw/timer/milkymist-sysctl.c milkymist_sysctl hw/timer/slavio_timer.c slavio hw/usb/bus.c usb_port hw/usb/core.c usb_packet_state hw/usb/dev-hub.c usb_hub hw/usb/dev-mtp.c usb_mtp hw/usb/dev-uas.c usb_uas hw/usb/hcd-ehci.c usb_ehci hw/usb/hcd-ohci.c usb_ohci hw/usb/hcd-uhci.c usb_uhci hw/usb/hcd-xhci.c usb_xhci hw/usb/host-libusb.c usb_host hw/vfio/pci.c vfio hw/vfio/pci-quirks. vfio_quirk hw/vfio/vfio-common.c vfio hw/virtio/dataplane/vring.c vring hw/virtio/virtio.c virtio, virtqueue hw/virtio/virtio-rng.c virtio_rng ioport.c cpu kvm-all.c kvm memory.c memory_region_ops migration.c migration, migrate migration/ram.c migration migration/rdma.c qemu_rdma, rdma monitor.c monitor_protocol qemu-coroutine.c qemu_coroutine qemu-coroutine-lock.c qemu_co qemu-file.c qemu_file qom/object.c object spice-qemu-char.c spice_vmc target-ppc/kvm.c kvm target-s390x/cpu.c cpu target-s390x/ioinst.c ioinst target-s390x/kvm.c kvm thread-pool.c thread_pool translate-all.c translate ui/console.c displaychangelistener, displaysurface, console ui/gtk.c gd ui/input.c input ui/spice-display.c qemu_spice ui/vnc.c vnc_key util/hbitmap.c hbitmap vl.c qemu_system vmstate.c vmstate xen-mapcache.c xen
On 16 October 2015 at 15:17, Paolo Bonzini <pbonzini@redhat.com> wrote: > On 16/10/2015 15:36, Peter Maydell wrote: >>> We could add "-d trace:help" to list valid tracepoint names. >> >> I think people are mostly going to care about categories >> (eg "enable tracing for device X", or "enable tracing for >> all unimplemented features", or "enable a medium level of >> debug tracing for device Y". > > I hadn't thought about levels, but honestly when I use KVM tracing (which > is often measured in tens of megabytes per run) I don't care. In a lot of cases, especially with the TCG logging, not enabling voluminous tracing is really important because if you turn it all on then the system is way too slow (and can behave differently as a result), and generates gigabytes of trace output. (-d exec and -d cpu will do this, for instance.) thanks -- PMM
Markus Armbruster <armbru@redhat.com> writes: > We have a couple of related features, and I'd like to get some clarity > on how exactly they should be used. FWIW this is how I view them. > > 1. Tracing > > Documented in docs/tracing.txt. > > Each trace event can be individually controlled with -trace and with > monitor command trace-event. Wildcards work. Monitor command "info > trace-event" shows their status. > > Supports a wealth of tracing backends: nop (compiles out tracing > completely), stderr (prints trace to stderr), "simple", "ftrace", > "dtrace", ... Range from "trivially easy" to "complex power tool". While not enabled by default this is the sort of thing that can be enabled by distros to enable non-invasive tracing of complex behaviour and performance bottle-necks. Ignoring nop/stderr backends the others can be used when your looking as system-wide behaviour and interactions between QEMU, KVM and the kernel. > > 2. Ad hoc printing > > We have 108 files with some #define DPRINTF(), and probably some more > sailing under different flags. The ones that aren't useless should > probably be tracepoints. These are basically debug crutches for individual files that are currently in development or particularly tricky bits of code that the relevant developer would enable when debugging. > > 3. "qemu/log.h" logging > > Sports a "mask", where each bit enables a certain set of log > messages. Control the mask with "-d item,..." Try "-d help" to see > acceptable items. > > Logs to stderr by default, can be redirected with "-D <logfile>". > > Odd for a log file: no timestamp, but proposed "[PATCH 3/3] log: adds > a timestamp to each log entry" adds one. > > Log message format is unclear. > > There are no locks beyond the one provided by stdio, and log entries > appear to be build with multiple calls in places. I suspect logging > from more than one thread can mess up the log. As you note bellow most of these are TCG related and currently unlikely to race with each other due to TCG being single threaded (for now ;-) > Currently defined items are: > > const QEMULogItem qemu_log_items[] = { > { CPU_LOG_TB_OUT_ASM, "out_asm", > "show generated host assembly code for each compiled TB" }, > { CPU_LOG_TB_IN_ASM, "in_asm", > "show target assembly code for each compiled TB" }, > { CPU_LOG_TB_OP, "op", > "show micro ops for each compiled TB" }, > { CPU_LOG_TB_OP_OPT, "op_opt", > "show micro ops (x86 only: before eflags optimization) and\n" > "after liveness analysis" }, > { CPU_LOG_INT, "int", > "show interrupts/exceptions in short format" }, > { CPU_LOG_EXEC, "exec", > "show trace before each executed TB (lots of logs)" }, > { CPU_LOG_TB_CPU, "cpu", > "show CPU state before block translation" }, > { CPU_LOG_MMU, "mmu", > "log MMU-related activities" }, > { CPU_LOG_PCALL, "pcall", > "x86 only: show protected mode far calls/returns/exceptions" }, > { CPU_LOG_RESET, "cpu_reset", > "show CPU state before CPU resets" }, > { CPU_LOG_IOPORT, "ioport", > "show all i/o ports accesses" }, > { LOG_UNIMP, "unimp", > "log unimplemented functionality" }, > { LOG_GUEST_ERROR, "guest_errors", > "log when the guest OS does something invalid (eg accessing a\n" > "non-existent register)" }, > { 0, NULL, NULL }, > }; > > Looks like this is basically TCG with a couple of random LOG_UNIMP > and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose > QEMU log in its current state. However it is useful when users find broken TCG stuff and you want a log of what happened. Having said that I'm not sure how often that happens because any non-trivial stuff generates huge logs. It is the logging I use the most though. > Should some of these items be tracepoints instead? IMHO no because stitching together tracepoints with the log file would be a pain. > Do we want a general purpose QEMU log? It depends. If you are talking about a standardised log style thing then maybe but all the current TCG reporting is very specialised and often over multiple lines. To my mind a general purpose log should be fairly quiet and just log extraordinary events (maybe via syslog). The sort of stuff that would be useful for a user to report when tailing a log before something went wrong. > > If yes, should this TCG-ish log become the general purpose QEMU log? > > I'm asking because "[PATCH 2/3] log: report hmp/qmp command and qmp > event" proposes to log QMP traffic here. I understand the need for > logging QMP traffic, but I'm not sure it fits here. Yeah I'm unsure about this as well. To my mind as the QMP is a whole separate API I'd be tempted to log it separately. > 4. Error messages with timestamps > > "-msg timestamp" adds timestamps to the error messages that use the > proper error reporting interfaces. > > I suspect this is basically a poor work-around for not having a log > file. Aren't these meant to be user visible errors? "I died because of X"?
On 16/10/2015 16:31, Peter Maydell wrote: > > I hadn't thought about levels, but honestly when I use KVM tracing (which > > is often measured in tens of megabytes per run) I don't care. > > In a lot of cases, especially with the TCG logging, not enabling > voluminous tracing is really important because if you turn it all > on then the system is way too slow (and can behave differently > as a result), and generates gigabytes of trace output. (-d exec > and -d cpu will do this, for instance.) Yes, but device models are going to generate orders of magnitude smaller traces than the CPU. Since I'm not proposing to drop -d together, but rather to integrate tracing with it, we can cross that river when we reach it. Paolo
Peter Maydell <peter.maydell@linaro.org> writes: > On 16 October 2015 at 15:17, Paolo Bonzini <pbonzini@redhat.com> wrote: >> On 16/10/2015 15:36, Peter Maydell wrote: >>>> We could add "-d trace:help" to list valid tracepoint names. >>> >>> I think people are mostly going to care about categories >>> (eg "enable tracing for device X", or "enable tracing for >>> all unimplemented features", or "enable a medium level of >>> debug tracing for device Y". >> >> I hadn't thought about levels, but honestly when I use KVM tracing (which >> is often measured in tens of megabytes per run) I don't care. > > In a lot of cases, especially with the TCG logging, not enabling > voluminous tracing is really important because if you turn it all > on then the system is way too slow (and can behave differently > as a result), and generates gigabytes of trace output. (-d exec > and -d cpu will do this, for instance.) This is at least as much an argument for use of tracing as against it. Tracing is a lot more flexible than log.h, and with the right backend, it's much more efficient, too. If the appropriate trace patterns turn out to be too hard to remember, we can provide canned trace patterns with names that are easy to remember. -d could become sugar for a suitable trace patterns.
On 19/10/2015 15:17, Markus Armbruster wrote: > This is at least as much an argument for use of tracing as against it. > Tracing is a lot more flexible than log.h, and with the right backend, > it's much more efficient, too. > > If the appropriate trace patterns turn out to be too hard to remember, > we can provide canned trace patterns with names that are easy to > remember. > > -d could become sugar for a suitable trace patterns. I think that's putting the cart a bit before the horse, since "-d" is currently not related to tracing at all. :) Paolo
On 19 October 2015 at 14:17, Markus Armbruster <armbru@redhat.com> wrote: > Peter Maydell <peter.maydell@linaro.org> writes: >> In a lot of cases, especially with the TCG logging, not enabling >> voluminous tracing is really important because if you turn it all >> on then the system is way too slow (and can behave differently >> as a result), and generates gigabytes of trace output. (-d exec >> and -d cpu will do this, for instance.) > > This is at least as much an argument for use of tracing as against it. > Tracing is a lot more flexible than log.h, and with the right backend, > it's much more efficient, too. > > If the appropriate trace patterns turn out to be too hard to remember, > we can provide canned trace patterns with names that are easy to > remember. > > -d could become sugar for a suitable trace patterns. I don't object to the use of tracing under the hood, as long as the user-facing experience remains as good as what we have for -d at the moment (in terms of it being always present, working the same for everybody, easily discoverable and simple to use). thanks -- PMM
Peter Maydell <peter.maydell@linaro.org> writes: > On 16 October 2015 at 13:33, Markus Armbruster <armbru@redhat.com> wrote: >> We have a couple of related features, and I'd like to get some clarity >> on how exactly they should be used. >> >> 1. Tracing >> >> Documented in docs/tracing.txt. >> >> Each trace event can be individually controlled with -trace and with >> monitor command trace-event. Wildcards work. Monitor command "info >> trace-event" shows their status. >> >> Supports a wealth of tracing backends: nop (compiles out tracing >> completely), stderr (prints trace to stderr), "simple", "ftrace", >> "dtrace", ... Range from "trivially easy" to "complex power tool". > > The major problem with this is it is a compile time choice > (especially the choice of backend), and our default backend > is 'nop'. I think the default is / has become wrong. Easy enough to fix. Let's compare log.h and tracing. Both let you control stuff on the command line and in the monitor. log.h's unit of control is a mask bit, which controls a canned group of related messages. Tracing's unit of control is the individual message. To control a group of messages, use globbing. As long as we use sane names, this is strictly more powerful than canned group. When you don't need the power, globbing can be harder to use than canned group with sensible names. log.h can log to stderr, log to a file, or not log at all (default). Tracing's capabilities depend on a compile time choice: * You can pick multiple backends. They're all simultaneously active. If we want to support enabling configured backends selectively at runtime, that shouldn't be hard. * If you compile out tracing (configure only backend 'nop'), you can't trace. That's a feature. * If you pick 'stderr', you can trace to stderr. Turning it into a backend that could alternatively trace to a file (-trace file=FNAME) would be easy. Picking just 'stderr' would be feature-equivalent to log.h then. >> 2. Ad hoc printing >> >> We have 108 files with some #define DPRINTF(), and probably some more >> sailing under different flags. The ones that aren't useless should >> probably be tracepoints. >> >> 3. "qemu/log.h" logging >> >> Sports a "mask", where each bit enables a certain set of log >> messages. Control the mask with "-d item,..." Try "-d help" to see >> acceptable items. >> >> Logs to stderr by default, can be redirected with "-D <logfile>". > > This, though it is a bit ad-hoc, always exists, always behaves the > same way, and doesn't require anybody to rebuild QEMU to enable > tracing. "Always exists" and "doesn't require rebuild" are the same. For me, "exists unless you compiled it out" would be a (minor) improvement over "always exists". That's how tracing will be once we fix the default backend. "Always behaves the same way" I interpret as complaint about tracing user interface complexity. Flexibility breeds complexity, and when you don't need the former, you don't want the latter. While that's a valid argument for a simpler user interface, it's hardly one for having two separate subsystems! Pretty sure we could provide the existing log.h user interface as sugar for tracing if we wanted. > I think having a more consistent approach to logging would be great, > though. Points I'd like to make: 1. Logging is not tracing. Logging logs events interesting for the user. Tracing logs code execution. It's a debugging aid. The two overlap to a degree, but they're not the same. 2. The current use of log.h seems closer to tracing than to logging. 3. I figure our tracing needs could be served by the tracing subsystem with a few improvements. The few things log.h can do that tracing can't yet do should be easy enough to add. Why have two separate subsystems then? 4. Logging would be useful, but I feel it shouldn't be shoehorned into log.h.
On 19 October 2015 at 15:29, Markus Armbruster <armbru@redhat.com> wrote: > Points I'd like to make: > > 1. Logging is not tracing. Logging logs events interesting for the > user. Tracing logs code execution. It's a debugging aid. The two > overlap to a degree, but they're not the same. In the case of QEMU, the event the user is often interested in is (guest) code execution... > 2. The current use of log.h seems closer to tracing than to logging. Maybe. It gets a fair amount of use for "figure out what my guest is doing" questions. > 3. I figure our tracing needs could be served by the tracing subsystem > with a few improvements. The few things log.h can do that tracing can't > yet do should be easy enough to add. Why have two separate subsystems > then? I just feel that the logging subsystem is very simple and easy to use. The tracing subsystem is far from easy to comprehend. I'm apprehensive that in the name of simplification we'll end up deleting the easy to use logging without making the tracing as easy for end users to use. > 4. Logging would be useful, but I feel it shouldn't be shoehorned into > log.h. The log.h code is definitely intended for our end users to run. It's true that at the moment we log the things that are easy to log rather than having the flexibility to log the things end users would ideally like; but the solution to that is to improve the logging... In particular, ideally the logging should allow you to debug or instrument your guest program without having to care about QEMU's internals particularly. LOG_UNIMP and LOG_GUEST_ERROR are moving in that direction. thanks -- PMM
Alex Bennée <alex.bennee@linaro.org> writes: > Markus Armbruster <armbru@redhat.com> writes: > >> We have a couple of related features, and I'd like to get some clarity >> on how exactly they should be used. > > FWIW this is how I view them. > >> >> 1. Tracing >> >> Documented in docs/tracing.txt. >> >> Each trace event can be individually controlled with -trace and with >> monitor command trace-event. Wildcards work. Monitor command "info >> trace-event" shows their status. >> >> Supports a wealth of tracing backends: nop (compiles out tracing >> completely), stderr (prints trace to stderr), "simple", "ftrace", >> "dtrace", ... Range from "trivially easy" to "complex power tool". > > While not enabled by default this is the sort of thing that can be > enabled by distros to enable non-invasive tracing of complex behaviour > and performance bottle-necks. Ignoring nop/stderr backends the others > can be used when your looking as system-wide behaviour and interactions > between QEMU, KVM and the kernel. > >> >> 2. Ad hoc printing >> >> We have 108 files with some #define DPRINTF(), and probably some more >> sailing under different flags. The ones that aren't useless should >> probably be tracepoints. > > These are basically debug crutches for individual files that are > currently in development or particularly tricky bits of code that > the relevant developer would enable when debugging. Modern code should probably use tracing for this purpose except where its (small!) overhead can't be tolerated. >> 3. "qemu/log.h" logging >> >> Sports a "mask", where each bit enables a certain set of log >> messages. Control the mask with "-d item,..." Try "-d help" to see >> acceptable items. >> >> Logs to stderr by default, can be redirected with "-D <logfile>". >> >> Odd for a log file: no timestamp, but proposed "[PATCH 3/3] log: adds >> a timestamp to each log entry" adds one. >> >> Log message format is unclear. >> >> There are no locks beyond the one provided by stdio, and log entries >> appear to be build with multiple calls in places. I suspect logging >> from more than one thread can mess up the log. > > As you note bellow most of these are TCG related and currently unlikely > to race with each other due to TCG being single threaded (for now ;-) Argument against proliferation to subsystems that can run in other threads. >> Currently defined items are: >> >> const QEMULogItem qemu_log_items[] = { >> { CPU_LOG_TB_OUT_ASM, "out_asm", >> "show generated host assembly code for each compiled TB" }, >> { CPU_LOG_TB_IN_ASM, "in_asm", >> "show target assembly code for each compiled TB" }, >> { CPU_LOG_TB_OP, "op", >> "show micro ops for each compiled TB" }, >> { CPU_LOG_TB_OP_OPT, "op_opt", >> "show micro ops (x86 only: before eflags optimization) and\n" >> "after liveness analysis" }, >> { CPU_LOG_INT, "int", >> "show interrupts/exceptions in short format" }, >> { CPU_LOG_EXEC, "exec", >> "show trace before each executed TB (lots of logs)" }, >> { CPU_LOG_TB_CPU, "cpu", >> "show CPU state before block translation" }, >> { CPU_LOG_MMU, "mmu", >> "log MMU-related activities" }, >> { CPU_LOG_PCALL, "pcall", >> "x86 only: show protected mode far calls/returns/exceptions" }, >> { CPU_LOG_RESET, "cpu_reset", >> "show CPU state before CPU resets" }, >> { CPU_LOG_IOPORT, "ioport", >> "show all i/o ports accesses" }, >> { LOG_UNIMP, "unimp", >> "log unimplemented functionality" }, >> { LOG_GUEST_ERROR, "guest_errors", >> "log when the guest OS does something invalid (eg accessing a\n" >> "non-existent register)" }, >> { 0, NULL, NULL }, >> }; >> >> Looks like this is basically TCG with a couple of random LOG_UNIMP >> and LOG_GUEST_ERROR thrown in. It's definitely not a general purpose >> QEMU log in its current state. > > However it is useful when users find broken TCG stuff and you want a log > of what happened. Having said that I'm not sure how often that happens > because any non-trivial stuff generates huge logs. > > It is the logging I use the most though. > >> Should some of these items be tracepoints instead? > > IMHO no because stitching together tracepoints with the log file would > be a pain. Should *all* of them use tracepoints? If not, sufficiently fine-grained timestamps could reduce the pain. Alternatively, we could have a "trace to text file" backend that uses the same file as log.h. >> Do we want a general purpose QEMU log? > > It depends. If you are talking about a standardised log style thing then > maybe but all the current TCG reporting is very specialised and often > over multiple lines. My current understanding of the TCG reporting is that it's a debugging aid, not a log for users. > To my mind a general purpose log should be fairly quiet and just log > extraordinary events (maybe via syslog). The sort of stuff that would be > useful for a user to report when tailing a log before something went > wrong. Yes. >> >> If yes, should this TCG-ish log become the general purpose QEMU log? >> >> I'm asking because "[PATCH 2/3] log: report hmp/qmp command and qmp >> event" proposes to log QMP traffic here. I understand the need for >> logging QMP traffic, but I'm not sure it fits here. > > Yeah I'm unsure about this as well. To my mind as the QMP is a whole > separate API I'd be tempted to log it separately. > >> 4. Error messages with timestamps >> >> "-msg timestamp" adds timestamps to the error messages that use the >> proper error reporting interfaces. >> >> I suspect this is basically a poor work-around for not having a log >> file. > > Aren't these meant to be user visible errors? "I died because of X"? If we had a general purpose log, then errors would also go there, and get timestamped there like everything else. No real need to optionally timestamp them on stderr then.
On 19 October 2015 at 15:52, Markus Armbruster <armbru@redhat.com> wrote: > My current understanding of the TCG reporting is that it's a debugging > aid, not a log for users. It is in my view an aid for users to debug their guest code. (Sometimes it turns out that the problem is "a QEMU bug resulted in our doing the wrong thing for a valid guest", but very often the problem is "the guest code was buggy and QEMU correctly handled it as it crashed or went into an infinite loop". Being able to look at logs of what got executed is very handy in demonstrating this.) -- PMM
* Peter Maydell (peter.maydell@linaro.org) wrote: > On 19 October 2015 at 15:29, Markus Armbruster <armbru@redhat.com> wrote: > > Points I'd like to make: > > > > 1. Logging is not tracing. Logging logs events interesting for the > > user. Tracing logs code execution. It's a debugging aid. The two > > overlap to a degree, but they're not the same. > > In the case of QEMU, the event the user is often interested in > is (guest) code execution... > > > 2. The current use of log.h seems closer to tracing than to logging. > > Maybe. It gets a fair amount of use for "figure out what my > guest is doing" questions. > > > 3. I figure our tracing needs could be served by the tracing subsystem > > with a few improvements. The few things log.h can do that tracing can't > > yet do should be easy enough to add. Why have two separate subsystems > > then? > > I just feel that the logging subsystem is very simple and easy > to use. The tracing subsystem is far from easy to comprehend. > I'm apprehensive that in the name of simplification we'll end up > deleting the easy to use logging without making the tracing > as easy for end users to use. When I first looked at it, I thought the same way, but then I found the stderr mode, and it's really very easy. Dave > > 4. Logging would be useful, but I feel it shouldn't be shoehorned into > > log.h. > > The log.h code is definitely intended for our end users to run. > It's true that at the moment we log the things that are easy > to log rather than having the flexibility to log the things > end users would ideally like; but the solution to that is to > improve the logging... > > In particular, ideally the logging should allow you to debug or > instrument your guest program without having to care about QEMU's > internals particularly. LOG_UNIMP and LOG_GUEST_ERROR are moving > in that direction. > > thanks > -- PMM > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
* Markus Armbruster (armbru@redhat.com) wrote: > Peter Maydell <peter.maydell@linaro.org> writes: > > > On 16 October 2015 at 13:33, Markus Armbruster <armbru@redhat.com> wrote: > >> We have a couple of related features, and I'd like to get some clarity > >> on how exactly they should be used. > >> > >> 1. Tracing > >> > >> Documented in docs/tracing.txt. > >> > >> Each trace event can be individually controlled with -trace and with > >> monitor command trace-event. Wildcards work. Monitor command "info > >> trace-event" shows their status. > >> > >> Supports a wealth of tracing backends: nop (compiles out tracing > >> completely), stderr (prints trace to stderr), "simple", "ftrace", > >> "dtrace", ... Range from "trivially easy" to "complex power tool". > > > > The major problem with this is it is a compile time choice > > (especially the choice of backend), and our default backend > > is 'nop'. > > I think the default is / has become wrong. Easy enough to fix. > > Let's compare log.h and tracing. > > Both let you control stuff on the command line and in the monitor. > > log.h's unit of control is a mask bit, which controls a canned group of > related messages. > > Tracing's unit of control is the individual message. To control a group > of messages, use globbing. As long as we use sane names, this is > strictly more powerful than canned group. When you don't need the > power, globbing can be harder to use than canned group with sensible > names. Hmm; I don't thinking globbing actually works that well here; the naming scheme for tracing is a bit difficult since it's based on function name, and the things you might want to trace a particular behaviour are typically scattered a bit. Having said that, the 'canned group' can be solved by some premade cans of trace elements, although it would be nice to generate those automatically. > log.h can log to stderr, log to a file, or not log at all (default). > > Tracing's capabilities depend on a compile time choice: > > * You can pick multiple backends. They're all simultaneously active. > If we want to support enabling configured backends selectively at > runtime, that shouldn't be hard. > > * If you compile out tracing (configure only backend 'nop'), you can't > trace. That's a feature. > > * If you pick 'stderr', you can trace to stderr. Turning it into a > backend that could alternatively trace to a file (-trace file=FNAME) > would be easy. Picking just 'stderr' would be feature-equivalent to > log.h then. Yes, that would be useful. TBH the biggest problem I have with tracing is the flat name space and the way it's all in one trace-events file/one header. The names start getting long/non-obvious quickly, and if you use them in lots of your patches then bisecting gets really slow since every change causes recompilation of the entire code base due to the changed trace header. I started looking at trying to split trace-events but it's non-trivial since it's going to need to build multiple trace enums. Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Am 19.10.2015 um 16:29 hat Markus Armbruster geschrieben: > Points I'd like to make: > > 1. Logging is not tracing. Logging logs events interesting for the > user. Tracing logs code execution. It's a debugging aid. The two > overlap to a degree, but they're not the same. It's hard to draw a line here because qemu is a tool for debugging guests. So yes, we're logging code execution, which you call tracing. It's still output meant for the user. I've been using -d often enough, and you can easily check that I'm not a regular TCG developer. My goal was debugging the guest, not qemu. (By the way, it's a shame that -d int doesn't work with KVM.) > 2. The current use of log.h seems closer to tracing than to logging. > > 3. I figure our tracing needs could be served by the tracing subsystem > with a few improvements. The few things log.h can do that tracing can't > yet do should be easy enough to add. Why have two separate subsystems > then? I'm not objecting to this as long as the currently supported logging doesn't get worse. One important point for me is that our logging gives me nicely formatted messages, for example register dumps. I'm trying to imagine this being printed through our existing tracing functions - the horror. It would be like using QMP query-* commands instead of HMP info, simply not made for human readers. Kevin
Two useful aspects for observability: 1. Logging Error messages, rate-limited warnings, and important troubleshooting information for users. Audience: Users, system administrators, developers 2. Tracing Internal state changes and events for performance analysis. Audience: Developers DPRINTF() falls under both #1 and #2. Care needs to be taken to separate the logging from the tracing when converting DPRINTF(). Side note: we don't have effect "once only" or rate-limiting built into error_report(). Guest-triggerable errors or warnings should be limited to prevent log flooding. It would be nice to unify TCG's "qemu log" and error_report() into a single logging mechanism that is thread-safe and has rate-limiting. Stefan
On 10/21/2015 01:41 PM, Stefan Hajnoczi wrote: > Two useful aspects for observability: > > 1. Logging > > Error messages, rate-limited warnings, and important troubleshooting > information for users. > > Audience: Users, system administrators, developers > > 2. Tracing > > Internal state changes and events for performance analysis. > > Audience: Developers > > > DPRINTF() falls under both #1 and #2. Care needs to be taken to > separate the logging from the tracing when converting DPRINTF(). > > Side note: we don't have effect "once only" or rate-limiting built into > error_report(). Guest-triggerable errors or warnings should be limited > to prevent log flooding. > > It would be nice to unify TCG's "qemu log" and error_report() into a > single logging mechanism that is thread-safe and has rate-limiting. > > Stefan Sounds very good to me. Short and simple. Den
On 21 October 2015 at 11:41, Stefan Hajnoczi <stefanha@redhat.com> wrote: > DPRINTF() falls under both #1 and #2. Care needs to be taken to > separate the logging from the tracing when converting DPRINTF(). > > Side note: we don't have effect "once only" or rate-limiting built into > error_report(). Guest-triggerable errors or warnings should be limited > to prevent log flooding. > > It would be nice to unify TCG's "qemu log" and error_report() into a > single logging mechanism that is thread-safe and has rate-limiting. As long as you can disable the rate-limiting: for almost all of the use cases for the qemu-log code you really don't want to have it drop logging messages. thanks -- PMM
On Wed, Oct 21, 2015 at 01:22:22PM +0100, Peter Maydell wrote: > On 21 October 2015 at 11:41, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > DPRINTF() falls under both #1 and #2. Care needs to be taken to > > separate the logging from the tracing when converting DPRINTF(). > > > > Side note: we don't have effect "once only" or rate-limiting built into > > error_report(). Guest-triggerable errors or warnings should be limited > > to prevent log flooding. > > > > It would be nice to unify TCG's "qemu log" and error_report() into a > > single logging mechanism that is thread-safe and has rate-limiting. > > As long as you can disable the rate-limiting: for almost all > of the use cases for the qemu-log code you really don't want > to have it drop logging messages. In the Linux kernel there are a few primitives: printk() - plain old printf printk_once() - uses a static bool for once-only semantics, modulo races between CPUs printk_ratelimited() - time-based rate-limiting (And if you need to configure the ratelimit threshold, you can use the underlying __ratelimit() function.) The same could work for QEMU. Existing "qemu log" stuff is mostly printk(). Any messages that are not errors or warnings don't need rate-limiting because they shouldn't be enabled by default (and when you do want them for troubleshooting, then rate-limiting gets in the way). Stefan
On 22/10/2015 14:26, Stefan Hajnoczi wrote: > printk() - plain old printf > printk_once() - uses a static bool for once-only semantics, > modulo races between CPUs > printk_ratelimited() - time-based rate-limiting > > (And if you need to configure the ratelimit threshold, you can use the > underlying __ratelimit() function.) > > The same could work for QEMU. Existing "qemu log" stuff is mostly > printk(). Any messages that are not errors or warnings don't need > rate-limiting because they shouldn't be enabled by default (and when you > do want them for troubleshooting, then rate-limiting gets in the way). Actually, most of the "qemu log" stuff is TCG tracing. UNIMP and GUEST_ERROR are not tracing, and could indeed go to stderr with a rate-limiter (with a "-msg ratelimit=no" option). I agree with you about DPRINTF being sometimes tracing and sometimes logging. I'm planning to integrate the stderr tracing backend with qemu log. I think this is the most pressing need, and I think I can have some patches ready for comments on Monday. It depends on whether I'll decide to play with QEMU or LEGO Digital Designer on the way to Seoul. Second, we can integrate qemu log with "-msg timestamp=yes" and add the above rate-limiting functionality. Paolo
diff --git a/include/qemu/log.h b/include/qemu/log.h index 57b8c96..ba86606 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -41,6 +41,7 @@ static inline bool qemu_log_enabled(void) #define LOG_UNIMP (1 << 10) #define LOG_GUEST_ERROR (1 << 11) #define CPU_LOG_MMU (1 << 12) +#define LOG_CMD (1 << 13) /* Returns true if a bit is set in the current loglevel mask */ diff --git a/monitor.c b/monitor.c index 4f1ba2f..71810bb 100644 --- a/monitor.c +++ b/monitor.c @@ -434,6 +434,12 @@ static void monitor_protocol_emitter(Monitor *mon, QObject *data, } monitor_json_emitter(mon, QOBJECT(qmp)); + if (qemu_loglevel_mask(LOG_CMD)) { + QString *output_json = qobject_to_json_pretty(QOBJECT(qmp)); + qemu_log("qmp response:\n%s\n", output_json->string); + QDECREF(output_json); + } + QDECREF(qmp); } @@ -474,6 +480,11 @@ monitor_qapi_event_queue(QAPIEvent event, QDict *data, Error **errp) evstate->last, now); + if (qemu_loglevel_mask(LOG_CMD)) { + QString *output_json = qobject_to_json_pretty(QOBJECT(data)); + qemu_log("qmp event:\n%s\n", output_json->string); + QDECREF(output_json); + } /* Rate limit of 0 indicates no throttling */ qemu_mutex_lock(&monitor_lock); if (!evstate->rate) { @@ -2848,6 +2859,8 @@ static void handle_hmp_command(Monitor *mon, const char *cmdline) QDict *qdict; const mon_cmd_t *cmd; + qemu_log_mask(LOG_CMD, "hmp request:\n%s\n", cmdline); + cmd = monitor_parse_command(mon, &cmdline, mon->cmd_table); if (!cmd) { return; @@ -3823,6 +3836,11 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens) goto err_out; } + if (qemu_loglevel_mask(LOG_CMD)) { + QString *output_json = output_json = qobject_to_json_pretty(obj); + qemu_log("qmp request:\n%s\n", output_json->string); + QDECREF(output_json); + } input = qmp_check_input_obj(obj, &local_err); if (!input) { qobject_decref(obj); diff --git a/qemu-log.c b/qemu-log.c index e6d2b3f..ba7b34c 100644 --- a/qemu-log.c +++ b/qemu-log.c @@ -98,6 +98,7 @@ const QEMULogItem qemu_log_items[] = { { LOG_GUEST_ERROR, "guest_errors", "log when the guest OS does something invalid (eg accessing a\n" "non-existent register)" }, + { LOG_CMD, "cmd", "log the hmp/qmp commands execution" }, { 0, NULL, NULL }, };