@@ -1459,6 +1459,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
+ int64_t start_ts_each, end_ts_each;
SaveStateEntry *se;
int ret;
@@ -1475,6 +1476,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
continue;
}
}
+
+ start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_savevm_section_start(se->idstr, se->section_id);
save_section_header(f, se, QEMU_VM_SECTION_END);
@@ -1486,6 +1489,9 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
qemu_file_set_error(f, ret);
return -1;
}
+ end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
+ end_ts_each - start_ts_each);
}
return 0;
@@ -1496,6 +1502,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
bool inactivate_disks)
{
MigrationState *ms = migrate_get_current();
+ int64_t start_ts_each, end_ts_each;
JSONWriter *vmdesc = ms->vmdesc;
int vmdesc_len;
SaveStateEntry *se;
@@ -1507,11 +1514,17 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
continue;
}
+ start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+
ret = vmstate_save(f, se, vmdesc);
if (ret) {
qemu_file_set_error(f, ret);
return ret;
}
+
+ end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
+ end_ts_each - start_ts_each);
}
if (inactivate_disks) {
@@ -2506,9 +2519,12 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
}
static int
-qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
+ uint8_t type)
{
+ bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
uint32_t instance_id, version_id, section_id;
+ int64_t start_ts, end_ts;
SaveStateEntry *se;
char idstr[256];
int ret;
@@ -2557,12 +2573,23 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
return -EINVAL;
}
+ if (trace_downtime) {
+ start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ }
+
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state for instance 0x%"PRIx32" of"
" device '%s'", instance_id, idstr);
return ret;
}
+
+ if (trace_downtime) {
+ end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_load("non-iterable", se->idstr,
+ se->instance_id, end_ts - start_ts);
+ }
+
if (!check_section_footer(f, se)) {
return -EINVAL;
}
@@ -2571,8 +2598,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
}
static int
-qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
+qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
+ uint8_t type)
{
+ bool trace_downtime = (type == QEMU_VM_SECTION_END);
+ int64_t start_ts, end_ts;
uint32_t section_id;
SaveStateEntry *se;
int ret;
@@ -2597,12 +2627,23 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
return -EINVAL;
}
+ if (trace_downtime) {
+ start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ }
+
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state section id %d(%s)",
section_id, se->idstr);
return ret;
}
+
+ if (trace_downtime) {
+ end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ trace_vmstate_downtime_load("iterable", se->idstr,
+ se->instance_id, end_ts - start_ts);
+ }
+
if (!check_section_footer(f, se)) {
return -EINVAL;
}
@@ -2791,14 +2832,14 @@ retry:
switch (section_type) {
case QEMU_VM_SECTION_START:
case QEMU_VM_SECTION_FULL:
- ret = qemu_loadvm_section_start_full(f, mis);
+ ret = qemu_loadvm_section_start_full(f, mis, section_type);
if (ret < 0) {
goto out;
}
break;
case QEMU_VM_SECTION_PART:
case QEMU_VM_SECTION_END:
- ret = qemu_loadvm_section_part_end(f, mis);
+ ret = qemu_loadvm_section_part_end(f, mis, section_type);
if (ret < 0) {
goto out;
}
@@ -48,6 +48,8 @@ savevm_state_cleanup(void) ""
savevm_state_complete_precopy(void) ""
vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
+vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%"PRIu32" downtime=%"PRIi64
+vmstate_downtime_load(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%"PRIu32" downtime=%"PRIi64
postcopy_pause_incoming(void) ""
postcopy_pause_incoming_continued(void) ""
postcopy_page_req_sync(void *host_addr) "sync page req %p"
We have a bunch of savevm_section* tracepoints. They are good for analyzing the migration stream, but not always suitable when someone wants to analyze the migration downtime. Two major problems: - savevm_section* tracepoints dump all sections, while we only care about the sections that contribute to the downtime - They don't have an identifier to show the type of section, so there is no easy way to filter out the downtime information either. We could add the type into those tracepoints, but instead of doing so, this patch keeps them untouched and adds a set of downtime-specific tracepoints, so one can enable the "vmstate_downtime*" tracepoints and get a full picture of how the downtime is distributed across iterative and non-iterative vmstate save/load. Note that both save() and load() need to be traced here, because both of them may contribute to the downtime. The contribution is not a simple "add them together", though: consider the case where the src is doing a save() of device1 while the dest is load()ing device2, so the two can happen concurrently. Tracking both sides makes sense because device load() and save() can be imbalanced: one device can save() super fast but load() super slow, or vice versa. We can't figure that out without tracing both. Signed-off-by: Peter Xu <peterx@redhat.com> --- migration/savevm.c | 49 ++++++++++++++++++++++++++++++++++++++---- migration/trace-events | 2 ++ 2 files changed, 47 insertions(+), 4 deletions(-)