Message ID | 1448592497-2462-4-git-send-email-peterx@redhat.com |
---|---|
State | New |
Headers | show |
On Fri, 11/27 10:48, Peter Xu wrote: > This patch implements "detach" for "dump-guest-memory" command. When > specified, one background thread is created to do the dump work. > > When there is a dump running in background, the commands "stop", > "cont" and "dump-guest-memory" will fail directly, with a hint > telling user: "there is a dump in progress". > > Although it is not quite essential for now, the new codes are trying > to make sure the dump is thread safe. To do this, one list is added > into GuestPhysBlockList to track all the referenced MemoryRegions > during dump process. We should make sure each MemoryRegion would > only be referenced for once. > > One global struct "GlobalDumpState" is added to store some global > informations for dump procedures. One mutex is used to protect the > global dump info. This patch doesn't handle the incoming migration case, i.e. when QEMU is started with "-incoming", or "-incoming defer". Dump can go wrong when incoming migration happens at the same time. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > dump.c | 114 +++++++++++++++++++++++++++++++++++++--- > include/qemu-common.h | 4 ++ > include/sysemu/dump.h | 10 ++++ > include/sysemu/memory_mapping.h | 8 +++ > memory_mapping.c | 46 +++++++++++++++- > qmp.c | 14 +++++ > 6 files changed, 188 insertions(+), 8 deletions(-) > > diff --git a/dump.c b/dump.c > index d79e0ed..dfd56aa 100644 > --- a/dump.c > +++ b/dump.c > @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) > static int dump_cleanup(DumpState *s) > { > guest_phys_blocks_free(&s->guest_phys_blocks); > + guest_memory_region_free(&s->guest_phys_blocks); > memory_mapping_list_free(&s->list); > close(s->fd); > if (s->resume) { > @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool has_format, > Error *err = NULL; > int ret; > > + s->has_format = has_format; > + s->format = format; > + > /* kdump-compressed is conflict with paging and filter */ > if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > assert(!paging && !has_filter); > @@ -1580,6 +1584,86 @@ cleanup: > dump_cleanup(s); > } > > +extern GlobalDumpState global_dump_state; dump_state_get_global() returns a pointer to the local static variable, why do you need this? But what I really wonder is why a single boolean is not enough: the DumpState pointer can be passed to dump_thread, so it doesn't need to be global, then you don't need the mutex. > + > +static GlobalDumpState *dump_state_get_global(void) > +{ > + static bool init = false; > + static GlobalDumpState global_dump_state; > + if (unlikely(!init)) { > + qemu_mutex_init(&global_dump_state.gds_mutex); > + global_dump_state.gds_cur = NULL; > + init = true; > + } > + return &global_dump_state; > +} > + > +/* Returns non-zero if there is existing dump in progress, otherwise > + * zero is returned. */ > +bool dump_in_progress(void) > +{ > + GlobalDumpState *global = dump_state_get_global(); > + /* we do not need to take the mutex here if we are checking the > + * pointer only. */ > + return (!!global->gds_cur); > +} > + > +/* Trying to create one DumpState. This will fail if there is an > + * existing one. Return DumpState pointer if succeeded, otherwise > + * NULL is returned. */ > +static DumpState *dump_state_create(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + if (cur) { > + /* one dump in progress */ > + cur = NULL; > + } else { > + /* we are the first! */ > + cur = g_malloc0(sizeof(*cur)); > + global->gds_cur = cur; > + } > + > + qemu_mutex_unlock(&global->gds_mutex); > + return cur; > +} > + > +/* release current DumpState structure */ > +static void dump_state_release(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + /* we should never call release before having one */ > + assert(cur); > + global->gds_cur = NULL; > + > + qemu_mutex_unlock(&global->gds_mutex); > + dump_cleanup(cur); > + g_free(cur); > +} > + > +/* this operation might be time consuming. */ > +static void dump_process(DumpState *s, Error **errp) > +{ > + if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > + create_kdump_vmcore(s, errp); > + } else { > + create_vmcore(s, errp); > + } > +} > + > +static void *dump_thread(void *data) > +{ > + GlobalDumpState *global = (GlobalDumpState *)data; > + dump_process(global->gds_cur, NULL); > + dump_state_release(global); > + return NULL; > +} > + > void qmp_dump_guest_memory(bool paging, const char *file, > bool has_detach, bool detach, > bool has_begin, int64_t begin, bool has_length, > @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char *file, > int fd = -1; > DumpState *s; > Error *local_err = NULL; > + /* by default, we are keeping the old style, which is sync dump */ > + bool detach_p = false; > + GlobalDumpState *global = dump_state_get_global(); > + > + if (runstate_check(RUN_STATE_INMIGRATE)) { > + error_setg(errp, "Dump not allowed during incoming migration."); > + return; > + } > > /* > * kdump-compressed format need the whole memory dumped, so paging or > @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char *file, > error_setg(errp, QERR_MISSING_PARAMETER, "begin"); > return; > } > + if (has_detach) { > + detach_p = detach; > + } > > /* check whether lzo/snappy is supported */ > #ifndef CONFIG_LZO > @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char *file, > return; > } > > - s = g_malloc0(sizeof(DumpState)); > + s = dump_state_create(global); > + if (!s) { > + error_setg(errp, "One dump is in progress."); > + return; > + } > > dump_init(s, fd, has_format, format, paging, has_begin, > begin, length, &local_err); > @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char *file, > return; > } > > - if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > - create_kdump_vmcore(s, errp); > + if (!detach_p) { > + /* sync dump */ > + dump_process(s, errp); > + dump_state_release(global); > } else { > - create_vmcore(s, errp); > + qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, > + global, QEMU_THREAD_DETACHED); > + /* DumpState is freed in dump thread */ > } > - > - dump_cleanup(s); > - g_free(s); > } > > DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) > diff --git a/include/qemu-common.h b/include/qemu-common.h > index 405364f..7b74961 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int initial); > const char *qemu_ether_ntoa(const MACAddr *mac); > void page_size_init(void); > > +/* returns non-zero if dump is in progress, otherwise zero is > + * returned. */ > +bool dump_in_progress(void); > + > #endif > diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h > index 7e4ec5c..13c913e 100644 > --- a/include/sysemu/dump.h > +++ b/include/sysemu/dump.h > @@ -183,9 +183,19 @@ typedef struct DumpState { > off_t offset_page; /* offset of page part in vmcore */ > size_t num_dumpable; /* number of page that can be dumped */ > uint32_t flag_compress; /* indicate the compression format */ > + > + QemuThread dump_thread; /* only used when do async dump */ > + bool has_format; /* whether format is provided */ > + DumpGuestMemoryFormat format; /* valid only if has_format == true */ > } DumpState; > > +typedef struct GlobalDumpState { > + QemuMutex gds_mutex; /* protector for GlobalDumpState itself */ > + DumpState *gds_cur; /* current DumpState (might be NULL) */ > +} GlobalDumpState; > + > uint16_t cpu_to_dump16(DumpState *s, uint16_t val); > uint32_t cpu_to_dump32(DumpState *s, uint32_t val); > uint64_t cpu_to_dump64(DumpState *s, uint64_t val); > + > #endif > diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h > index a75d59a..dd56fc7 100644 > --- a/include/sysemu/memory_mapping.h > +++ b/include/sysemu/memory_mapping.h > @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock { > QTAILQ_ENTRY(GuestPhysBlock) next; > } GuestPhysBlock; > > +typedef struct GuestMemoryRegion { > + MemoryRegion *gmr_mr; > + QTAILQ_ENTRY(GuestMemoryRegion) next; > +} GuestMemoryRegion; > + > /* point-in-time snapshot of guest-visible physical mappings */ > typedef struct GuestPhysBlockList { > unsigned num; > QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head; > + /* housekeep all the MemoryRegion that is referenced */ > + QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr; > } GuestPhysBlockList; > > /* The physical and virtual address in the memory mapping are contiguous. */ > @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list); > void memory_mapping_list_init(MemoryMappingList *list); > > void guest_phys_blocks_free(GuestPhysBlockList *list); > +void guest_memory_region_free(GuestPhysBlockList *list); > void guest_phys_blocks_init(GuestPhysBlockList *list); > void guest_phys_blocks_append(GuestPhysBlockList *list); > > diff --git a/memory_mapping.c b/memory_mapping.c > index 36d6b26..a279552 100644 > --- a/memory_mapping.c > +++ b/memory_mapping.c > @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list) > { > list->num = 0; > QTAILQ_INIT(&list->head); > + QTAILQ_INIT(&list->head_mr); > } > > typedef struct GuestPhysListener { > @@ -193,6 +194,39 @@ typedef struct GuestPhysListener { > MemoryListener listener; > } GuestPhysListener; > > +static void guest_memory_region_add(GuestPhysBlockList *list, > + MemoryRegion *mr) > +{ > + GuestMemoryRegion *gmr = NULL; > + QTAILQ_FOREACH(gmr, &list->head_mr, next) { > + if (gmr->gmr_mr == mr) { > + /* we already refererenced the MemoryRegion */ > + return; > + } This is O(N^2). I think it should be fine to skip this check and just grab the reference. If that is the case, we can probably embed the mr pointer in GuestPhysBlock. Paolo? > + } > + /* reference the MemoryRegion to make sure it's valid during dump */ > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name); > +#endif > + memory_region_ref(mr); > + gmr = g_malloc0(sizeof(*gmr)); > + gmr->gmr_mr = mr; > + QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next); > +} > + > +void guest_memory_region_free(GuestPhysBlockList *list) > +{ > + GuestMemoryRegion *gmr = NULL, *q = NULL; > + QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) { > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name); > +#endif > + QTAILQ_REMOVE(&list->head_mr, gmr, next); > + memory_region_unref(gmr->gmr_mr); > + g_free(gmr); > + } > +} > + > static void guest_phys_blocks_region_add(MemoryListener *listener, > MemoryRegionSection *section) > { > @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, > block->target_end = target_end; > block->host_addr = host_addr; > > + /* keep all the MemoryRegion that is referenced by the dump > + * process */ > + guest_memory_region_add(g->list, section->mr); > + > QTAILQ_INSERT_TAIL(&g->list->head, block, next); > ++g->list->num; > } else { > @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, > predecessor->target_end = target_end; > } > > -#ifdef DEBUG_GUEST_PHYS_REGION_ADD > +#ifdef DEBUG_DUMP_GUEST_MEMORY > fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end=" > TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start, > target_end, predecessor ? "joined" : "added", g->list->num); > @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) > > g.list = list; > g.listener.region_add = &guest_phys_blocks_region_add; > + /* We should make sure all the memory objects are valid during > + * add dump regions. Before releasing it, we should also make > + * sure all the MemoryRegions to be used during dump is > + * referenced. */ > + rcu_read_lock(); > memory_listener_register(&g.listener, &address_space_memory); > memory_listener_unregister(&g.listener); > + rcu_read_unlock(); I don't think this is necessary if VM is stopped and incoming migration is not running - address_space_memory will be safe to access as usual. Otherwise this is a separate problem. > } > > static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) > diff --git a/qmp.c b/qmp.c > index 0a1fa19..055586d 100644 > --- a/qmp.c > +++ b/qmp.c > @@ -102,6 +102,13 @@ void qmp_quit(Error **errp) > > void qmp_stop(Error **errp) > { > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_check(RUN_STATE_INMIGRATE)) { > autostart = 0; > } else { > @@ -174,6 +181,13 @@ void qmp_cont(Error **errp) > BlockBackend *blk; > BlockDriverState *bs; > > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_needs_reset()) { > error_setg(errp, "Resetting the Virtual Machine is required"); > return; > -- > 2.4.3 > >
On Fri, 11/27 13:14, Fam Zheng wrote: > But what I really wonder is why a single boolean is not enough: the DumpState > pointer can be passed to dump_thread, so it doesn't need to be global, then you > don't need the mutex. Never mind, it's useful in later patches. Fam
On 11/27/2015 01:14 PM, Fam Zheng wrote: > On Fri, 11/27 10:48, Peter Xu wrote: >> This patch implements "detach" for "dump-guest-memory" command. When >> specified, one background thread is created to do the dump work. >> >> When there is a dump running in background, the commands "stop", >> "cont" and "dump-guest-memory" will fail directly, with a hint >> telling user: "there is a dump in progress". >> >> Although it is not quite essential for now, the new codes are trying >> to make sure the dump is thread safe. To do this, one list is added >> into GuestPhysBlockList to track all the referenced MemoryRegions >> during dump process. We should make sure each MemoryRegion would >> only be referenced for once. >> >> One global struct "GlobalDumpState" is added to store some global >> informations for dump procedures. One mutex is used to protect the >> global dump info. > > This patch doesn't handle the incoming migration case, i.e. when QEMU is > started with "-incoming", or "-incoming defer". Dump can go wrong when incoming > migration happens at the same time. > >> >> Signed-off-by: Peter Xu <peterx@redhat.com> >> --- >> dump.c | 114 +++++++++++++++++++++++++++++++++++++--- >> include/qemu-common.h | 4 ++ >> include/sysemu/dump.h | 10 ++++ >> include/sysemu/memory_mapping.h | 8 +++ >> memory_mapping.c | 46 +++++++++++++++- >> qmp.c | 14 +++++ >> 6 files changed, 188 insertions(+), 8 deletions(-) >> >> diff --git a/dump.c b/dump.c >> index d79e0ed..dfd56aa 100644 >> --- a/dump.c >> +++ b/dump.c >> @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) >> static int dump_cleanup(DumpState *s) >> { >> guest_phys_blocks_free(&s->guest_phys_blocks); >> + guest_memory_region_free(&s->guest_phys_blocks); >> memory_mapping_list_free(&s->list); >> close(s->fd); >> if (s->resume) { >> @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool has_format, >> Error *err = NULL; >> int ret; >> >> + s->has_format = has_format; >> + s->format = format; >> + >> /* kdump-compressed is conflict with paging and filter */ >> if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { >> assert(!paging && !has_filter); >> @@ -1580,6 +1584,86 @@ cleanup: >> dump_cleanup(s); >> } >> >> +extern GlobalDumpState global_dump_state; > > dump_state_get_global() returns a pointer to the local static variable, why do > you need this? > > But what I really wonder is why a single boolean is not enough: the DumpState > pointer can be passed to dump_thread, so it doesn't need to be global, then you > don't need the mutex. > >> + >> +static GlobalDumpState *dump_state_get_global(void) >> +{ >> + static bool init = false; >> + static GlobalDumpState global_dump_state; >> + if (unlikely(!init)) { >> + qemu_mutex_init(&global_dump_state.gds_mutex); >> + global_dump_state.gds_cur = NULL; >> + init = true; >> + } >> + return &global_dump_state; >> +} >> + >> +/* Returns non-zero if there is existing dump in progress, otherwise >> + * zero is returned. */ >> +bool dump_in_progress(void) >> +{ >> + GlobalDumpState *global = dump_state_get_global(); >> + /* we do not need to take the mutex here if we are checking the >> + * pointer only. */ >> + return (!!global->gds_cur); >> +} >> + >> +/* Trying to create one DumpState. This will fail if there is an >> + * existing one. Return DumpState pointer if succeeded, otherwise >> + * NULL is returned. */ >> +static DumpState *dump_state_create(GlobalDumpState *global) >> +{ >> + DumpState *cur = NULL; >> + qemu_mutex_lock(&global->gds_mutex); >> + >> + cur = global->gds_cur; >> + if (cur) { >> + /* one dump in progress */ >> + cur = NULL; >> + } else { >> + /* we are the first! */ >> + cur = g_malloc0(sizeof(*cur)); >> + global->gds_cur = cur; >> + } >> + >> + qemu_mutex_unlock(&global->gds_mutex); >> + return cur; >> +} >> + >> +/* release current DumpState structure */ >> +static void dump_state_release(GlobalDumpState *global) >> +{ >> + DumpState *cur = NULL; >> + qemu_mutex_lock(&global->gds_mutex); >> + >> + cur = global->gds_cur; >> + /* we should never call release before having one */ >> + assert(cur); >> + global->gds_cur = NULL; >> + >> + qemu_mutex_unlock(&global->gds_mutex); >> + dump_cleanup(cur); >> + g_free(cur); >> +} >> + >> +/* this operation might be time consuming. */ >> +static void dump_process(DumpState *s, Error **errp) >> +{ >> + if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { >> + create_kdump_vmcore(s, errp); >> + } else { >> + create_vmcore(s, errp); >> + } >> +} >> + >> +static void *dump_thread(void *data) >> +{ >> + GlobalDumpState *global = (GlobalDumpState *)data; >> + dump_process(global->gds_cur, NULL); >> + dump_state_release(global); >> + return NULL; >> +} >> + >> void qmp_dump_guest_memory(bool paging, const char *file, >> bool has_detach, bool detach, >> bool has_begin, int64_t begin, bool has_length, >> @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char *file, >> int fd = -1; >> DumpState *s; >> Error *local_err = NULL; >> + /* by default, we are keeping the old style, which is sync dump */ >> + bool detach_p = false; >> + GlobalDumpState *global = dump_state_get_global(); >> + >> + if (runstate_check(RUN_STATE_INMIGRATE)) { >> + error_setg(errp, "Dump not allowed during incoming migration."); >> + return; >> + } >> >> /* >> * kdump-compressed format need the whole memory dumped, so paging or >> @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char *file, >> error_setg(errp, QERR_MISSING_PARAMETER, "begin"); >> return; >> } >> + if (has_detach) { >> + detach_p = detach; >> + } >> >> /* check whether lzo/snappy is supported */ >> #ifndef CONFIG_LZO >> @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char *file, >> return; >> } >> >> - s = g_malloc0(sizeof(DumpState)); >> + s = dump_state_create(global); >> + if (!s) { >> + error_setg(errp, "One dump is in progress."); >> + return; >> + } >> >> dump_init(s, fd, has_format, format, paging, has_begin, >> begin, length, &local_err); >> @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char *file, >> return; >> } >> >> - if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { >> - create_kdump_vmcore(s, errp); >> + if (!detach_p) { >> + /* sync dump */ >> + dump_process(s, errp); >> + dump_state_release(global); >> } else { >> - create_vmcore(s, errp); >> + qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, >> + global, QEMU_THREAD_DETACHED); >> + /* DumpState is freed in dump thread */ >> } >> - >> - dump_cleanup(s); >> - g_free(s); >> } >> >> DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) >> diff --git a/include/qemu-common.h b/include/qemu-common.h >> index 405364f..7b74961 100644 >> --- a/include/qemu-common.h >> +++ b/include/qemu-common.h >> @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int initial); >> const char *qemu_ether_ntoa(const MACAddr *mac); >> void page_size_init(void); >> >> +/* returns non-zero if dump is in progress, otherwise zero is >> + * returned. */ >> +bool dump_in_progress(void); >> + >> #endif >> diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h >> index 7e4ec5c..13c913e 100644 >> --- a/include/sysemu/dump.h >> +++ b/include/sysemu/dump.h >> @@ -183,9 +183,19 @@ typedef struct DumpState { >> off_t offset_page; /* offset of page part in vmcore */ >> size_t num_dumpable; /* number of page that can be dumped */ >> uint32_t flag_compress; /* indicate the compression format */ >> + >> + QemuThread dump_thread; /* only used when do async dump */ >> + bool has_format; /* whether format is provided */ >> + DumpGuestMemoryFormat format; /* valid only if has_format == true */ >> } DumpState; >> >> +typedef struct GlobalDumpState { >> + QemuMutex gds_mutex; /* protector for GlobalDumpState itself */ >> + DumpState *gds_cur; /* current DumpState (might be NULL) */ >> +} GlobalDumpState; >> + >> uint16_t cpu_to_dump16(DumpState *s, uint16_t val); >> uint32_t cpu_to_dump32(DumpState *s, uint32_t val); >> uint64_t cpu_to_dump64(DumpState *s, uint64_t val); >> + >> #endif >> diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h >> index a75d59a..dd56fc7 100644 >> --- a/include/sysemu/memory_mapping.h >> +++ b/include/sysemu/memory_mapping.h >> @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock { >> QTAILQ_ENTRY(GuestPhysBlock) next; >> } GuestPhysBlock; >> >> +typedef struct GuestMemoryRegion { >> + MemoryRegion *gmr_mr; >> + QTAILQ_ENTRY(GuestMemoryRegion) next; >> +} GuestMemoryRegion; >> + >> /* point-in-time snapshot of guest-visible physical mappings */ >> typedef struct GuestPhysBlockList { >> unsigned num; >> QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head; >> + /* housekeep all the MemoryRegion that is referenced */ >> + QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr; >> } GuestPhysBlockList; >> >> /* The physical and virtual address in the memory mapping are contiguous. */ >> @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list); >> void memory_mapping_list_init(MemoryMappingList *list); >> >> void guest_phys_blocks_free(GuestPhysBlockList *list); >> +void guest_memory_region_free(GuestPhysBlockList *list); >> void guest_phys_blocks_init(GuestPhysBlockList *list); >> void guest_phys_blocks_append(GuestPhysBlockList *list); >> >> diff --git a/memory_mapping.c b/memory_mapping.c >> index 36d6b26..a279552 100644 >> --- a/memory_mapping.c >> +++ b/memory_mapping.c >> @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list) >> { >> list->num = 0; >> QTAILQ_INIT(&list->head); >> + QTAILQ_INIT(&list->head_mr); >> } >> >> typedef struct GuestPhysListener { >> @@ -193,6 +194,39 @@ typedef struct GuestPhysListener { >> MemoryListener listener; >> } GuestPhysListener; >> >> +static void guest_memory_region_add(GuestPhysBlockList *list, >> + MemoryRegion *mr) >> +{ >> + GuestMemoryRegion *gmr = NULL; >> + QTAILQ_FOREACH(gmr, &list->head_mr, next) { >> + if (gmr->gmr_mr == mr) { >> + /* we already refererenced the MemoryRegion */ >> + return; >> + } > > This is O(N^2). I think it should be fine to skip this check and just grab the > reference. If that is the case, we can probably embed the mr pointer in > GuestPhysBlock. Paolo? If embed the mr pointer into GuestPhyBlock, then we might need to ref/unref one MemoryRegion for multiple times (and I am not sure how many, maybe it could be a very big number especially the MemoryRegionSections are scattered?). Not sure whether it is more efficient. For what I see, the number of MemoryRegions should be not big (<5 in my case). So even with O(N^2), it should merely like O(N). Not sure about this too. Would like to hear more review comments from Paolo and others. > >> + } >> + /* reference the MemoryRegion to make sure it's valid during dump */ >> +#ifdef DEBUG_DUMP_GUEST_MEMORY >> + fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name); >> +#endif >> + memory_region_ref(mr); >> + gmr = g_malloc0(sizeof(*gmr)); >> + gmr->gmr_mr = mr; >> + QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next); >> +} >> + >> +void guest_memory_region_free(GuestPhysBlockList *list) >> +{ >> + GuestMemoryRegion *gmr = NULL, *q = NULL; >> + QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) { >> +#ifdef DEBUG_DUMP_GUEST_MEMORY >> + fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name); >> +#endif >> + QTAILQ_REMOVE(&list->head_mr, gmr, next); >> + memory_region_unref(gmr->gmr_mr); >> + g_free(gmr); >> + } >> +} >> + >> static void guest_phys_blocks_region_add(MemoryListener *listener, >> MemoryRegionSection *section) >> { >> @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, >> block->target_end = target_end; >> block->host_addr = host_addr; >> >> + /* keep all the MemoryRegion that is referenced by the dump >> + * process */ >> + guest_memory_region_add(g->list, section->mr); >> + >> QTAILQ_INSERT_TAIL(&g->list->head, block, next); >> ++g->list->num; >> } else { >> @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, >> predecessor->target_end = target_end; >> } >> >> -#ifdef DEBUG_GUEST_PHYS_REGION_ADD >> +#ifdef DEBUG_DUMP_GUEST_MEMORY >> fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end=" >> TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start, >> target_end, predecessor ? "joined" : "added", g->list->num); >> @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) >> >> g.list = list; >> g.listener.region_add = &guest_phys_blocks_region_add; >> + /* We should make sure all the memory objects are valid during >> + * add dump regions. Before releasing it, we should also make >> + * sure all the MemoryRegions to be used during dump is >> + * referenced. */ >> + rcu_read_lock(); >> memory_listener_register(&g.listener, &address_space_memory); >> memory_listener_unregister(&g.listener); >> + rcu_read_unlock(); > > I don't think this is necessary if VM is stopped and incoming migration is > not running - address_space_memory will be safe to access as usual. Otherwise > this is a separate problem. Yes, I think it should work too without the two lines. I am just taking Paolo's suggestions. It should work too if we do not take the reference counts for MemoryRegion. However, without those things (ref counts, rcu locks), it's not thread safe even it could work. Not sure whether I am understanding it correctly. Thanks! Peter > >> } >> >> static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) >> diff --git a/qmp.c b/qmp.c >> index 0a1fa19..055586d 100644 >> --- a/qmp.c >> +++ b/qmp.c >> @@ -102,6 +102,13 @@ void qmp_quit(Error **errp) >> >> void qmp_stop(Error **errp) >> { >> + /* if there is a dump in background, we should wait until the dump >> + * finished */ >> + if (dump_in_progress()) { >> + error_setg(errp, "There is a dump in process, please wait."); >> + return; >> + } >> + >> if (runstate_check(RUN_STATE_INMIGRATE)) { >> autostart = 0; >> } else { >> @@ -174,6 +181,13 @@ void qmp_cont(Error **errp) >> BlockBackend *blk; >> BlockDriverState *bs; >> >> + /* if there is a dump in background, we should wait until the dump >> + * finished */ >> + if (dump_in_progress()) { >> + error_setg(errp, "There is a dump in process, please wait."); >> + return; >> + } >> + >> if (runstate_needs_reset()) { >> error_setg(errp, "Resetting the Virtual Machine is required"); >> return; >> -- >> 2.4.3 >> >>
On 27/11/2015 07:27, Peter Xu wrote: > If embed the mr pointer into GuestPhyBlock, then we might need to > ref/unref one MemoryRegion for multiple times (and I am not sure how > many, maybe it could be a very big number especially the > MemoryRegionSections are scattered?). Not sure whether it is more > efficient. > > For what I see, the number of MemoryRegions should be not big (<5 in > my case). So even with O(N^2), it should merely like O(N). Not sure > about this too. > > Would like to hear more review comments from Paolo and others. > Fam suggestion is a good one, ref'ing one MemoryRegion many times is not a problem. Also I noticed now that you do the dump_init in the main thread (using a listener), so the RCU lock/unlock is not needed. I don't know this code very well. It's worth adding a comment at the top of functions that are called from a separate thread. Thanks, Paolo
On 27/11/2015 03:48, Peter Xu wrote: > This patch implements "detach" for "dump-guest-memory" command. When > specified, one background thread is created to do the dump work. > > When there is a dump running in background, the commands "stop", > "cont" and "dump-guest-memory" will fail directly, with a hint > telling user: "there is a dump in progress". > > Although it is not quite essential for now, the new codes are trying > to make sure the dump is thread safe. To do this, one list is added > into GuestPhysBlockList to track all the referenced MemoryRegions > during dump process. We should make sure each MemoryRegion would > only be referenced for once. > > One global struct "GlobalDumpState" is added to store some global > informations for dump procedures. One mutex is used to protect the > global dump info. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > dump.c | 114 +++++++++++++++++++++++++++++++++++++--- > include/qemu-common.h | 4 ++ > include/sysemu/dump.h | 10 ++++ > include/sysemu/memory_mapping.h | 8 +++ > memory_mapping.c | 46 +++++++++++++++- > qmp.c | 14 +++++ > 6 files changed, 188 insertions(+), 8 deletions(-) > > diff --git a/dump.c b/dump.c > index d79e0ed..dfd56aa 100644 > --- a/dump.c > +++ b/dump.c > @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) > static int dump_cleanup(DumpState *s) > { > guest_phys_blocks_free(&s->guest_phys_blocks); > + guest_memory_region_free(&s->guest_phys_blocks); > memory_mapping_list_free(&s->list); > close(s->fd); > if (s->resume) { > @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool has_format, > Error *err = NULL; > int ret; > > + s->has_format = has_format; > + s->format = format; > + > /* kdump-compressed is conflict with paging and filter */ > if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > assert(!paging && !has_filter); > @@ -1580,6 +1584,86 @@ cleanup: > dump_cleanup(s); > } > > +extern GlobalDumpState global_dump_state; > + > +static GlobalDumpState *dump_state_get_global(void) > +{ > + static bool init = false; > + static GlobalDumpState global_dump_state; > + if (unlikely(!init)) { > + qemu_mutex_init(&global_dump_state.gds_mutex); The mutex is not necessary. The structure is always created in the main thread and released by the dump thread (of which there is just one). You can then just make a global DumpState (not a pointer!), and separate the fields between: - those that are protected by a mutex (most likely the DumpResult and written_size, possibly others) - those that are only written before the thread is started, and thus do not need to be protected by a mutex - those that are only accessed by the thread, and thus do not need to be protected by a mutex either. See for example this extract from migration/block.c: typedef struct BlkMigState { /* Written during setup phase. Can be read without a lock. */ int blk_enable; int shared_base; QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; int64_t total_sector_sum; bool zero_blocks; /* Protected by lock. */ QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; int submitted; int read_done; /* Only used by migration thread. Does not need a lock. */ int transferred; int prev_progress; int bulk_completed; /* The lock. */ QemuMutex lock; } BlkMigState; static BlkMigState block_mig_state; Paolo > + global_dump_state.gds_cur = NULL; > + init = true; > + } > + return &global_dump_state; > +} > + > +/* Returns non-zero if there is existing dump in progress, otherwise > + * zero is returned. */ > +bool dump_in_progress(void) > +{ > + GlobalDumpState *global = dump_state_get_global(); > + /* we do not need to take the mutex here if we are checking the > + * pointer only. */ > + return (!!global->gds_cur); > +} > + > +/* Trying to create one DumpState. This will fail if there is an > + * existing one. Return DumpState pointer if succeeded, otherwise > + * NULL is returned. */ > +static DumpState *dump_state_create(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + if (cur) { > + /* one dump in progress */ > + cur = NULL; > + } else { > + /* we are the first! */ > + cur = g_malloc0(sizeof(*cur)); > + global->gds_cur = cur; > + } > + > + qemu_mutex_unlock(&global->gds_mutex); > + return cur; > +} > + > +/* release current DumpState structure */ > +static void dump_state_release(GlobalDumpState *global) > +{ > + DumpState *cur = NULL; > + qemu_mutex_lock(&global->gds_mutex); > + > + cur = global->gds_cur; > + /* we should never call release before having one */ > + assert(cur); > + global->gds_cur = NULL; > + > + qemu_mutex_unlock(&global->gds_mutex); > + dump_cleanup(cur); > + g_free(cur); > +} > + > +/* this operation might be time consuming. */ > +static void dump_process(DumpState *s, Error **errp) > +{ > + if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > + create_kdump_vmcore(s, errp); > + } else { > + create_vmcore(s, errp); > + } > +} > + > +static void *dump_thread(void *data) > +{ > + GlobalDumpState *global = (GlobalDumpState *)data; > + dump_process(global->gds_cur, NULL); > + dump_state_release(global); > + return NULL; > +} > + > void qmp_dump_guest_memory(bool paging, const char *file, > bool has_detach, bool detach, > bool has_begin, int64_t begin, bool has_length, > @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char *file, > int fd = -1; > DumpState *s; > Error *local_err = NULL; > + /* by default, we are keeping the old style, which is sync dump */ > + bool detach_p = false; > + GlobalDumpState *global = dump_state_get_global(); > + > + if (runstate_check(RUN_STATE_INMIGRATE)) { > + error_setg(errp, "Dump not allowed during incoming migration."); > + return; > + } > > /* > * kdump-compressed format need the whole memory dumped, so paging or > @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char *file, > error_setg(errp, QERR_MISSING_PARAMETER, "begin"); > return; > } > + if (has_detach) { > + detach_p = detach; > + } > > /* check whether lzo/snappy is supported */ > #ifndef CONFIG_LZO > @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char *file, > return; > } > > - s = g_malloc0(sizeof(DumpState)); > + s = dump_state_create(global); > + if (!s) { > + error_setg(errp, "One dump is in progress."); > + return; > + } > > dump_init(s, fd, has_format, format, paging, has_begin, > begin, length, &local_err); > @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char *file, > return; > } > > - if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { > - create_kdump_vmcore(s, errp); > + if (!detach_p) { > + /* sync dump */ > + dump_process(s, errp); > + dump_state_release(global); > } else { > - create_vmcore(s, errp); > + qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, > + global, QEMU_THREAD_DETACHED); > + /* DumpState is freed in dump thread */ > } > - > - dump_cleanup(s); > - g_free(s); > } > > DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) > diff --git a/include/qemu-common.h b/include/qemu-common.h > index 405364f..7b74961 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int initial); > const char *qemu_ether_ntoa(const MACAddr *mac); > void page_size_init(void); > > +/* returns non-zero if dump is in progress, otherwise zero is > + * returned. */ > +bool dump_in_progress(void); > + > #endif > diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h > index 7e4ec5c..13c913e 100644 > --- a/include/sysemu/dump.h > +++ b/include/sysemu/dump.h > @@ -183,9 +183,19 @@ typedef struct DumpState { > off_t offset_page; /* offset of page part in vmcore */ > size_t num_dumpable; /* number of page that can be dumped */ > uint32_t flag_compress; /* indicate the compression format */ > + > + QemuThread dump_thread; /* only used when do async dump */ > + bool has_format; /* whether format is provided */ > + DumpGuestMemoryFormat format; /* valid only if has_format == true */ > } DumpState; > > +typedef struct GlobalDumpState { > + QemuMutex gds_mutex; /* protector for GlobalDumpState itself */ > + DumpState *gds_cur; /* current DumpState (might be NULL) */ > +} GlobalDumpState; > + > uint16_t cpu_to_dump16(DumpState *s, uint16_t val); > uint32_t cpu_to_dump32(DumpState *s, uint32_t val); > uint64_t cpu_to_dump64(DumpState *s, uint64_t val); > + > #endif > diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h > index a75d59a..dd56fc7 100644 > --- a/include/sysemu/memory_mapping.h > +++ b/include/sysemu/memory_mapping.h > @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock { > QTAILQ_ENTRY(GuestPhysBlock) next; > } GuestPhysBlock; > > +typedef struct GuestMemoryRegion { > + MemoryRegion *gmr_mr; > + QTAILQ_ENTRY(GuestMemoryRegion) next; > +} GuestMemoryRegion; > + > /* point-in-time snapshot of guest-visible physical mappings */ > typedef struct GuestPhysBlockList { > unsigned num; > QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head; > + /* housekeep all the MemoryRegion that is referenced */ > + QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr; > } GuestPhysBlockList; > > /* The physical and virtual address in the memory mapping are contiguous. */ > @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list); > void memory_mapping_list_init(MemoryMappingList *list); > > void guest_phys_blocks_free(GuestPhysBlockList *list); > +void guest_memory_region_free(GuestPhysBlockList *list); > void guest_phys_blocks_init(GuestPhysBlockList *list); > void guest_phys_blocks_append(GuestPhysBlockList *list); > > diff --git a/memory_mapping.c b/memory_mapping.c > index 36d6b26..a279552 100644 > --- a/memory_mapping.c > +++ b/memory_mapping.c > @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list) > { > list->num = 0; > QTAILQ_INIT(&list->head); > + QTAILQ_INIT(&list->head_mr); > } > > typedef struct GuestPhysListener { > @@ -193,6 +194,39 @@ typedef struct GuestPhysListener { > MemoryListener listener; > } GuestPhysListener; > > +static void guest_memory_region_add(GuestPhysBlockList *list, > + MemoryRegion *mr) > +{ > + GuestMemoryRegion *gmr = NULL; > + QTAILQ_FOREACH(gmr, &list->head_mr, next) { > + if (gmr->gmr_mr == mr) { > + /* we already refererenced the MemoryRegion */ > + return; > + } > + } > + /* reference the MemoryRegion to make sure it's valid during dump */ > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name); > +#endif > + memory_region_ref(mr); > + gmr = g_malloc0(sizeof(*gmr)); > + gmr->gmr_mr = mr; > + QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next); > +} > + > +void guest_memory_region_free(GuestPhysBlockList *list) > +{ > + GuestMemoryRegion *gmr = NULL, *q = NULL; > + QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) { > +#ifdef DEBUG_DUMP_GUEST_MEMORY > + fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name); > +#endif > + QTAILQ_REMOVE(&list->head_mr, gmr, next); > + memory_region_unref(gmr->gmr_mr); > + g_free(gmr); > + } > +} > + > static void guest_phys_blocks_region_add(MemoryListener *listener, > MemoryRegionSection *section) > { > @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, > block->target_end = target_end; > block->host_addr = host_addr; > > + /* keep all the MemoryRegion that is referenced by the dump > + * process */ > + guest_memory_region_add(g->list, section->mr); > + > QTAILQ_INSERT_TAIL(&g->list->head, block, next); > ++g->list->num; > } else { > @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, > predecessor->target_end = target_end; > } > > -#ifdef DEBUG_GUEST_PHYS_REGION_ADD > +#ifdef DEBUG_DUMP_GUEST_MEMORY > fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end=" > TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start, > target_end, predecessor ? "joined" : "added", g->list->num); > @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) > > g.list = list; > g.listener.region_add = &guest_phys_blocks_region_add; > + /* We should make sure all the memory objects are valid during > + * add dump regions. Before releasing it, we should also make > + * sure all the MemoryRegions to be used during dump is > + * referenced. */ > + rcu_read_lock(); > memory_listener_register(&g.listener, &address_space_memory); > memory_listener_unregister(&g.listener); > + rcu_read_unlock(); > } > > static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) > diff --git a/qmp.c b/qmp.c > index 0a1fa19..055586d 100644 > --- a/qmp.c > +++ b/qmp.c > @@ -102,6 +102,13 @@ void qmp_quit(Error **errp) > > void qmp_stop(Error **errp) > { > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_check(RUN_STATE_INMIGRATE)) { > autostart = 0; > } else { > @@ -174,6 +181,13 @@ void qmp_cont(Error **errp) > BlockBackend *blk; > BlockDriverState *bs; > > + /* if there is a dump in background, we should wait until the dump > + * finished */ > + if (dump_in_progress()) { > + error_setg(errp, "There is a dump in process, please wait."); > + return; > + } > + > if (runstate_needs_reset()) { > error_setg(errp, "Resetting the Virtual Machine is required"); > return; >
On Fri, Nov 27, 2015 at 11:14:17AM +0100, Paolo Bonzini wrote: > > > On 27/11/2015 07:27, Peter Xu wrote: > > If embed the mr pointer into GuestPhyBlock, then we might need to > > ref/unref one MemoryRegion for multiple times (and I am not sure how > > many, maybe it could be a very big number especially the > > MemoryRegionSections are scattered?). Not sure whether it is more > > efficient. > > > > For what I see, the number of MemoryRegions should be not big (<5 in > > my case). So even with O(N^2), it should merely like O(N). Not sure > > about this too. > > > > Would like to hear more review comments from Paolo and others. > > > > Fam suggestion is a good one, ref'ing one MemoryRegion many times is not > a problem. Ok, then I will embed the MemoryRegion pointer into GuestPhysBlocks in v3. > > Also I noticed now that you do the dump_init in the main thread (using a > listener), so the RCU lock/unlock is not needed. I don't know this code > very well. Yes, it's in main thread too in v1 patch, since I think that should not be the time consuming part. If so, I will remove the RCU lock/unlock too. > > It's worth adding a comment at the top of functions that are called from > a separate thread. Ok. Will add that. Thanks! Peter > > Thanks, > > Paolo
On Fri, Nov 27, 2015 at 11:31:00AM +0100, Paolo Bonzini wrote: > [snip] > > + > > +static GlobalDumpState *dump_state_get_global(void) > > +{ > > + static bool init = false; > > + static GlobalDumpState global_dump_state; > > + if (unlikely(!init)) { > > + qemu_mutex_init(&global_dump_state.gds_mutex); > > The mutex is not necessary. The structure is always created in the main > thread and released by the dump thread (of which there is just one). [1] > > You can then just make a global DumpState (not a pointer!), and separate > the fields between: > > - those that are protected by a mutex (most likely the DumpResult and > written_size, possibly others) Hi, Paolo, So mutex is still necessary, right? (refer to [1]) Since "dump-query" will read several fields of it, while the dump thread might be modifying them as well? > > - those that are only written before the thread is started, and thus do > not need to be protected by a mutex > > - those that are only accessed by the thread, and thus do not need to be > protected by a mutex either. > > See for example this extract from migration/block.c: > > typedef struct BlkMigState { > /* Written during setup phase. Can be read without a lock. */ > int blk_enable; > int shared_base; > QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; > int64_t total_sector_sum; > bool zero_blocks; > > /* Protected by lock. */ > QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; > int submitted; > int read_done; > > /* Only used by migration thread. Does not need a lock. */ > int transferred; > int prev_progress; > int bulk_completed; > > /* The lock. */ > QemuMutex lock; > } BlkMigState; > > static BlkMigState block_mig_state; Ok, I think I can remove the global state and make a static DumpState. When I was drafting the patch, I just tried to keep the old logic (malloc/free) and avoid introducing bugs. Maybe I was wrong. I should better not introduce new struct if not necessary. Will try to follow this example in v3. Thanks. Peter > > Paolo >
On 27/11/2015 12:26, Peter Xu wrote: > On Fri, Nov 27, 2015 at 11:31:00AM +0100, Paolo Bonzini wrote: >> > > [snip] > >>> + >>> +static GlobalDumpState *dump_state_get_global(void) >>> +{ >>> + static bool init = false; >>> + static GlobalDumpState global_dump_state; >>> + if (unlikely(!init)) { >>> + qemu_mutex_init(&global_dump_state.gds_mutex); >> >> The mutex is not necessary. The structure is always created in the main >> thread and released by the dump thread (of which there is just one). > > [1] > >> >> You can then just make a global DumpState (not a pointer!), and separate >> the fields between: >> >> - those that are protected by a mutex (most likely the DumpResult and >> written_size, possibly others) > > Hi, Paolo, > > So mutex is still necessary, right? (refer to [1]) Since > "dump-query" will read several fields of it, while the dump thread > might be modifying them as well? Right, initially I meant a mutex around the allocation. But then I read the other patches where you add more fields, and came back to add more content. Strictly speaking it's likely that you can do everything without a mutex, but it's easier to use one. > Ok, I think I can remove the global state and make a static > DumpState. When I was drafting the patch, I just tried to keep the > old logic (malloc/free) and avoid introducing bugs. Maybe I was > wrong. I should better not introduce new struct if not necessary. It's okay. The only confusing bit was that you had to add more state to GlobalDumpState, and in the end it was split between DumpState and GlobalDumpState. As far as this patch is concerned, using malloc/free would have been okay, but then the subsequent changes suggest a different approach. Thanks! Paolo > Will try to follow this example in v3. > > Thanks. > Peter > >> >> Paolo >> > >
On Fri, Nov 27, 2015 at 01:14:25PM +0800, Fam Zheng wrote: > On Fri, 11/27 10:48, Peter Xu wrote: [snip] > > This patch doesn't handle the incoming migration case, i.e. when QEMU is > started with "-incoming", or "-incoming defer". Dump can go wrong when incoming > migration happens at the same time. Sorry to missed these lines. Still not understand why this patch cannot handle this if with [1] (Please check below for [1])? [snip] > > > > +extern GlobalDumpState global_dump_state; > > dump_state_get_global() returns a pointer to the local static variable, why do > you need this? Yes, I should remove that. Thanks! [snip] > > + > > @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char *file, > > int fd = -1; > > DumpState *s; > > Error *local_err = NULL; > > + /* by default, we are keeping the old style, which is sync dump */ > > + bool detach_p = false; > > + GlobalDumpState *global = dump_state_get_global(); > > + [1] > > + if (runstate_check(RUN_STATE_INMIGRATE)) { > > + error_setg(errp, "Dump not allowed during incoming migration."); > > + return; > > + } Thanks. Peter
On Sat, 11/28 13:51, Peter Xu wrote: > On Fri, Nov 27, 2015 at 01:14:25PM +0800, Fam Zheng wrote: > > On Fri, 11/27 10:48, Peter Xu wrote: > > [snip] > > > > > This patch doesn't handle the incoming migration case, i.e. when QEMU is > > started with "-incoming", or "-incoming defer". Dump can go wrong when incoming > > migration happens at the same time. > > Sorry to missed these lines. Still not understand why this patch > cannot handle this if with [1] (Please check below for [1])? You're right, that does work. I missed that "defer" also sets RUN_STATE_INMIGRATE. Fam
diff --git a/dump.c b/dump.c index d79e0ed..dfd56aa 100644 --- a/dump.c +++ b/dump.c @@ -73,6 +73,7 @@ uint64_t cpu_to_dump64(DumpState *s, uint64_t val) static int dump_cleanup(DumpState *s) { guest_phys_blocks_free(&s->guest_phys_blocks); + guest_memory_region_free(&s->guest_phys_blocks); memory_mapping_list_free(&s->list); close(s->fd); if (s->resume) { @@ -1427,6 +1428,9 @@ static void dump_init(DumpState *s, int fd, bool has_format, Error *err = NULL; int ret; + s->has_format = has_format; + s->format = format; + /* kdump-compressed is conflict with paging and filter */ if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { assert(!paging && !has_filter); @@ -1580,6 +1584,86 @@ cleanup: dump_cleanup(s); } +extern GlobalDumpState global_dump_state; + +static GlobalDumpState *dump_state_get_global(void) +{ + static bool init = false; + static GlobalDumpState global_dump_state; + if (unlikely(!init)) { + qemu_mutex_init(&global_dump_state.gds_mutex); + global_dump_state.gds_cur = NULL; + init = true; + } + return &global_dump_state; +} + +/* Returns non-zero if there is existing dump in progress, otherwise + * zero is returned. */ +bool dump_in_progress(void) +{ + GlobalDumpState *global = dump_state_get_global(); + /* we do not need to take the mutex here if we are checking the + * pointer only. */ + return (!!global->gds_cur); +} + +/* Trying to create one DumpState. This will fail if there is an + * existing one. Return DumpState pointer if succeeded, otherwise + * NULL is returned. */ +static DumpState *dump_state_create(GlobalDumpState *global) +{ + DumpState *cur = NULL; + qemu_mutex_lock(&global->gds_mutex); + + cur = global->gds_cur; + if (cur) { + /* one dump in progress */ + cur = NULL; + } else { + /* we are the first! */ + cur = g_malloc0(sizeof(*cur)); + global->gds_cur = cur; + } + + qemu_mutex_unlock(&global->gds_mutex); + return cur; +} + +/* release current DumpState structure */ +static void dump_state_release(GlobalDumpState *global) +{ + DumpState *cur = NULL; + qemu_mutex_lock(&global->gds_mutex); + + cur = global->gds_cur; + /* we should never call release before having one */ + assert(cur); + global->gds_cur = NULL; + + qemu_mutex_unlock(&global->gds_mutex); + dump_cleanup(cur); + g_free(cur); +} + +/* this operation might be time consuming. */ +static void dump_process(DumpState *s, Error **errp) +{ + if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { + create_kdump_vmcore(s, errp); + } else { + create_vmcore(s, errp); + } +} + +static void *dump_thread(void *data) +{ + GlobalDumpState *global = (GlobalDumpState *)data; + dump_process(global->gds_cur, NULL); + dump_state_release(global); + return NULL; +} + void qmp_dump_guest_memory(bool paging, const char *file, bool has_detach, bool detach, bool has_begin, int64_t begin, bool has_length, @@ -1590,6 +1674,14 @@ void qmp_dump_guest_memory(bool paging, const char *file, int fd = -1; DumpState *s; Error *local_err = NULL; + /* by default, we are keeping the old style, which is sync dump */ + bool detach_p = false; + GlobalDumpState *global = dump_state_get_global(); + + if (runstate_check(RUN_STATE_INMIGRATE)) { + error_setg(errp, "Dump not allowed during incoming migration."); + return; + } /* * kdump-compressed format need the whole memory dumped, so paging or @@ -1609,6 +1701,9 @@ void qmp_dump_guest_memory(bool paging, const char *file, error_setg(errp, QERR_MISSING_PARAMETER, "begin"); return; } + if (has_detach) { + detach_p = detach; + } /* check whether lzo/snappy is supported */ #ifndef CONFIG_LZO @@ -1647,7 +1742,11 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } - s = g_malloc0(sizeof(DumpState)); + s = dump_state_create(global); + if (!s) { + error_setg(errp, "One dump is in progress."); + return; + } dump_init(s, fd, has_format, format, paging, has_begin, begin, length, &local_err); @@ -1657,14 +1756,15 @@ void qmp_dump_guest_memory(bool paging, const char *file, return; } - if (has_format && format != DUMP_GUEST_MEMORY_FORMAT_ELF) { - create_kdump_vmcore(s, errp); + if (!detach_p) { + /* sync dump */ + dump_process(s, errp); + dump_state_release(global); } else { - create_vmcore(s, errp); + qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread, + global, QEMU_THREAD_DETACHED); + /* DumpState is freed in dump thread */ } - - dump_cleanup(s); - g_free(s); } DumpGuestMemoryCapability *qmp_query_dump_guest_memory_capability(Error **errp) diff --git a/include/qemu-common.h b/include/qemu-common.h index 405364f..7b74961 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -501,4 +501,8 @@ int parse_debug_env(const char *name, int max, int initial); const char *qemu_ether_ntoa(const MACAddr *mac); void page_size_init(void); +/* returns non-zero if dump is in progress, otherwise zero is + * returned. */ +bool dump_in_progress(void); + #endif diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h index 7e4ec5c..13c913e 100644 --- a/include/sysemu/dump.h +++ b/include/sysemu/dump.h @@ -183,9 +183,19 @@ typedef struct DumpState { off_t offset_page; /* offset of page part in vmcore */ size_t num_dumpable; /* number of page that can be dumped */ uint32_t flag_compress; /* indicate the compression format */ + + QemuThread dump_thread; /* only used when do async dump */ + bool has_format; /* whether format is provided */ + DumpGuestMemoryFormat format; /* valid only if has_format == true */ } DumpState; +typedef struct GlobalDumpState { + QemuMutex gds_mutex; /* protector for GlobalDumpState itself */ + DumpState *gds_cur; /* current DumpState (might be NULL) */ +} GlobalDumpState; + uint16_t cpu_to_dump16(DumpState *s, uint16_t val); uint32_t cpu_to_dump32(DumpState *s, uint32_t val); uint64_t cpu_to_dump64(DumpState *s, uint64_t val); + #endif diff --git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h index a75d59a..dd56fc7 100644 --- a/include/sysemu/memory_mapping.h +++ b/include/sysemu/memory_mapping.h @@ -30,10 +30,17 @@ typedef struct GuestPhysBlock { QTAILQ_ENTRY(GuestPhysBlock) next; } GuestPhysBlock; +typedef struct GuestMemoryRegion { + MemoryRegion *gmr_mr; + QTAILQ_ENTRY(GuestMemoryRegion) next; +} GuestMemoryRegion; + /* point-in-time snapshot of guest-visible physical mappings */ typedef struct GuestPhysBlockList { unsigned num; QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head; + /* housekeep all the MemoryRegion that is referenced */ + QTAILQ_HEAD(GuestMemRegionHead, GuestMemoryRegion) head_mr; } GuestPhysBlockList; /* The physical and virtual address in the memory mapping are contiguous. */ @@ -65,6 +72,7 @@ void memory_mapping_list_free(MemoryMappingList *list); void memory_mapping_list_init(MemoryMappingList *list); void guest_phys_blocks_free(GuestPhysBlockList *list); +void guest_memory_region_free(GuestPhysBlockList *list); void guest_phys_blocks_init(GuestPhysBlockList *list); void guest_phys_blocks_append(GuestPhysBlockList *list); diff --git a/memory_mapping.c b/memory_mapping.c index 36d6b26..a279552 100644 --- a/memory_mapping.c +++ b/memory_mapping.c @@ -186,6 +186,7 @@ void guest_phys_blocks_init(GuestPhysBlockList *list) { list->num = 0; QTAILQ_INIT(&list->head); + QTAILQ_INIT(&list->head_mr); } typedef struct GuestPhysListener { @@ -193,6 +194,39 @@ typedef struct GuestPhysListener { MemoryListener listener; } GuestPhysListener; +static void guest_memory_region_add(GuestPhysBlockList *list, + MemoryRegion *mr) +{ + GuestMemoryRegion *gmr = NULL; + QTAILQ_FOREACH(gmr, &list->head_mr, next) { + if (gmr->gmr_mr == mr) { + /* we already refererenced the MemoryRegion */ + return; + } + } + /* reference the MemoryRegion to make sure it's valid during dump */ +#ifdef DEBUG_DUMP_GUEST_MEMORY + fprintf(stderr, "DUMP: ref mem region: %s\n", mr->name); +#endif + memory_region_ref(mr); + gmr = g_malloc0(sizeof(*gmr)); + gmr->gmr_mr = mr; + QTAILQ_INSERT_TAIL(&list->head_mr, gmr, next); +} + +void guest_memory_region_free(GuestPhysBlockList *list) +{ + GuestMemoryRegion *gmr = NULL, *q = NULL; + QTAILQ_FOREACH_SAFE(gmr, &list->head_mr, next, q) { +#ifdef DEBUG_DUMP_GUEST_MEMORY + fprintf(stderr, "DUMP: unref mem region: %s\n", gmr->gmr_mr->name); +#endif + QTAILQ_REMOVE(&list->head_mr, gmr, next); + memory_region_unref(gmr->gmr_mr); + g_free(gmr); + } +} + static void guest_phys_blocks_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -241,6 +275,10 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, block->target_end = target_end; block->host_addr = host_addr; + /* keep all the MemoryRegion that is referenced by the dump + * process */ + guest_memory_region_add(g->list, section->mr); + QTAILQ_INSERT_TAIL(&g->list->head, block, next); ++g->list->num; } else { @@ -250,7 +288,7 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, predecessor->target_end = target_end; } -#ifdef DEBUG_GUEST_PHYS_REGION_ADD +#ifdef DEBUG_DUMP_GUEST_MEMORY fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end=" TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start, target_end, predecessor ? "joined" : "added", g->list->num); @@ -263,8 +301,14 @@ void guest_phys_blocks_append(GuestPhysBlockList *list) g.list = list; g.listener.region_add = &guest_phys_blocks_region_add; + /* We should make sure all the memory objects are valid during + * add dump regions. Before releasing it, we should also make + * sure all the MemoryRegions to be used during dump is + * referenced. */ + rcu_read_lock(); memory_listener_register(&g.listener, &address_space_memory); memory_listener_unregister(&g.listener); + rcu_read_unlock(); } static CPUState *find_paging_enabled_cpu(CPUState *start_cpu) diff --git a/qmp.c b/qmp.c index 0a1fa19..055586d 100644 --- a/qmp.c +++ b/qmp.c @@ -102,6 +102,13 @@ void qmp_quit(Error **errp) void qmp_stop(Error **errp) { + /* if there is a dump in background, we should wait until the dump + * finished */ + if (dump_in_progress()) { + error_setg(errp, "There is a dump in process, please wait."); + return; + } + if (runstate_check(RUN_STATE_INMIGRATE)) { autostart = 0; } else { @@ -174,6 +181,13 @@ void qmp_cont(Error **errp) BlockBackend *blk; BlockDriverState *bs; + /* if there is a dump in background, we should wait until the dump + * finished */ + if (dump_in_progress()) { + error_setg(errp, "There is a dump in process, please wait."); + return; + } + if (runstate_needs_reset()) { error_setg(errp, "Resetting the Virtual Machine is required"); return;
This patch implements "detach" for "dump-guest-memory" command. When specified, one background thread is created to do the dump work. When there is a dump running in background, the commands "stop", "cont" and "dump-guest-memory" will fail directly, with a hint telling user: "there is a dump in progress". Although it is not quite essential for now, the new codes are trying to make sure the dump is thread safe. To do this, one list is added into GuestPhysBlockList to track all the referenced MemoryRegions during dump process. We should make sure each MemoryRegion would only be referenced for once. One global struct "GlobalDumpState" is added to store some global informations for dump procedures. One mutex is used to protect the global dump info. Signed-off-by: Peter Xu <peterx@redhat.com> --- dump.c | 114 +++++++++++++++++++++++++++++++++++++--- include/qemu-common.h | 4 ++ include/sysemu/dump.h | 10 ++++ include/sysemu/memory_mapping.h | 8 +++ memory_mapping.c | 46 +++++++++++++++- qmp.c | 14 +++++ 6 files changed, 188 insertions(+), 8 deletions(-)