@@ -1414,6 +1414,9 @@ void migrate_decompress_threads_create(void)
int i, thread_count;
thread_count = migrate_decompress_threads();
+ if (thread_count == 1) {
+ return;
+ }
decompress_threads = g_new0(QemuThread, thread_count);
decomp_param = g_new0(DecompressParam, thread_count);
compressed_data_buf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
@@ -1432,8 +1435,11 @@ void migrate_decompress_threads_join(void)
{
int i, thread_count;
- quit_decomp_thread = true;
thread_count = migrate_decompress_threads();
+ if (thread_count == 1) {
+ return;
+ }
+ quit_decomp_thread = true;
for (i = 0; i < thread_count; i++) {
qemu_mutex_lock(&decomp_param[i].mutex);
qemu_cond_signal(&decomp_param[i].cond);
@@ -1575,7 +1581,14 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
break;
}
qemu_get_buffer(f, compressed_data_buf, len);
- decompress_data_with_multi_threads(compressed_data_buf, host, len);
+ if (migrate_decompress_threads() == 1) {
+ unsigned long pagesize = TARGET_PAGE_SIZE;
+ uncompress((Bytef *)host, &pagesize,
+ (const Bytef *)compressed_data_buf, len);
+ } else {
+ decompress_data_with_multi_threads(compressed_data_buf,
+ host, len);
+ }
break;
case RAM_SAVE_FLAG_XBZRLE:
host = host_from_stream_offset(f, addr, flags);
When the decompression thread count is set to 1, the current implementation is inefficient for the following reasons: 1. Thread synchronization cost; 2. Data copy; This patch optimizes performance for the case of 1 decompression thread. In this case, the decompression is done in process_incoming_migration_co; for fast decompression algorithms, this can help to improve performance. Signed-off-by: Liang Li <liang.z.li@intel.com> --- migration/ram.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-)