diff options
Diffstat (limited to 'migration')
| -rw-r--r-- | migration/Makefile.objs | 2 | ||||
| -rw-r--r-- | migration/block.c | 39 | ||||
| -rw-r--r-- | migration/colo.c | 260 | ||||
| -rw-r--r-- | migration/migration.c | 76 | ||||
| -rw-r--r-- | migration/migration.h | 3 | ||||
| -rw-r--r-- | migration/multifd-zlib.c | 325 | ||||
| -rw-r--r-- | migration/multifd-zstd.c | 339 | ||||
| -rw-r--r-- | migration/multifd.c | 191 | ||||
| -rw-r--r-- | migration/multifd.h | 31 | ||||
| -rw-r--r-- | migration/ram.c | 2 | ||||
| -rw-r--r-- | migration/savevm.c | 1 | ||||
| -rw-r--r-- | migration/vmstate.c | 1 |
12 files changed, 1108 insertions, 162 deletions
diff --git a/migration/Makefile.objs b/migration/Makefile.objs index d3623d5f9b..0fc619e380 100644 --- a/migration/Makefile.objs +++ b/migration/Makefile.objs @@ -8,6 +8,8 @@ common-obj-y += xbzrle.o postcopy-ram.o common-obj-y += qjson.o common-obj-y += block-dirty-bitmap.o common-obj-y += multifd.o +common-obj-y += multifd-zlib.o +common-obj-$(CONFIG_ZSTD) += multifd-zstd.o common-obj-$(CONFIG_RDMA) += rdma.o diff --git a/migration/block.c b/migration/block.c index c90288ed29..737b6499f9 100644 --- a/migration/block.c +++ b/migration/block.c @@ -27,8 +27,8 @@ #include "migration/vmstate.h" #include "sysemu/block-backend.h" -#define BLOCK_SIZE (1 << 20) -#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS) +#define BLK_MIG_BLOCK_SIZE (1 << 20) +#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_EOS 0x02 @@ -133,7 +133,7 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk) uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK; if (block_mig_state.zero_blocks && - buffer_is_zero(blk->buf, BLOCK_SIZE)) { + buffer_is_zero(blk->buf, BLK_MIG_BLOCK_SIZE)) { flags |= BLK_MIG_FLAG_ZERO_BLOCK; } @@ -154,7 +154,7 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk) return; } - qemu_put_buffer(f, blk->buf, BLOCK_SIZE); + qemu_put_buffer(f, blk->buf, BLK_MIG_BLOCK_SIZE); } int blk_mig_active(void) @@ -309,7 +309,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) } blk = g_new(BlkMigBlock, 1); - blk->buf = g_malloc(BLOCK_SIZE); + blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE); blk->bmds = bmds; blk->sector = cur_sector; blk->nr_sectors = nr_sectors; @@ -350,7 +350,8 @@ static int set_dirty_tracking(void) QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { bmds->dirty_bitmap = bdrv_create_dirty_bitmap(blk_bs(bmds->blk), - BLOCK_SIZE, NULL, NULL); + BLK_MIG_BLOCK_SIZE, + NULL, NULL); if (!bmds->dirty_bitmap) { ret = -errno; goto fail; @@ -548,7 +549,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, bdrv_dirty_bitmap_unlock(bmds->dirty_bitmap); blk = g_new(BlkMigBlock, 1); - blk->buf = g_malloc(BLOCK_SIZE); + blk->buf = g_malloc(BLK_MIG_BLOCK_SIZE); blk->bmds = bmds; blk->sector = sector; blk->nr_sectors = nr_sectors; @@ -770,7 +771,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque) /* control the rate of transfer */ blk_mig_lock(); - while (block_mig_state.read_done * BLOCK_SIZE < + while (block_mig_state.read_done * BLK_MIG_BLOCK_SIZE < qemu_file_get_rate_limit(f) && block_mig_state.submitted < MAX_PARALLEL_IO && (block_mig_state.submitted + block_mig_state.read_done) < @@ -874,13 +875,13 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, qemu_mutex_unlock_iothread(); blk_mig_lock(); - pending += block_mig_state.submitted * BLOCK_SIZE + - block_mig_state.read_done * BLOCK_SIZE; + pending += block_mig_state.submitted * BLK_MIG_BLOCK_SIZE + + block_mig_state.read_done * BLK_MIG_BLOCK_SIZE; blk_mig_unlock(); /* Report at least one block pending during bulk phase */ if (pending <= max_size && !block_mig_state.bulk_completed) { - pending = max_size + BLOCK_SIZE; + pending = max_size + BLK_MIG_BLOCK_SIZE; } DPRINTF("Enter save live pending %" PRIu64 "\n", pending); @@ -901,7 +902,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) int nr_sectors; int ret; BlockDriverInfo bdi; - int cluster_size = BLOCK_SIZE; + int cluster_size = BLK_MIG_BLOCK_SIZE; do { addr = qemu_get_be64(f); @@ -939,11 +940,11 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) ret = bdrv_get_info(blk_bs(blk), &bdi); if (ret == 0 && bdi.cluster_size > 0 && - bdi.cluster_size <= BLOCK_SIZE && - BLOCK_SIZE % bdi.cluster_size == 0) { + bdi.cluster_size <= BLK_MIG_BLOCK_SIZE && + BLK_MIG_BLOCK_SIZE % bdi.cluster_size == 0) { cluster_size = bdi.cluster_size; } else { - cluster_size = BLOCK_SIZE; + cluster_size = BLK_MIG_BLOCK_SIZE; } } @@ -962,14 +963,14 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) int64_t cur_addr; uint8_t *cur_buf; - buf = g_malloc(BLOCK_SIZE); - qemu_get_buffer(f, buf, BLOCK_SIZE); - for (i = 0; i < BLOCK_SIZE / cluster_size; i++) { + buf = g_malloc(BLK_MIG_BLOCK_SIZE); + qemu_get_buffer(f, buf, BLK_MIG_BLOCK_SIZE); + for (i = 0; i < BLK_MIG_BLOCK_SIZE / cluster_size; i++) { cur_addr = addr * BDRV_SECTOR_SIZE + i * cluster_size; cur_buf = buf + i * cluster_size; if ((!block_mig_state.zero_blocks || - cluster_size < BLOCK_SIZE) && + cluster_size < BLK_MIG_BLOCK_SIZE) && buffer_is_zero(cur_buf, cluster_size)) { ret = blk_pwrite_zeroes(blk, cur_addr, cluster_size, diff --git a/migration/colo.c b/migration/colo.c index 2c88aa57a2..93c5a452fb 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -664,13 +664,138 @@ void migrate_start_colo_process(MigrationState *s) qemu_mutex_lock_iothread(); } -static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, - Error **errp) +static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, + QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) +{ + uint64_t total_size; + uint64_t value; + Error *local_err = NULL; + int ret; + + qemu_mutex_lock_iothread(); + vm_stop_force_state(RUN_STATE_COLO); + trace_colo_vm_state_change("run", "stop"); + qemu_mutex_unlock_iothread(); + + /* FIXME: This is unnecessary for periodic checkpoint mode */ + colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + colo_receive_check_message(mis->from_src_file, + COLO_MESSAGE_VMSTATE_SEND, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + qemu_mutex_lock_iothread(); + cpu_synchronize_all_pre_loadvm(); + ret = qemu_loadvm_state_main(mis->from_src_file, mis); + qemu_mutex_unlock_iothread(); + + if (ret < 0) { + error_setg(errp, "Load VM's live state (ram) error"); + return; + } + + value = colo_receive_message_value(mis->from_src_file, + COLO_MESSAGE_VMSTATE_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* + * Read VM device state data into channel buffer, + * It's better to re-use the memory allocated. + * Here we need to handle the channel buffer directly. + */ + if (value > bioc->capacity) { + bioc->capacity = value; + bioc->data = g_realloc(bioc->data, bioc->capacity); + } + total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); + if (total_size != value) { + error_setg(errp, "Got %" PRIu64 " VMState data, less than expected" + " %" PRIu64, total_size, value); + return; + } + bioc->usage = total_size; + qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); + + colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + qemu_mutex_lock_iothread(); + vmstate_loading = true; + ret = qemu_load_device_state(fb); + if (ret < 0) { + error_setg(errp, "COLO: load device state failed"); + qemu_mutex_unlock_iothread(); + return; + } + +#ifdef CONFIG_REPLICATION + replication_get_error_all(&local_err); + if (local_err) { + error_propagate(errp, local_err); + qemu_mutex_unlock_iothread(); + return; + } + + /* discard colo disk buffer */ + replication_do_checkpoint_all(&local_err); + if (local_err) { + error_propagate(errp, local_err); + qemu_mutex_unlock_iothread(); + return; + } +#else + abort(); +#endif + /* Notify all filters of all NIC to do checkpoint */ + colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); + + if (local_err) { + error_propagate(errp, local_err); + qemu_mutex_unlock_iothread(); + return; + } + + vmstate_loading = false; + vm_start(); + trace_colo_vm_state_change("stop", "run"); + qemu_mutex_unlock_iothread(); + + if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { + failover_set_state(FAILOVER_STATUS_RELAUNCH, + FAILOVER_STATUS_NONE); + failover_request_active(NULL); + return; + } + + colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + } +} + +static void colo_wait_handle_message(MigrationIncomingState *mis, + QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp) { COLOMessage msg; Error *local_err = NULL; - msg = colo_receive_message(f, &local_err); + msg = colo_receive_message(mis->from_src_file, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -678,10 +803,9 @@ static void colo_wait_handle_message(QEMUFile *f, int *checkpoint_request, switch (msg) { case COLO_MESSAGE_CHECKPOINT_REQUEST: - *checkpoint_request = 1; + colo_incoming_process_checkpoint(mis, fb, bioc, errp); break; default: - *checkpoint_request = 0; error_setg(errp, "Got unknown COLO message: %d", msg); break; } @@ -692,10 +816,7 @@ void *colo_process_incoming_thread(void *opaque) MigrationIncomingState *mis = opaque; QEMUFile *fb = NULL; QIOChannelBuffer *bioc = NULL; /* Cache incoming device state */ - uint64_t total_size; - uint64_t value; Error *local_err = NULL; - int ret; rcu_register_thread(); qemu_sem_init(&mis->colo_incoming_sem, 0); @@ -749,134 +870,19 @@ void *colo_process_incoming_thread(void *opaque) } while (mis->state == MIGRATION_STATUS_COLO) { - int request = 0; - - colo_wait_handle_message(mis->from_src_file, &request, &local_err); + colo_wait_handle_message(mis, fb, bioc, &local_err); if (local_err) { - goto out; + error_report_err(local_err); + break; } - assert(request); if (failover_get_state() != FAILOVER_STATUS_NONE) { error_report("failover request"); - goto out; - } - - qemu_mutex_lock_iothread(); - vm_stop_force_state(RUN_STATE_COLO); - trace_colo_vm_state_change("run", "stop"); - qemu_mutex_unlock_iothread(); - - /* FIXME: This is unnecessary for periodic checkpoint mode */ - colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY, - &local_err); - if (local_err) { - goto out; - } - - colo_receive_check_message(mis->from_src_file, - COLO_MESSAGE_VMSTATE_SEND, &local_err); - if (local_err) { - goto out; - } - - qemu_mutex_lock_iothread(); - cpu_synchronize_all_pre_loadvm(); - ret = qemu_loadvm_state_main(mis->from_src_file, mis); - qemu_mutex_unlock_iothread(); - - if (ret < 0) { - error_report("Load VM's live state (ram) error"); - goto out; - } - - value = colo_receive_message_value(mis->from_src_file, - COLO_MESSAGE_VMSTATE_SIZE, &local_err); - if (local_err) { - goto out; - } - - /* - * Read VM device state data into channel buffer, - * It's better to re-use the memory allocated. - * Here we need to handle the channel buffer directly. - */ - if (value > bioc->capacity) { - bioc->capacity = value; - bioc->data = g_realloc(bioc->data, bioc->capacity); - } - total_size = qemu_get_buffer(mis->from_src_file, bioc->data, value); - if (total_size != value) { - error_report("Got %" PRIu64 " VMState data, less than expected" - " %" PRIu64, total_size, value); - goto out; - } - bioc->usage = total_size; - qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); - - colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_RECEIVED, - &local_err); - if (local_err) { - goto out; - } - - qemu_mutex_lock_iothread(); - vmstate_loading = true; - ret = qemu_load_device_state(fb); - if (ret < 0) { - error_report("COLO: load device state failed"); - qemu_mutex_unlock_iothread(); - goto out; - } - -#ifdef CONFIG_REPLICATION - replication_get_error_all(&local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } - - /* discard colo disk buffer */ - replication_do_checkpoint_all(&local_err); - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } -#else - abort(); -#endif - /* Notify all filters of all NIC to do checkpoint */ - colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err); - - if (local_err) { - qemu_mutex_unlock_iothread(); - goto out; - } - - vmstate_loading = false; - vm_start(); - trace_colo_vm_state_change("stop", "run"); - qemu_mutex_unlock_iothread(); - - if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { - failover_set_state(FAILOVER_STATUS_RELAUNCH, - FAILOVER_STATUS_NONE); - failover_request_active(NULL); - goto out; - } - - colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, - &local_err); - if (local_err) { - goto out; + break; } } out: vmstate_loading = false; - /* Throw the unreported error message after exited from loop */ - if (local_err) { - error_report_err(local_err); - } /* * There are only two reasons we can get here, some error happened diff --git a/migration/migration.c b/migration/migration.c index 8fb68795dc..0b2045ccbd 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -88,6 +88,11 @@ /* The delay time (in ms) between two COLO checkpoints */ #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE +/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ +#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 +/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ +#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 /* Background transfer rate for postcopy, 0 means unlimited, note * that page requests can still exceed this limit. @@ -484,11 +489,6 @@ static void process_incoming_migration_co(void *opaque) goto fail; } - if (colo_init_ram_cache() < 0) { - error_report("Init ram cache failed"); - goto fail; - } - qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming", colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE); mis->have_colo_incoming_thread = true; @@ -798,6 +798,12 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->block_incremental = s->parameters.block_incremental; params->has_multifd_channels = true; params->multifd_channels = s->parameters.multifd_channels; + params->has_multifd_compression = true; + params->multifd_compression = s->parameters.multifd_compression; + params->has_multifd_zlib_level = true; + params->multifd_zlib_level = s->parameters.multifd_zlib_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; params->has_xbzrle_cache_size = true; params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; params->has_max_postcopy_bandwidth = true; @@ -865,7 +871,6 @@ bool migration_is_running(int state) case MIGRATION_STATUS_DEVICE: case MIGRATION_STATUS_WAIT_UNPLUG: case MIGRATION_STATUS_CANCELLING: - case MIGRATION_STATUS_COLO: return true; default: @@ -1205,6 +1210,20 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) return false; } + if (params->has_multifd_zlib_level && + (params->multifd_zlib_level > 9)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level", + "is invalid, it should be in the range of 0 to 9"); + return false; + } + + if (params->has_multifd_zstd_level && + (params->multifd_zstd_level > 20)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", + "is invalid, it should be in the range of 0 to 20"); + return false; + } + if (params->has_xbzrle_cache_size && (params->xbzrle_cache_size < qemu_target_page_size() || !is_power_of_2(params->xbzrle_cache_size))) { @@ -1315,6 +1334,9 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_channels) { dest->multifd_channels = params->multifd_channels; } + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } @@ -1411,6 +1433,9 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_channels) { s->parameters.multifd_channels = params->multifd_channels; } + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); @@ -2236,6 +2261,33 @@ int migrate_multifd_channels(void) return s->parameters.multifd_channels; } +MultiFDCompression migrate_multifd_compression(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.multifd_compression; +} + +int migrate_multifd_zlib_level(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.multifd_zlib_level; +} + +int migrate_multifd_zstd_level(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->parameters.multifd_zstd_level; +} + int migrate_use_xbzrle(void) { MigrationState *s; @@ -3523,6 +3575,15 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("multifd-channels", MigrationState, parameters.multifd_channels, DEFAULT_MIGRATE_MULTIFD_CHANNELS), + DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, + parameters.multifd_compression, + DEFAULT_MIGRATE_MULTIFD_COMPRESSION), + DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, + parameters.multifd_zlib_level, + DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, parameters.xbzrle_cache_size, DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), @@ -3613,6 +3674,9 @@ static void migration_instance_init(Object *obj) params->has_x_checkpoint_delay = true; params->has_block_incremental = true; params->has_multifd_channels = true; + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; + params->has_multifd_zstd_level = true; params->has_xbzrle_cache_size = true; params->has_max_postcopy_bandwidth = true; params->has_max_cpu_throttle = true; diff --git a/migration/migration.h b/migration/migration.h index 8473ddfc88..507284e563 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -300,6 +300,9 @@ bool migrate_auto_converge(void); bool migrate_use_multifd(void); bool migrate_pause_before_switchover(void); int migrate_multifd_channels(void); +MultiFDCompression migrate_multifd_compression(void); +int migrate_multifd_zlib_level(void); +int migrate_multifd_zstd_level(void); int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c new file mode 100644 index 0000000000..ab4ba75d75 --- /dev/null +++ b/migration/multifd-zlib.c @@ -0,0 +1,325 @@ +/* + * Multifd zlib compression implementation + * + * Copyright (c) 2020 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include <zlib.h> +#include "qemu/rcu.h" +#include "exec/target_page.h" +#include "qapi/error.h" +#include "migration.h" +#include "trace.h" +#include "multifd.h" + +struct zlib_data { + /* stream for compression */ + z_stream zs; + /* compressed buffer */ + uint8_t *zbuff; + /* size of compressed buffer */ + uint32_t zbuff_len; +}; + +/* Multifd zlib compression */ + +/** + * zlib_send_setup: setup send side + * + * Setup each channel with zlib compression. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int zlib_send_setup(MultiFDSendParams *p, Error **errp) +{ + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + struct zlib_data *z = g_malloc0(sizeof(struct zlib_data)); + z_stream *zs = &z->zs; + + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { + g_free(z); + error_setg(errp, "multifd %d: deflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ + z->zbuff_len = page_count * qemu_target_page_size(); + z->zbuff_len *= 2; + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + deflateEnd(&z->zs); + g_free(z); + error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + return -1; + } + p->data = z; + return 0; +} + +/** + * zlib_send_cleanup: cleanup send side + * + * Close the channel and return memory. + * + * @p: Params for the channel that we are using + */ +static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) +{ + struct zlib_data *z = p->data; + + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; + g_free(p->data); + p->data = NULL; +} + +/** + * zlib_send_prepare: prepare date to be able to send + * + * Create a compressed buffer with all the pages that we are going to + * send. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + */ +static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) +{ + struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; + uint32_t i; + + for (i = 0; i < used; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + + if (i == used - 1) { + flush = Z_SYNC_FLUSH; + } + + zs->avail_in = iov[i].iov_len; + zs->next_in = iov[i].iov_base; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; + + /* + * Welcome to deflate semantics + * + * We need to loop while: + * - return is Z_OK + * - there are stuff to be compressed + * - there are output space free + */ + do { + ret = deflate(zs, flush); + } while (ret == Z_OK && zs->avail_in && zs->avail_out); + if (ret == Z_OK && zs->avail_in) { + error_setg(errp, "multifd %d: deflate failed to compress all input", + p->id); + return -1; + } + if (ret != Z_OK) { + error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } + out_size += available - zs->avail_out; + } + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + + return 0; +} + +/** + * zlib_send_write: do the actual write of the data + * + * Do the actual write of the comprresed buffer. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +{ + struct zlib_data *z = p->data; + + return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, + errp); +} + +/** + * zlib_recv_setup: setup receive side + * + * Create the compressed channel and buffer. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) +{ + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + struct zlib_data *z = g_malloc0(sizeof(struct zlib_data)); + z_stream *zs = &z->zs; + + p->data = z; + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; + zs->avail_in = 0; + zs->next_in = Z_NULL; + if (inflateInit(zs) != Z_OK) { + error_setg(errp, "multifd %d: inflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ + z->zbuff_len = page_count * qemu_target_page_size(); + /* We know compression "could" use more space */ + z->zbuff_len *= 2; + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + inflateEnd(zs); + error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + return -1; + } + return 0; +} + +/** + * zlib_recv_cleanup: setup receive side + * + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using + */ +static void zlib_recv_cleanup(MultiFDRecvParams *p) +{ + struct zlib_data *z = p->data; + + inflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; + g_free(p->data); + p->data = NULL; +} + +/** + * zlib_recv_pages: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) +{ + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ + uint32_t out_size = zs->total_out; + uint32_t expected_size = used * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + int ret; + int i; + + if (flags != MULTIFD_FLAG_ZLIB) { + error_setg(errp, "multifd %d: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { + return ret; + } + + zs->avail_in = in_size; + zs->next_in = z->zbuff; + + for (i = 0; i < used; i++) { + struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + + if (i == used - 1) { + flush = Z_SYNC_FLUSH; + } + + zs->avail_out = iov->iov_len; + zs->next_out = iov->iov_base; + + /* + * Welcome to inflate semantics + * + * We need to loop while: + * - return is Z_OK + * - there are input available + * - we haven't completed a full page + */ + do { + ret = inflate(zs, flush); + } while (ret == Z_OK && zs->avail_in + && (zs->total_out - start) < iov->iov_len); + if (ret == Z_OK && (zs->total_out - start) < iov->iov_len) { + error_setg(errp, "multifd %d: inflate generated too few output", + p->id); + return -1; + } + if (ret != Z_OK) { + error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } + } + out_size = zs->total_out - out_size; + if (out_size != expected_size) { + error_setg(errp, "multifd %d: packet size received %d size expected %d", + p->id, out_size, expected_size); + return -1; + } + return 0; +} + +static MultiFDMethods multifd_zlib_ops = { + .send_setup = zlib_send_setup, + .send_cleanup = zlib_send_cleanup, + .send_prepare = zlib_send_prepare, + .send_write = zlib_send_write, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, + .recv_pages = zlib_recv_pages +}; + +static void multifd_zlib_register(void) +{ + multifd_register_ops(MULTIFD_COMPRESSION_ZLIB, &multifd_zlib_ops); +} + +migration_init(multifd_zlib_register); diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c new file mode 100644 index 0000000000..693bddf8c9 --- /dev/null +++ b/migration/multifd-zstd.c @@ -0,0 +1,339 @@ +/* + * Multifd zlib compression implementation + * + * Copyright (c) 2020 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include <zstd.h> +#include "qemu/rcu.h" +#include "exec/target_page.h" +#include "qapi/error.h" +#include "migration.h" +#include "trace.h" +#include "multifd.h" + +struct zstd_data { + /* stream for compression */ + ZSTD_CStream *zcs; + /* stream for decompression */ + ZSTD_DStream *zds; + /* buffers */ + ZSTD_inBuffer in; + ZSTD_outBuffer out; + /* compressed buffer */ + uint8_t *zbuff; + /* size of compressed buffer */ + uint32_t zbuff_len; +}; + +/* Multifd zstd compression */ + +/** + * zstd_send_setup: setup send side + * + * Setup each channel with zstd compression. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int zstd_send_setup(MultiFDSendParams *p, Error **errp) +{ + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + struct zstd_data *z = g_new0(struct zstd_data, 1); + int res; + + p->data = z; + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); + error_setg(errp, "multifd %d: zstd createCStream failed", p->id); + return -1; + } + + res = ZSTD_initCStream(z->zcs, migrate_multifd_zstd_level()); + if (ZSTD_isError(res)) { + ZSTD_freeCStream(z->zcs); + g_free(z); + error_setg(errp, "multifd %d: initCStream failed with error %s", + p->id, ZSTD_getErrorName(res)); + return -1; + } + /* We will never have more than page_count pages */ + z->zbuff_len = page_count * qemu_target_page_size(); + z->zbuff_len *= 2; + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + ZSTD_freeCStream(z->zcs); + g_free(z); + error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + return -1; + } + return 0; +} + +/** + * zstd_send_cleanup: cleanup send side + * + * Close the channel and return memory. + * + * @p: Params for the channel that we are using + */ +static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) +{ + struct zstd_data *z = p->data; + + ZSTD_freeCStream(z->zcs); + z->zcs = NULL; + g_free(z->zbuff); + z->zbuff = NULL; + g_free(p->data); + p->data = NULL; +} + +/** + * zstd_send_prepare: prepare date to be able to send + * + * Create a compressed buffer with all the pages that we are going to + * send. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + */ +static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) +{ + struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; + int ret; + uint32_t i; + + z->out.dst = z->zbuff; + z->out.size = z->zbuff_len; + z->out.pos = 0; + + for (i = 0; i < used; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + + if (i == used - 1) { + flush = ZSTD_e_flush; + } + z->in.src = iov[i].iov_base; + z->in.size = iov[i].iov_len; + z->in.pos = 0; + + /* + * Welcome to compressStream2 semantics + * + * We need to loop while: + * - return is > 0 + * - there is input available + * - there is output space free + */ + do { + ret = ZSTD_compressStream2(z->zcs, &z->out, &z->in, flush); + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.size - z->out.pos > 0)); + if (ret > 0 && (z->in.size - z->in.pos > 0)) { + error_setg(errp, "multifd %d: compressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { + error_setg(errp, "multifd %d: compressStream error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } + } + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + + return 0; +} + +/** + * zstd_send_write: do the actual write of the data + * + * Do the actual write of the comprresed buffer. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +{ + struct zstd_data *z = p->data; + + return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, + errp); +} + +/** + * zstd_recv_setup: setup receive side + * + * Create the compressed channel and buffer. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) +{ + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + struct zstd_data *z = g_new0(struct zstd_data, 1); + int ret; + + p->data = z; + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); + error_setg(errp, "multifd %d: zstd createDStream failed", p->id); + return -1; + } + + ret = ZSTD_initDStream(z->zds); + if (ZSTD_isError(ret)) { + ZSTD_freeDStream(z->zds); + g_free(z); + error_setg(errp, "multifd %d: initDStream failed with error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } + + /* We will never have more than page_count pages */ + z->zbuff_len = page_count * qemu_target_page_size(); + /* We know compression "could" use more space */ + z->zbuff_len *= 2; + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + ZSTD_freeDStream(z->zds); + g_free(z); + error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + return -1; + } + return 0; +} + +/** + * zstd_recv_cleanup: setup receive side + * + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using + */ +static void zstd_recv_cleanup(MultiFDRecvParams *p) +{ + struct zstd_data *z = p->data; + + ZSTD_freeDStream(z->zds); + z->zds = NULL; + g_free(z->zbuff); + z->zbuff = NULL; + g_free(p->data); + p->data = NULL; +} + +/** + * zstd_recv_pages: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) +{ + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; + uint32_t expected_size = used * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; + int i; + + if (flags != MULTIFD_FLAG_ZSTD) { + error_setg(errp, "multifd %d: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { + return ret; + } + + z->in.src = z->zbuff; + z->in.size = in_size; + z->in.pos = 0; + + for (i = 0; i < used; i++) { + struct iovec *iov = &p->pages->iov[i]; + + z->out.dst = iov->iov_base; + z->out.size = iov->iov_len; + z->out.pos = 0; + + /* + * Welcome to decompressStream semantics + * + * We need to loop while: + * - return is > 0 + * - there is input available + * - we haven't put out a full page + */ + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.pos < iov->iov_len)); + if (ret > 0 && (z->out.pos < iov->iov_len)) { + error_setg(errp, "multifd %d: decompressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { + error_setg(errp, "multifd %d: decompressStream returned %s", + p->id, ZSTD_getErrorName(ret)); + return ret; + } + out_size += z->out.pos; + } + if (out_size != expected_size) { + error_setg(errp, "multifd %d: packet size received %d size expected %d", + p->id, out_size, expected_size); + return -1; + } + return 0; +} + +static MultiFDMethods multifd_zstd_ops = { + .send_setup = zstd_send_setup, + .send_cleanup = zstd_send_cleanup, + .send_prepare = zstd_send_prepare, + .send_write = zstd_send_write, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, + .recv_pages = zstd_recv_pages +}; + +static void multifd_zstd_register(void) +{ + multifd_register_ops(MULTIFD_COMPRESSION_ZSTD, &multifd_zstd_ops); +} + +migration_init(multifd_zstd_register); diff --git a/migration/multifd.c b/migration/multifd.c index b3e8ae9bcc..cb6a4a3ab8 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -38,6 +38,140 @@ typedef struct { uint64_t unused2[4]; /* Reserved for future use */ } __attribute__((packed)) MultiFDInit_t; +/* Multifd without compression */ + +/** + * nocomp_send_setup: setup send side + * + * For no compression this function does nothing. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) +{ + return 0; +} + +/** + * nocomp_send_cleanup: cleanup send side + * + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using + */ +static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) +{ + return; +} + +/** + * nocomp_send_prepare: prepare date to be able to send + * + * For no compression we just have to calculate the size of the + * packet. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int nocomp_send_prepare(MultiFDSendParams *p, uint32_t used, + Error **errp) +{ + p->next_packet_size = used * qemu_target_page_size(); + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; +} + +/** + * nocomp_send_write: do the actual write of the data + * + * For no compression we just have to write the data. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +{ + return qio_channel_writev_all(p->c, p->pages->iov, used, errp); +} + +/** + * nocomp_recv_setup: setup receive side + * + * For no compression this function does nothing. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) +{ + return 0; +} + +/** + * nocomp_recv_cleanup: setup receive side + * + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using + */ +static void nocomp_recv_cleanup(MultiFDRecvParams *p) +{ +} + +/** + * nocomp_recv_pages: read the data from the channel into actual pages + * + * For no compression we just need to read things into the correct place. + * + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using + * @used: number of pages used + * @errp: pointer to an error + */ +static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) +{ + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %d: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } + return qio_channel_readv_all(p->c, p->pages->iov, used, errp); +} + +static MultiFDMethods multifd_nocomp_ops = { + .send_setup = nocomp_send_setup, + .send_cleanup = nocomp_send_cleanup, + .send_prepare = nocomp_send_prepare, + .send_write = nocomp_send_write, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, + .recv_pages = nocomp_recv_pages +}; + +static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = { + [MULTIFD_COMPRESSION_NONE] = &multifd_nocomp_ops, +}; + +void multifd_register_ops(int method, MultiFDMethods *ops) +{ + assert(0 < method && method < MULTIFD_COMPRESSION__MAX); + multifd_ops[method] = ops; +} + static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) { MultiFDInit_t msg = {}; @@ -246,6 +380,8 @@ struct { * We will use atomic operations. Only valid values are 0 and 1. */ int exiting; + /* multifd ops */ + MultiFDMethods *ops; } *multifd_send_state; /* @@ -397,6 +533,7 @@ void multifd_save_cleanup(void) } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; + Error *local_err = NULL; socket_send_channel_destroy(p->c); p->c = NULL; @@ -410,6 +547,10 @@ void multifd_save_cleanup(void) p->packet_len = 0; g_free(p->packet); p->packet = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); + } } qemu_sem_destroy(&multifd_send_state->channels_ready); g_free(multifd_send_state->params); @@ -494,7 +635,14 @@ static void *multifd_send_thread(void *opaque) uint64_t packet_num = p->packet_num; flags = p->flags; - p->next_packet_size = used * qemu_target_page_size(); + if (used) { + ret = multifd_send_state->ops->send_prepare(p, used, + &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); + break; + } + } multifd_send_fill_packet(p); p->flags = 0; p->num_packets++; @@ -513,8 +661,7 @@ static void *multifd_send_thread(void *opaque) } if (used) { - ret = qio_channel_writev_all(p->c, p->pages->iov, - used, &local_err); + ret = multifd_send_state->ops->send_write(p, used, &local_err); if (ret != 0) { break; } @@ -604,6 +751,7 @@ int multifd_save_setup(Error **errp) multifd_send_state->pages = multifd_pages_init(page_count); qemu_sem_init(&multifd_send_state->channels_ready, 0); atomic_set(&multifd_send_state->exiting, 0); + multifd_send_state->ops = multifd_ops[migrate_multifd_compression()]; for (i = 0; i < thread_count; i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; @@ -623,6 +771,18 @@ int multifd_save_setup(Error **errp) p->name = g_strdup_printf("multifdsend_%d", i); socket_send_channel_create(multifd_new_send_channel_async, p); } + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + Error *local_err = NULL; + int ret; + + ret = multifd_send_state->ops->send_setup(p, &local_err); + if (ret) { + error_propagate(errp, local_err); + return ret; + } + } return 0; } @@ -634,6 +794,8 @@ struct { QemuSemaphore sem_sync; /* global number of generated multifd packets */ uint64_t packet_num; + /* multifd ops */ + MultiFDMethods *ops; } *multifd_recv_state; static void multifd_recv_terminate_threads(Error *err) @@ -673,7 +835,6 @@ static void multifd_recv_terminate_threads(Error *err) int multifd_load_cleanup(Error **errp) { int i; - int ret = 0; if (!migrate_use_multifd()) { return 0; @@ -706,6 +867,7 @@ int multifd_load_cleanup(Error **errp) p->packet_len = 0; g_free(p->packet); p->packet = NULL; + multifd_recv_state->ops->recv_cleanup(p); } qemu_sem_destroy(&multifd_recv_state->sem_sync); g_free(multifd_recv_state->params); @@ -713,7 +875,7 @@ int multifd_load_cleanup(Error **errp) g_free(multifd_recv_state); multifd_recv_state = NULL; - return ret; + return 0; } void multifd_recv_sync_main(void) @@ -778,6 +940,8 @@ static void *multifd_recv_thread(void *opaque) used = p->pages->used; flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; trace_multifd_recv(p->id, p->packet_num, used, flags, p->next_packet_size); p->num_packets++; @@ -785,8 +949,7 @@ static void *multifd_recv_thread(void *opaque) qemu_mutex_unlock(&p->mutex); if (used) { - ret = qio_channel_readv_all(p->c, p->pages->iov, - used, &local_err); + ret = multifd_recv_state->ops->recv_pages(p, used, &local_err); if (ret != 0) { break; } @@ -825,6 +988,7 @@ int multifd_load_setup(Error **errp) multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); atomic_set(&multifd_recv_state->count, 0); qemu_sem_init(&multifd_recv_state->sem_sync, 0); + multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()]; for (i = 0; i < thread_count; i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; @@ -839,6 +1003,18 @@ int multifd_load_setup(Error **errp) p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdrecv_%d", i); } + + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + Error *local_err = NULL; + int ret; + + ret = multifd_recv_state->ops->recv_setup(p, &local_err); + if (ret) { + error_propagate(errp, local_err); + return ret; + } + } return 0; } @@ -896,4 +1072,3 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) return atomic_read(&multifd_recv_state->count) == migrate_multifd_channels(); } - diff --git a/migration/multifd.h b/migration/multifd.h index d8b0205977..448a03d89a 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -23,8 +23,16 @@ void multifd_recv_sync_main(void); void multifd_send_sync_main(QEMUFile *f); int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +/* Multifd Compression flags */ #define MULTIFD_FLAG_SYNC (1 << 0) +/* We reserve 3 bits for compression methods */ +#define MULTIFD_FLAG_COMPRESSION_MASK (7 << 1) +/* we need to be compatible. Before compression value was 0 */ +#define MULTIFD_FLAG_NOCOMP (0 << 1) +#define MULTIFD_FLAG_ZLIB (1 << 1) +#define MULTIFD_FLAG_ZSTD (2 << 1) + /* This value needs to be a multiple of qemu_target_page_size() */ #define MULTIFD_PACKET_SIZE (512 * 1024) @@ -96,6 +104,8 @@ typedef struct { uint64_t num_pages; /* syncs main thread and channels */ QemuSemaphore sem_sync; + /* used for compression methods */ + void *data; } MultiFDSendParams; typedef struct { @@ -133,7 +143,28 @@ typedef struct { uint64_t num_pages; /* syncs main thread and channels */ QemuSemaphore sem_sync; + /* used for de-compression methods */ + void *data; } MultiFDRecvParams; +typedef struct { + /* Setup for sending side */ + int (*send_setup)(MultiFDSendParams *p, Error **errp); + /* Cleanup for sending side */ + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ + int (*send_prepare)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Write the send packet */ + int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); + /* Read all pages */ + int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp); +} MultiFDMethods; + +void multifd_register_ops(int method, MultiFDMethods *ops); + #endif diff --git a/migration/ram.c b/migration/ram.c index ed23ed1c7c..0ef68798d2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -28,7 +28,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include <zlib.h> #include "qemu/cutils.h" #include "qemu/bitops.h" #include "qemu/bitmap.h" @@ -43,6 +42,7 @@ #include "page_cache.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "qapi/qapi-types-migration.h" #include "qapi/qapi-events-migration.h" #include "qapi/qmp/qerror.h" #include "trace.h" diff --git a/migration/savevm.c b/migration/savevm.c index 1d4220ece8..c00a6807d9 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -665,6 +665,7 @@ void dump_vmstate_json_to_file(FILE *out_file) } fprintf(out_file, "\n}\n"); fclose(out_file); + g_slist_free(list); } static uint32_t calculate_new_instance_id(const char *idstr) diff --git a/migration/vmstate.c b/migration/vmstate.c index 7dd8ef66c6..bafa890384 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -362,7 +362,6 @@ int vmstate_save_state_v(QEMUFile *f, const VMStateDescription *vmsd, } for (i = 0; i < n_elems; i++) { void *curr_elem = first_elem + size * i; - ret = 0; vmsd_desc_field_start(vmsd, vmdesc_loop, field, i, n_elems); old_offset = qemu_ftell_fast(f); |