diff options
Diffstat (limited to 'migration/ram.c')
| -rw-r--r-- | migration/ram.c | 302 |
1 files changed, 167 insertions, 135 deletions
diff --git a/migration/ram.c b/migration/ram.c index 2f5ce4d60b..c844151ee9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -59,6 +59,7 @@ #include "qemu/iov.h" #include "multifd.h" #include "sysemu/runstate.h" +#include "rdma.h" #include "options.h" #include "sysemu/dirtylimit.h" #include "sysemu/kvm.h" @@ -88,7 +89,7 @@ #define RAM_SAVE_FLAG_EOS 0x10 #define RAM_SAVE_FLAG_CONTINUE 0x20 #define RAM_SAVE_FLAG_XBZRLE 0x40 -/* 0x80 is reserved in qemu-file.h for RAM_SAVE_FLAG_HOOK */ +/* 0x80 is reserved in rdma.h for RAM_SAVE_FLAG_HOOK */ #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100 #define RAM_SAVE_FLAG_MULTIFD_FLUSH 0x200 /* We can't use any flag that is bigger than 0x200 */ @@ -569,7 +570,6 @@ void mig_throttle_counter_reset(void) /** * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache * - * @rs: current RAM state * @current_addr: address for the zero page * * Update the xbzrle cache to reflect a page that's been sent as all 0. @@ -578,7 +578,7 @@ void mig_throttle_counter_reset(void) * As a bonus, if the page wasn't in the cache it gets added so that * when a small write is made into the 0'd page it gets XBZRLE sent. */ -static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) +static void xbzrle_cache_zero_page(ram_addr_t current_addr) { /* We don't care if this fails to allocate a new cache page * as long as it updated an old one */ @@ -1138,50 +1138,45 @@ void ram_release_page(const char *rbname, uint64_t offset) } /** - * save_zero_page_to_file: send the zero page to the file + * save_zero_page: send the zero page to the stream * - * Returns the size of data written to the file, 0 means the page is not - * a zero page + * Returns the number of pages written. * + * @rs: current RAM state * @pss: current PSS channel * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page_to_file(PageSearchStatus *pss, QEMUFile *file, - RAMBlock *block, ram_addr_t offset) +static int save_zero_page(RAMState *rs, PageSearchStatus *pss, RAMBlock *block, + ram_addr_t offset) { uint8_t *p = block->host + offset; + QEMUFile *file = pss->pss_channel; int len = 0; - if (buffer_is_zero(p, TARGET_PAGE_SIZE)) { - len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO); - qemu_put_byte(file, 0); - len += 1; - ram_release_page(block->idstr, offset); + if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { + return 0; } - return len; -} -/** - * save_zero_page: send the zero page to the stream - * - * Returns the number of pages written. - * - * @pss: current PSS channel - * @block: block that contains the page we want to send - * @offset: offset inside the block for the page - */ -static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, - ram_addr_t offset) -{ - int len = save_zero_page_to_file(pss, f, block, offset); + len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO); + qemu_put_byte(file, 0); + len += 1; + ram_release_page(block->idstr, offset); - if (len) { - stat64_add(&mig_stats.zero_pages, 1); - ram_transferred_add(len); - return 1; + stat64_add(&mig_stats.zero_pages, 1); + ram_transferred_add(len); + + /* + * Must let xbzrle know, otherwise a previous (now 0'd) cached + * page would be stale. + */ + if (rs->xbzrle_started) { + XBZRLE_cache_lock(); + xbzrle_cache_zero_page(block->offset + offset); + XBZRLE_cache_unlock(); } - return -1; + + return len; } /* @@ -1196,8 +1191,8 @@ static bool control_save_page(PageSearchStatus *pss, RAMBlock *block, { int ret; - ret = ram_control_save_page(pss->pss_channel, block->offset, offset, - TARGET_PAGE_SIZE); + ret = rdma_control_save_page(pss->pss_channel, block->offset, offset, + TARGET_PAGE_SIZE); if (ret == RAM_SAVE_CONTROL_NOT_SUPP) { return false; } @@ -1395,7 +1390,8 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) pss->page = 0; pss->block = QLIST_NEXT_RCU(pss->block, next); if (!pss->block) { - if (!migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && + !migrate_multifd_flush_after_each_section()) { QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; int ret = multifd_send_sync_main(f); if (ret < 0) { @@ -2137,17 +2133,8 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) return 1; } - res = save_zero_page(pss, pss->pss_channel, block, offset); - if (res > 0) { - /* Must let xbzrle know, otherwise a previous (now 0'd) cached - * page would be stale - */ - if (rs->xbzrle_started) { - XBZRLE_cache_lock(); - xbzrle_cache_zero_page(rs, block->offset + offset); - XBZRLE_cache_unlock(); - } - return res; + if (save_zero_page(rs, pss, block, offset)) { + return 1; } /* @@ -2891,8 +2878,6 @@ static void migration_bitmap_clear_discarded_pages(RAMState *rs) static void ram_init_bitmaps(RAMState *rs) { - /* For memory_global_dirty_log_start below. */ - qemu_mutex_lock_iothread(); qemu_mutex_lock_ramlist(); WITH_RCU_READ_LOCK_GUARD() { @@ -2904,7 +2889,6 @@ static void ram_init_bitmaps(RAMState *rs) } } qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); /* * After an eventual first bitmap sync, fixup the initial bitmap @@ -3062,17 +3046,27 @@ static int ram_save_setup(QEMUFile *f, void *opaque) } } - ram_control_before_iterate(f, RAM_CONTROL_SETUP); - ram_control_after_iterate(f, RAM_CONTROL_SETUP); + ret = rdma_registration_start(f, RAM_CONTROL_SETUP); + if (ret < 0) { + qemu_file_set_error(f, ret); + } + + ret = rdma_registration_stop(f, RAM_CONTROL_SETUP); + if (ret < 0) { + qemu_file_set_error(f, ret); + } migration_ops = g_malloc0(sizeof(MigrationOps)); migration_ops->ram_save_target_page = ram_save_target_page_legacy; + + qemu_mutex_unlock_iothread(); ret = multifd_send_sync_main(f); + qemu_mutex_lock_iothread(); if (ret < 0) { return ret; } - if (!migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } @@ -3122,7 +3116,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) /* Read version before ram_list.blocks */ smp_rmb(); - ram_control_before_iterate(f, RAM_CONTROL_ROUND); + ret = rdma_registration_start(f, RAM_CONTROL_ROUND); + if (ret < 0) { + qemu_file_set_error(f, ret); + } t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); i = 0; @@ -3179,12 +3176,15 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) * Must occur before EOS (or any QEMUFile operation) * because of RDMA protocol. */ - ram_control_after_iterate(f, RAM_CONTROL_ROUND); + ret = rdma_registration_stop(f, RAM_CONTROL_ROUND); + if (ret < 0) { + qemu_file_set_error(f, ret); + } out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { - if (migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && migrate_multifd_flush_after_each_section()) { ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); if (ret < 0) { return ret; @@ -3227,7 +3227,10 @@ static int ram_save_complete(QEMUFile *f, void *opaque) migration_bitmap_sync_precopy(rs, true); } - ram_control_before_iterate(f, RAM_CONTROL_FINISH); + ret = rdma_registration_start(f, RAM_CONTROL_FINISH); + if (ret < 0) { + qemu_file_set_error(f, ret); + } /* try transferring iterative blocks of memory */ @@ -3249,7 +3252,11 @@ static int ram_save_complete(QEMUFile *f, void *opaque) qemu_mutex_unlock(&rs->bitmap_mutex); ram_flush_compressed_data(rs); - ram_control_after_iterate(f, RAM_CONTROL_FINISH); + + int ret = rdma_registration_stop(f, RAM_CONTROL_FINISH); + if (ret < 0) { + qemu_file_set_error(f, ret); + } } if (ret < 0) { @@ -3261,7 +3268,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return ret; } - if (!migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && !migrate_multifd_flush_after_each_section()) { qemu_put_be64(f, RAM_SAVE_FLAG_MULTIFD_FLUSH); } qemu_put_be64(f, RAM_SAVE_FLAG_EOS); @@ -3768,7 +3775,8 @@ int ram_load_postcopy(QEMUFile *f, int channel) break; case RAM_SAVE_FLAG_EOS: /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && + migrate_multifd_flush_after_each_section()) { multifd_recv_sync_main(); } break; @@ -3861,6 +3869,85 @@ void colo_flush_ram_cache(void) trace_colo_flush_ram_cache_end(); } +static int parse_ramblock(QEMUFile *f, RAMBlock *block, ram_addr_t length) +{ + int ret = 0; + /* ADVISE is earlier, it shows the source has the postcopy capability on */ + bool postcopy_advised = migration_incoming_postcopy_advised(); + + assert(block); + + if (!qemu_ram_is_migratable(block)) { + error_report("block %s should not be migrated !", block->idstr); + return -EINVAL; + } + + if (length != block->used_length) { + Error *local_err = NULL; + + ret = qemu_ram_resize(block, length, &local_err); + if (local_err) { + error_report_err(local_err); + } + } + /* For postcopy we need to check hugepage sizes match */ + if (postcopy_advised && migrate_postcopy_ram() && + block->page_size != qemu_host_page_size) { + uint64_t remote_page_size = qemu_get_be64(f); + if (remote_page_size != block->page_size) { + error_report("Mismatched RAM page size %s " + "(local) %zd != %" PRId64, block->idstr, + block->page_size, remote_page_size); + ret = -EINVAL; + } + } + if (migrate_ignore_shared()) { + hwaddr addr = qemu_get_be64(f); + if (migrate_ram_is_ignored(block) && + block->mr->addr != addr) { + error_report("Mismatched GPAs for block %s " + "%" PRId64 "!= %" PRId64, block->idstr, + (uint64_t)addr, (uint64_t)block->mr->addr); + ret = -EINVAL; + } + } + ret = rdma_block_notification_handle(f, block->idstr); + if (ret < 0) { + qemu_file_set_error(f, ret); + } + + return ret; +} + +static int parse_ramblocks(QEMUFile *f, ram_addr_t total_ram_bytes) +{ + int ret = 0; + + /* Synchronize RAM block list */ + while (!ret && total_ram_bytes) { + RAMBlock *block; + char id[256]; + ram_addr_t length; + int len = qemu_get_byte(f); + + qemu_get_buffer(f, (uint8_t *)id, len); + id[len] = 0; + length = qemu_get_be64(f); + + block = qemu_ram_block_by_name(id); + if (block) { + ret = parse_ramblock(f, block, length); + } else { + error_report("Unknown ramblock \"%s\", cannot accept " + "migration", id); + ret = -EINVAL; + } + total_ram_bytes -= length; + } + + return ret; +} + /** * ram_load_precopy: load pages in precopy case * @@ -3875,14 +3962,13 @@ static int ram_load_precopy(QEMUFile *f) { MigrationIncomingState *mis = migration_incoming_get_current(); int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0; - /* ADVISE is earlier, it shows the source has the postcopy capability on */ - bool postcopy_advised = migration_incoming_postcopy_advised(); + if (!migrate_compress()) { invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE; } while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { - ram_addr_t addr, total_ram_bytes; + ram_addr_t addr; void *host = NULL, *host_bak = NULL; uint8_t ch; @@ -3953,65 +4039,7 @@ static int ram_load_precopy(QEMUFile *f) switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { case RAM_SAVE_FLAG_MEM_SIZE: - /* Synchronize RAM block list */ - total_ram_bytes = addr; - while (!ret && total_ram_bytes) { - RAMBlock *block; - char id[256]; - ram_addr_t length; - - len = qemu_get_byte(f); - qemu_get_buffer(f, (uint8_t *)id, len); - id[len] = 0; - length = qemu_get_be64(f); - - block = qemu_ram_block_by_name(id); - if (block && !qemu_ram_is_migratable(block)) { - error_report("block %s should not be migrated !", id); - ret = -EINVAL; - } else if (block) { - if (length != block->used_length) { - Error *local_err = NULL; - - ret = qemu_ram_resize(block, length, - &local_err); - if (local_err) { - error_report_err(local_err); - } - } - /* For postcopy we need to check hugepage sizes match */ - if (postcopy_advised && migrate_postcopy_ram() && - block->page_size != qemu_host_page_size) { - uint64_t remote_page_size = qemu_get_be64(f); - if (remote_page_size != block->page_size) { - error_report("Mismatched RAM page size %s " - "(local) %zd != %" PRId64, - id, block->page_size, - remote_page_size); - ret = -EINVAL; - } - } - if (migrate_ignore_shared()) { - hwaddr addr2 = qemu_get_be64(f); - if (migrate_ram_is_ignored(block) && - block->mr->addr != addr2) { - error_report("Mismatched GPAs for block %s " - "%" PRId64 "!= %" PRId64, - id, (uint64_t)addr2, - (uint64_t)block->mr->addr); - ret = -EINVAL; - } - } - ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG, - block->idstr); - } else { - error_report("Unknown ramblock \"%s\", cannot " - "accept migration", id); - ret = -EINVAL; - } - - total_ram_bytes -= length; - } + ret = parse_ramblocks(f, addr); break; case RAM_SAVE_FLAG_ZERO: @@ -4046,12 +4074,16 @@ static int ram_load_precopy(QEMUFile *f) break; case RAM_SAVE_FLAG_EOS: /* normal exit */ - if (migrate_multifd_flush_after_each_section()) { + if (migrate_multifd() && + migrate_multifd_flush_after_each_section()) { multifd_recv_sync_main(); } break; case RAM_SAVE_FLAG_HOOK: - ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); + ret = rdma_registration_handle(f); + if (ret < 0) { + qemu_file_set_error(f, ret); + } break; default: error_report("Unknown combination of migration flags: 0x%x", flags); @@ -4159,7 +4191,8 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) int ret = -EINVAL; /* from_dst_file is always valid because we're within rp_thread */ QEMUFile *file = s->rp_state.from_dst_file; - unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; + g_autofree unsigned long *le_bitmap = NULL; + unsigned long nbits = block->used_length >> TARGET_PAGE_BITS; uint64_t local_size = DIV_ROUND_UP(nbits, 8); uint64_t size, end_mark; RAMState *rs = ram_state; @@ -4188,8 +4221,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) error_report("%s: ramblock '%s' bitmap size mismatch " "(0x%"PRIx64" != 0x%"PRIx64")", __func__, block->idstr, size, local_size); - ret = -EINVAL; - goto out; + return -EINVAL; } size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size); @@ -4200,15 +4232,13 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) error_report("%s: read bitmap failed for ramblock '%s': %d" " (size 0x%"PRIx64", got: 0x%"PRIx64")", __func__, block->idstr, ret, local_size, size); - ret = -EIO; - goto out; + return -EIO; } if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) { error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64, __func__, block->idstr, end_mark); - ret = -EINVAL; - goto out; + return -EINVAL; } /* @@ -4240,10 +4270,7 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) */ migration_rp_kick(s); - ret = 0; -out: - g_free(le_bitmap); - return ret; + return 0; } static int ram_resume_prepare(MigrationState *s, void *opaque) @@ -4290,6 +4317,11 @@ static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host, RAMBlock *rb = qemu_ram_block_from_host(host, false, &offset); Error *err = NULL; + if (!rb) { + error_report("RAM block not found"); + return; + } + if (migrate_ram_is_ignored(rb)) { return; } |