 hw/sd/sd.c                     | 27
 hw/sd/trace-events             |  2
 include/migration/vmstate.h    | 13
 migration/block-dirty-bitmap.c | 26
 migration/block.c              | 40
 migration/migration.c          | 59
 migration/migration.h          | 24
 migration/page_cache.c         | 13
 migration/postcopy-ram.c       | 27
 migration/ram.c                | 14
 migration/rdma.c               |  7
 migration/savevm.c             | 61
 migration/trace-events         | 16
 migration/vmstate-types.c      | 26
 migration/vmstate.c            | 10
 scripts/tracetool.py           |  0   (mode -rw-r--r-- -> -rwxr-xr-x)
 tests/qtest/migration-test.c   |  6
 trace/control.c                |  6
 18 files changed, 246 insertions(+), 131 deletions(-)
diff --git a/hw/sd/sd.c b/hw/sd/sd.c index c3febed243..3091382614 100644 --- a/hw/sd/sd.c +++ b/hw/sd/sd.c @@ -102,11 +102,14 @@ struct SDState { uint32_t card_status; uint8_t sd_status[64]; - /* Configurable properties */ + /* Static properties */ + uint8_t spec_version; BlockBackend *blk; bool spi; + /* Runtime changeables */ + uint32_t mode; /* current card mode, one of SDCardModes */ int32_t state; /* current card state, one of SDCardStates */ uint32_t vhs; @@ -251,11 +254,11 @@ static const int sd_cmd_class[SDMMC_CMD_MAX] = { 7, 7, 10, 7, 9, 9, 9, 8, 8, 10, 8, 8, 8, 8, 8, 8, }; -static uint8_t sd_crc7(void *message, size_t width) +static uint8_t sd_crc7(const void *message, size_t width) { int i, bit; uint8_t shift_reg = 0x00; - uint8_t *msg = (uint8_t *) message; + const uint8_t *msg = (const uint8_t *)message; for (i = 0; i < width; i ++, msg ++) for (bit = 7; bit >= 0; bit --) { @@ -267,11 +270,11 @@ static uint8_t sd_crc7(void *message, size_t width) return shift_reg; } -static uint16_t sd_crc16(void *message, size_t width) +static uint16_t sd_crc16(const void *message, size_t width) { int i, bit; uint16_t shift_reg = 0x0000; - uint16_t *msg = (uint16_t *) message; + const uint16_t *msg = (const uint16_t *)message; width <<= 1; for (i = 0; i < width; i ++, msg ++) @@ -824,6 +827,7 @@ static void sd_function_switch(SDState *sd, uint32_t arg) sd->data[12] = 0x80; /* Supported group 1 functions */ sd->data[13] = 0x03; + memset(&sd->data[14], 0, 3); for (i = 0; i < 6; i ++) { new_func = (arg >> (i * 4)) & 0x0f; if (mode && new_func != 0x0f) @@ -1676,7 +1680,7 @@ static sd_rsp_type_t sd_app_command(SDState *sd, return sd_illegal; } -static int cmd_valid_while_locked(SDState *sd, SDRequest *req) +static int cmd_valid_while_locked(SDState *sd, const uint8_t cmd) { /* Valid commands in locked state: * basic class (0) @@ -1687,13 +1691,12 @@ static int cmd_valid_while_locked(SDState *sd, SDRequest *req) * Anything else provokes an "illegal command" response. 
*/ if (sd->expecting_acmd) { - return req->cmd == 41 || req->cmd == 42; + return cmd == 41 || cmd == 42; } - if (req->cmd == 16 || req->cmd == 55) { + if (cmd == 16 || cmd == 55) { return 1; } - return sd_cmd_class[req->cmd] == 0 - || sd_cmd_class[req->cmd] == 7; + return sd_cmd_class[cmd] == 0 || sd_cmd_class[cmd] == 7; } int sd_do_command(SDState *sd, SDRequest *req, @@ -1719,7 +1722,7 @@ int sd_do_command(SDState *sd, SDRequest *req, } if (sd->card_status & CARD_IS_LOCKED) { - if (!cmd_valid_while_locked(sd, req)) { + if (!cmd_valid_while_locked(sd, req->cmd)) { sd->card_status |= ILLEGAL_COMMAND; sd->expecting_acmd = false; qemu_log_mask(LOG_GUEST_ERROR, "SD: Card is locked\n"); @@ -1980,7 +1983,7 @@ uint8_t sd_read_byte(SDState *sd) { /* TODO: Append CRCs */ uint8_t ret; - int io_len; + uint32_t io_len; if (!sd->blk || !blk_is_inserted(sd->blk) || !sd->enable) return 0x00; diff --git a/hw/sd/trace-events b/hw/sd/trace-events index 96c7ea5e52..4140e48540 100644 --- a/hw/sd/trace-events +++ b/hw/sd/trace-events @@ -52,7 +52,7 @@ sdcard_unlock(void) "" sdcard_read_block(uint64_t addr, uint32_t len) "addr 0x%" PRIx64 " size 0x%x" sdcard_write_block(uint64_t addr, uint32_t len) "addr 0x%" PRIx64 " size 0x%x" sdcard_write_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint8_t value) "%s %20s/ CMD%02d value 0x%02x" -sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, int length) "%s %20s/ CMD%02d len %d" +sdcard_read_data(const char *proto, const char *cmd_desc, uint8_t cmd, uint32_t length) "%s %20s/ CMD%02d len %" PRIu32 sdcard_set_voltage(uint16_t millivolts) "%u mV" # milkymist-memcard.c diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index f68ed7db13..4d71dc8fba 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -219,7 +219,6 @@ extern const VMStateInfo vmstate_info_uint64; #define VMS_NULLPTR_MARKER (0x30U) /* '0' */ extern const VMStateInfo vmstate_info_nullptr; -extern const VMStateInfo vmstate_info_float64; extern const VMStateInfo vmstate_info_cpudouble; extern const VMStateInfo vmstate_info_timer; @@ -997,12 +996,6 @@ extern const VMStateInfo vmstate_info_qlist; VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint64, uint64_t) -#define VMSTATE_FLOAT64_V(_f, _s, _v) \ - VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64) - -#define VMSTATE_FLOAT64(_f, _s) \ - VMSTATE_FLOAT64_V(_f, _s, 0) - #define VMSTATE_TIMER_PTR_TEST(_f, _s, _test) \ VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *) @@ -1114,12 +1107,6 @@ extern const VMStateInfo vmstate_info_qlist; #define VMSTATE_INT64_ARRAY(_f, _s, _n) \ VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0) -#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \ - VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64) - -#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \ - VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0) - #define VMSTATE_CPUDOUBLE_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_cpudouble, CPU_DoubleU) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 5bef793ac0..c61d382be8 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -562,8 +562,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs, dbms->bitmap_alias = g_strdup(bitmap_alias); dbms->bitmap = bitmap; dbms->total_sectors = bdrv_nb_sectors(bs); - dbms->sectors_per_chunk = CHUNK_SIZE * 8 * - bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; + dbms->sectors_per_chunk = CHUNK_SIZE * 8LLU * 
+ (bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS); + assert(dbms->sectors_per_chunk != 0); if (bdrv_dirty_bitmap_enabled(bitmap)) { dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; } @@ -1071,18 +1072,15 @@ static int dirty_bitmap_load_header(QEMUFile *f, DBMLoadState *s, return -EINVAL; } - if (!s->cancelled) { - if (bitmap_alias_map) { - bitmap_name = g_hash_table_lookup(bitmap_alias_map, - s->bitmap_alias); - if (!bitmap_name) { - error_report("Error: Unknown bitmap alias '%s' on node " - "'%s' (alias '%s')", s->bitmap_alias, - s->bs->node_name, s->node_alias); - cancel_incoming_locked(s); - } - } else { - bitmap_name = s->bitmap_alias; + bitmap_name = s->bitmap_alias; + if (!s->cancelled && bitmap_alias_map) { + bitmap_name = g_hash_table_lookup(bitmap_alias_map, + s->bitmap_alias); + if (!bitmap_name) { + error_report("Error: Unknown bitmap alias '%s' on node " + "'%s' (alias '%s')", s->bitmap_alias, + s->bs->node_name, s->node_alias); + cancel_incoming_locked(s); } } diff --git a/migration/block.c b/migration/block.c index 737b6499f9..a950977855 100644 --- a/migration/block.c +++ b/migration/block.c @@ -26,6 +26,7 @@ #include "qemu-file.h" #include "migration/vmstate.h" #include "sysemu/block-backend.h" +#include "trace.h" #define BLK_MIG_BLOCK_SIZE (1 << 20) #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLK_MIG_BLOCK_SIZE >> BDRV_SECTOR_BITS) @@ -40,7 +41,7 @@ #define MAX_IO_BUFFERS 512 #define MAX_PARALLEL_IO 16 -//#define DEBUG_BLK_MIGRATION +/* #define DEBUG_BLK_MIGRATION */ #ifdef DEBUG_BLK_MIGRATION #define DPRINTF(fmt, ...) \ @@ -434,10 +435,9 @@ static int init_blk_migration(QEMUFile *f) block_mig_state.total_sector_sum += sectors; if (bmds->shared_base) { - DPRINTF("Start migration for %s with shared base image\n", - bdrv_get_device_name(bs)); + trace_migration_block_init_shared(bdrv_get_device_name(bs)); } else { - DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs)); + trace_migration_block_init_full(bdrv_get_device_name(bs)); } QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry); @@ -592,7 +592,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, return (bmds->cur_dirty >= bmds->total_sectors); error: - DPRINTF("Error reading sector %" PRId64 "\n", sector); + trace_migration_block_save_device_dirty(sector); g_free(blk->buf); g_free(blk); return ret; @@ -628,9 +628,9 @@ static int flush_blks(QEMUFile *f) BlkMigBlock *blk; int ret = 0; - DPRINTF("%s Enter submitted %d read_done %d transferred %d\n", - __func__, block_mig_state.submitted, block_mig_state.read_done, - block_mig_state.transferred); + trace_migration_block_flush_blks("Enter", block_mig_state.submitted, + block_mig_state.read_done, + block_mig_state.transferred); blk_mig_lock(); while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) { @@ -656,9 +656,9 @@ static int flush_blks(QEMUFile *f) } blk_mig_unlock(); - DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __func__, - block_mig_state.submitted, block_mig_state.read_done, - block_mig_state.transferred); + trace_migration_block_flush_blks("Exit", block_mig_state.submitted, + block_mig_state.read_done, + block_mig_state.transferred); return ret; } @@ -727,8 +727,8 @@ static int block_save_setup(QEMUFile *f, void *opaque) { int ret; - DPRINTF("Enter save live setup submitted %d transferred %d\n", - block_mig_state.submitted, block_mig_state.transferred); + trace_migration_block_save("setup", block_mig_state.submitted, + block_mig_state.transferred); qemu_mutex_lock_iothread(); ret = 
init_blk_migration(f); @@ -759,8 +759,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque) int64_t last_ftell = qemu_ftell(f); int64_t delta_ftell; - DPRINTF("Enter save live iterate submitted %d transferred %d\n", - block_mig_state.submitted, block_mig_state.transferred); + trace_migration_block_save("iterate", block_mig_state.submitted, + block_mig_state.transferred); ret = flush_blks(f); if (ret) { @@ -825,8 +825,8 @@ static int block_save_complete(QEMUFile *f, void *opaque) { int ret; - DPRINTF("Enter save live complete submitted %d transferred %d\n", - block_mig_state.submitted, block_mig_state.transferred); + trace_migration_block_save("complete", block_mig_state.submitted, + block_mig_state.transferred); ret = flush_blks(f); if (ret) { @@ -851,7 +851,7 @@ static int block_save_complete(QEMUFile *f, void *opaque) /* report completion */ qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS); - DPRINTF("Block migration completed\n"); + trace_migration_block_save_complete(); qemu_put_be64(f, BLK_MIG_FLAG_EOS); @@ -884,7 +884,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size, pending = max_size + BLK_MIG_BLOCK_SIZE; } - DPRINTF("Enter save live pending %" PRIu64 "\n", pending); + trace_migration_block_save_pending(pending); /* We don't do postcopy */ *res_precopy_only += pending; } @@ -998,7 +998,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id) (addr == 100) ? '\n' : '\r'); fflush(stdout); } else if (!(flags & BLK_MIG_FLAG_EOS)) { - fprintf(stderr, "Unknown block migration flags: %#x\n", flags); + fprintf(stderr, "Unknown block migration flags: 0x%x\n", flags); return -EINVAL; } ret = qemu_file_get_error(f); diff --git a/migration/migration.c b/migration/migration.c index deb6005b8d..9bb4fee5ac 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -143,6 +143,13 @@ static int migration_maybe_pause(MigrationState *s, int new_state); static void migrate_fd_cancel(MigrationState *s); +static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) +{ + uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp; + + return (a > b) - (a < b); +} + void migration_object_init(void) { Error *err = NULL; @@ -164,6 +171,8 @@ void migration_object_init(void) qemu_event_init(¤t_incoming->main_thread_load_event, false); qemu_sem_init(¤t_incoming->postcopy_pause_sem_dst, 0); qemu_sem_init(¤t_incoming->postcopy_pause_sem_fault, 0); + qemu_mutex_init(¤t_incoming->page_request_mutex); + current_incoming->page_requested = g_tree_new(page_request_addr_cmp); if (!migration_object_check(current_migration, &err)) { error_report_err(err); @@ -230,6 +239,11 @@ void migration_incoming_state_destroy(void) qemu_event_reset(&mis->main_thread_load_event); + if (mis->page_requested) { + g_tree_destroy(mis->page_requested); + mis->page_requested = NULL; + } + if (mis->socket_address_list) { qapi_free_SocketAddressList(mis->socket_address_list); mis->socket_address_list = NULL; @@ -306,8 +320,8 @@ error: * Start: Address offset within the RB * Len: Length in bytes required - must be a multiple of pagesize */ -int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, - ram_addr_t start) +int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, + RAMBlock *rb, ram_addr_t start) { uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */ size_t msglen = 12; /* start + len */ @@ -343,6 +357,37 @@ int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, return migrate_send_rp_message(mis, 
msg_type, msglen, bufc); } +int migrate_send_rp_req_pages(MigrationIncomingState *mis, + RAMBlock *rb, ram_addr_t start, uint64_t haddr) +{ + void *aligned = (void *)(uintptr_t)(haddr & (-qemu_ram_pagesize(rb))); + bool received; + + WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { + received = ramblock_recv_bitmap_test_byte_offset(rb, start); + if (!received && !g_tree_lookup(mis->page_requested, aligned)) { + /* + * The page has not been received, and it's not yet in the page + * request list. Queue it. Set the value of element to 1, so that + * things like g_tree_lookup() will return TRUE (1) when found. + */ + g_tree_insert(mis->page_requested, aligned, (gpointer)1); + mis->page_requested_count++; + trace_postcopy_page_req_add(aligned, mis->page_requested_count); + } + } + + /* + * If the page is there, skip sending the message. We don't even need the + * lock because as long as the page arrived, it'll be there forever. + */ + if (received) { + return 0; + } + + return migrate_send_rp_message_req_pages(mis, rb, start); +} + static bool migration_colo_enabled; bool migration_incoming_colo_enabled(void) { @@ -2468,8 +2513,8 @@ static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname, * Since we currently insist on matching page sizes, just sanity check * we're being asked for whole host pages. */ - if (start & (our_host_ps-1) || - (len & (our_host_ps-1))) { + if (start & (our_host_ps - 1) || + (len & (our_host_ps - 1))) { error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT " len: %zd", __func__, start, len); mark_source_rp_bad(ms); @@ -3123,9 +3168,6 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; - migrate_set_state(&s->state, s->state, - MIGRATION_STATUS_POSTCOPY_PAUSED); - /* Current channel is possibly broken. Release it. */ assert(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); @@ -3136,6 +3178,9 @@ static MigThrError postcopy_pause(MigrationState *s) qemu_file_shutdown(file); qemu_fclose(file); + migrate_set_state(&s->state, s->state, + MIGRATION_STATUS_POSTCOPY_PAUSED); + error_report("Detected IO failure for postcopy. " "Migration paused."); diff --git a/migration/migration.h b/migration/migration.h index deb411aaad..d096b77f74 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -104,6 +104,23 @@ struct MigrationIncomingState { /* List of listening socket addresses */ SocketAddressList *socket_address_list; + + /* A tree of pages that we requested to the source VM */ + GTree *page_requested; + /* For debugging purpose only, but would be nice to keep */ + int page_requested_count; + /* + * The mutex helps to maintain the requested pages that we sent to the + * source, IOW, to guarantee coherent between the page_requests tree and + * the per-ramblock receivedmap. Note! This does not guarantee consistency + * of the real page copy procedures (using UFFDIO_[ZERO]COPY). E.g., even + * if one bit in receivedmap is cleared, UFFDIO_COPY could have happened + * for that page already. This is intended so that the mutex won't + * serialize and blocked by slow operations like UFFDIO_* ioctls. However + * this should be enough to make sure the page_requested tree always + * contains valid information. 
+ */ + QemuMutex page_request_mutex; }; MigrationIncomingState *migration_incoming_get_current(void); @@ -124,8 +141,7 @@ struct MigrationClass { DeviceClass parent_class; }; -struct MigrationState -{ +struct MigrationState { /*< private >*/ DeviceState parent_obj; @@ -332,7 +348,9 @@ void migrate_send_rp_shut(MigrationIncomingState *mis, void migrate_send_rp_pong(MigrationIncomingState *mis, uint32_t value); int migrate_send_rp_req_pages(MigrationIncomingState *mis, RAMBlock *rb, - ram_addr_t start); + ram_addr_t start, uint64_t haddr); +int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, + RAMBlock *rb, ram_addr_t start); void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, char *block_name); void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); diff --git a/migration/page_cache.c b/migration/page_cache.c index 775582f453..098b436223 100644 --- a/migration/page_cache.c +++ b/migration/page_cache.c @@ -18,14 +18,7 @@ #include "qapi/error.h" #include "qemu/host-utils.h" #include "page_cache.h" - -#ifdef DEBUG_CACHE -#define DPRINTF(fmt, ...) \ - do { fprintf(stdout, "cache: " fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ - do { } while (0) -#endif +#include "trace.h" /* the page in cache will not be replaced in two cycles */ #define CACHED_PAGE_LIFETIME 2 @@ -75,7 +68,7 @@ PageCache *cache_init(int64_t new_size, size_t page_size, Error **errp) cache->num_items = 0; cache->max_num_items = num_pages; - DPRINTF("Setting cache buckets to %" PRId64 "\n", cache->max_num_items); + trace_migration_pagecache_init(cache->max_num_items); /* We prefer not to abort if there is no memory */ cache->page_cache = g_try_malloc((cache->max_num_items) * @@ -169,7 +162,7 @@ int cache_insert(PageCache *cache, uint64_t addr, const uint8_t *pdata, if (!it->it_data) { it->it_data = g_try_malloc(cache->page_size); if (!it->it_data) { - DPRINTF("Error allocating page\n"); + trace_migration_pagecache_insert(); return -1; } cache->num_items++; diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 0a2f88a87d..d3bb3a744b 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -403,7 +403,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) strerror(errno)); goto out; } - g_assert(((size_t)testarea & (pagesize-1)) == 0); + g_assert(((size_t)testarea & (pagesize - 1)) == 0); reg_struct.range.start = (uintptr_t)testarea; reg_struct.range.len = pagesize; @@ -684,7 +684,7 @@ int postcopy_request_shared_page(struct PostCopyFD *pcfd, RAMBlock *rb, qemu_ram_get_idstr(rb), rb_offset); return postcopy_wake_shared(pcfd, client_addr, rb); } - migrate_send_rp_req_pages(mis, rb, aligned_rbo); + migrate_send_rp_req_pages(mis, rb, aligned_rbo, client_addr); return 0; } @@ -979,7 +979,8 @@ retry: * Send the request to the source - we want to request one * of our host page sizes (which is >= TPS) */ - ret = migrate_send_rp_req_pages(mis, rb, rb_offset); + ret = migrate_send_rp_req_pages(mis, rb, rb_offset, + msg.arg.pagefault.address); if (ret) { /* May be network failure, try to wait for recovery */ if (ret == -EIO && postcopy_pause_fault_thread(mis)) { @@ -1128,10 +1129,12 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) return 0; } -static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, +static int qemu_ufd_copy_ioctl(MigrationIncomingState *mis, void *host_addr, void *from_addr, uint64_t pagesize, RAMBlock *rb) { + int userfault_fd = mis->userfault_fd; int ret; + if (from_addr) { struct 
uffdio_copy copy_struct; copy_struct.dst = (uint64_t)(uintptr_t)host_addr; @@ -1147,10 +1150,20 @@ static int qemu_ufd_copy_ioctl(int userfault_fd, void *host_addr, ret = ioctl(userfault_fd, UFFDIO_ZEROPAGE, &zero_struct); } if (!ret) { + qemu_mutex_lock(&mis->page_request_mutex); ramblock_recv_bitmap_set_range(rb, host_addr, pagesize / qemu_target_page_size()); + /* + * If this page resolves a page fault for a previous recorded faulted + * address, take a special note to maintain the requested page list. + */ + if (g_tree_lookup(mis->page_requested, host_addr)) { + g_tree_remove(mis->page_requested, host_addr); + mis->page_requested_count--; + trace_postcopy_page_req_del(host_addr, mis->page_requested_count); + } + qemu_mutex_unlock(&mis->page_request_mutex); mark_postcopy_blocktime_end((uintptr_t)host_addr); - } return ret; } @@ -1185,7 +1198,7 @@ int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from, * which would be slightly cheaper, but we'd have to be careful * of the order of updating our page state. */ - if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, from, pagesize, rb)) { + if (qemu_ufd_copy_ioctl(mis, host, from, pagesize, rb)) { int e = errno; error_report("%s: %s copy host: %p from: %p (size: %zd)", __func__, strerror(e), host, from, pagesize); @@ -1212,7 +1225,7 @@ int postcopy_place_page_zero(MigrationIncomingState *mis, void *host, * but it's not available for everything (e.g. hugetlbpages) */ if (qemu_ram_is_uf_zeroable(rb)) { - if (qemu_ufd_copy_ioctl(mis->userfault_fd, host, NULL, pagesize, rb)) { + if (qemu_ufd_copy_ioctl(mis, host, NULL, pagesize, rb)) { int e = errno; error_report("%s: %s zero host: %p", __func__, strerror(e), host); diff --git a/migration/ram.c b/migration/ram.c index 433489d633..2da2b622ab 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -101,14 +101,16 @@ static struct { static void XBZRLE_cache_lock(void) { - if (migrate_use_xbzrle()) + if (migrate_use_xbzrle()) { qemu_mutex_lock(&XBZRLE.lock); + } } static void XBZRLE_cache_unlock(void) { - if (migrate_use_xbzrle()) + if (migrate_use_xbzrle()) { qemu_mutex_unlock(&XBZRLE.lock); + } } /** @@ -1563,7 +1565,7 @@ int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len) rs->last_req_rb = ramblock; } trace_ram_save_queue_pages(ramblock->idstr, start, len); - if (start+len > ramblock->used_length) { + if (start + len > ramblock->used_length) { error_report("%s request overrun start=" RAM_ADDR_FMT " len=" RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT, __func__, start, len, ramblock->used_length); @@ -2741,7 +2743,7 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) */ static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags) { - static RAMBlock *block = NULL; + static RAMBlock *block; char id[256]; uint8_t len; @@ -3298,7 +3300,7 @@ static int ram_load_postcopy(QEMUFile *f) multifd_recv_sync_main(); break; default: - error_report("Unknown combination of migration flags: %#x" + error_report("Unknown combination of migration flags: 0x%x" " (postcopy mode)", flags); ret = -EINVAL; break; @@ -3576,7 +3578,7 @@ static int ram_load_precopy(QEMUFile *f) if (flags & RAM_SAVE_FLAG_HOOK) { ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL); } else { - error_report("Unknown combination of migration flags: %#x", + error_report("Unknown combination of migration flags: 0x%x", flags); ret = -EINVAL; } diff --git a/migration/rdma.c b/migration/rdma.c index 0340841fad..00eac34232 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -273,7 +273,8 @@ 
static uint64_t htonll(uint64_t v) return u.llv; } -static uint64_t ntohll(uint64_t v) { +static uint64_t ntohll(uint64_t v) +{ union { uint32_t lv[2]; uint64_t llv; } u; u.llv = v; return ((uint64_t)ntohl(u.lv[0]) << 32) | (uint64_t) ntohl(u.lv[1]); @@ -854,7 +855,7 @@ static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp) */ if (!verbs) { int num_devices, x; - struct ibv_device ** dev_list = ibv_get_device_list(&num_devices); + struct ibv_device **dev_list = ibv_get_device_list(&num_devices); bool roce_found = false; bool ib_found = false; @@ -1288,7 +1289,7 @@ const char *print_wrid(int wrid) * workload information or LRU information is available, do not attempt to use * this feature except for basic testing. */ -//#define RDMA_UNREGISTRATION_EXAMPLE +/* #define RDMA_UNREGISTRATION_EXAMPLE */ /* * Perform a non-optimized memory unregistration after every transfer diff --git a/migration/savevm.c b/migration/savevm.c index ff33e210eb..21ccba9fb3 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -63,7 +63,7 @@ #include "qemu/bitmap.h" #include "net/announce.h" -const unsigned int postcopy_ram_discard_version = 0; +const unsigned int postcopy_ram_discard_version; /* Subcommands for QEMU_VM_COMMAND */ enum qemu_vm_cmd { @@ -520,7 +520,7 @@ static const VMStateDescription vmstate_configuration = { VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, len), VMSTATE_END_OF_LIST() }, - .subsections = (const VMStateDescription*[]) { + .subsections = (const VMStateDescription *[]) { &vmstate_target_page_bits, &vmstate_capabilites, &vmstate_uuid, @@ -2010,6 +2010,49 @@ static int loadvm_postcopy_handle_run(MigrationIncomingState *mis) return LOADVM_QUIT; } +/* We must be with page_request_mutex held */ +static gboolean postcopy_sync_page_req(gpointer key, gpointer value, + gpointer data) +{ + MigrationIncomingState *mis = data; + void *host_addr = (void *) key; + ram_addr_t rb_offset; + RAMBlock *rb; + int ret; + + rb = qemu_ram_block_from_host(host_addr, true, &rb_offset); + if (!rb) { + /* + * This should _never_ happen. However be nice for a migrating VM to + * not crash/assert. Post an error (note: intended to not use *_once + * because we do want to see all the illegal addresses; and this can + * never be triggered by the guest so we're safe) and move on next. + */ + error_report("%s: illegal host addr %p", __func__, host_addr); + /* Try the next entry */ + return FALSE; + } + + ret = migrate_send_rp_message_req_pages(mis, rb, rb_offset); + if (ret) { + /* Please refer to above comment. */ + error_report("%s: send rp message failed for addr %p", + __func__, host_addr); + return FALSE; + } + + trace_postcopy_page_req_sync(host_addr); + + return FALSE; +} + +static void migrate_send_rp_req_pages_pending(MigrationIncomingState *mis) +{ + WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) { + g_tree_foreach(mis->page_requested, postcopy_sync_page_req, mis); + } +} + static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) { if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) { @@ -2032,6 +2075,20 @@ static int loadvm_postcopy_handle_resume(MigrationIncomingState *mis) /* Tell source that "we are ready" */ migrate_send_rp_resume_ack(mis, MIGRATION_RESUME_ACK_VALUE); + /* + * After a postcopy recovery, the source should have lost the postcopy + * queue, or potentially the requested pages could have been lost during + * the network down phase. 
Let's re-sync with the source VM by re-sending + * all the pending pages that we eagerly need, so these threads won't get + * blocked too long due to the recovery. + * + * Without this procedure, the faulted destination VM threads (waiting for + * page requests right before the postcopy is interrupted) can keep hanging + * until the pages are sent by the source during the background copying of + * pages, or another thread faulted on the same address accidentally. + */ + migrate_send_rp_req_pages_pending(mis); + return 0; } diff --git a/migration/trace-events b/migration/trace-events index 338f38b3dd..75de5004ac 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -49,6 +49,7 @@ vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s" vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s" postcopy_pause_incoming(void) "" postcopy_pause_incoming_continued(void) "" +postcopy_page_req_sync(void *host_addr) "sync page req %p" # vmstate.c vmstate_load_field_error(const char *field, int ret) "field \"%s\" load failed, ret = %d" @@ -162,6 +163,7 @@ postcopy_pause_return_path(void) "" postcopy_pause_return_path_continued(void) "" postcopy_pause_continued(void) "" postcopy_start_set_run(void) "" +postcopy_page_req_add(void *addr, int count) "new page req %p total %d" source_return_path_thread_bad_end(void) "" source_return_path_thread_end(void) "" source_return_path_thread_entry(void) "" @@ -272,6 +274,7 @@ postcopy_ram_incoming_cleanup_blocktime(uint64_t total) "total blocktime %" PRIu postcopy_request_shared_page(const char *sharer, const char *rb, uint64_t rb_offset) "for %s in %s offset 0x%"PRIx64 postcopy_request_shared_page_present(const char *sharer, const char *rb, uint64_t rb_offset) "%s already %s offset 0x%"PRIx64 postcopy_wake_shared(uint64_t client_addr, const char *rb) "at 0x%"PRIx64" in %s" +postcopy_page_req_del(void *addr, int count) "resolved page req %p total %d" get_mem_fault_cpu_index(int cpu, uint32_t pid) "cpu: %d, pid: %u" @@ -325,3 +328,16 @@ get_ramblock_vfn_hash(const char *idstr, uint64_t vfn, uint32_t crc) "ramblock n calc_page_dirty_rate(const char *idstr, uint32_t new_crc, uint32_t old_crc) "ramblock name: %s, new crc: %" PRIu32 ", old crc: %" PRIu32 skip_sample_ramblock(const char *idstr, uint64_t ramblock_size) "ramblock name: %s, ramblock size: %" PRIu64 find_page_matched(const char *idstr) "ramblock %s addr or size changed" + +# block.c +migration_block_init_shared(const char *blk_device_name) "Start migration for %s with shared base image" +migration_block_init_full(const char *blk_device_name) "Start full migration for %s" +migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId64 +migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d" +migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d" +migration_block_save_complete(void) "Block migration completed" +migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64 + +# page_cache.c +migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64 +migration_pagecache_insert(void) "Error allocating page" diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index 35e784c9d9..e22d41d73d 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -420,32 +420,6 @@ const VMStateInfo vmstate_info_uint16_equal = { .put = put_uint16, }; -/* 
floating point */ - -static int get_float64(QEMUFile *f, void *pv, size_t size, - const VMStateField *field) -{ - float64 *v = pv; - - *v = make_float64(qemu_get_be64(f)); - return 0; -} - -static int put_float64(QEMUFile *f, void *pv, size_t size, - const VMStateField *field, QJSON *vmdesc) -{ - uint64_t *v = pv; - - qemu_put_be64(f, float64_val(*v)); - return 0; -} - -const VMStateInfo vmstate_info_float64 = { - .name = "float64", - .get = get_float64, - .put = put_float64, -}; - /* CPU_DoubleU type */ static int get_cpudouble(QEMUFile *f, void *pv, size_t size, diff --git a/migration/vmstate.c b/migration/vmstate.c index bafa890384..e9d2aef66b 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -32,13 +32,13 @@ static int vmstate_n_elems(void *opaque, const VMStateField *field) if (field->flags & VMS_ARRAY) { n_elems = field->num; } else if (field->flags & VMS_VARRAY_INT32) { - n_elems = *(int32_t *)(opaque+field->num_offset); + n_elems = *(int32_t *)(opaque + field->num_offset); } else if (field->flags & VMS_VARRAY_UINT32) { - n_elems = *(uint32_t *)(opaque+field->num_offset); + n_elems = *(uint32_t *)(opaque + field->num_offset); } else if (field->flags & VMS_VARRAY_UINT16) { - n_elems = *(uint16_t *)(opaque+field->num_offset); + n_elems = *(uint16_t *)(opaque + field->num_offset); } else if (field->flags & VMS_VARRAY_UINT8) { - n_elems = *(uint8_t *)(opaque+field->num_offset); + n_elems = *(uint8_t *)(opaque + field->num_offset); } if (field->flags & VMS_MULTIPLY_ELEMENTS) { @@ -54,7 +54,7 @@ static int vmstate_size(void *opaque, const VMStateField *field) int size = field->size; if (field->flags & VMS_VBUFFER) { - size = *(int32_t *)(opaque+field->size_offset); + size = *(int32_t *)(opaque + field->size_offset); if (field->flags & VMS_MULTIPLY) { size *= field->size; } diff --git a/scripts/tracetool.py b/scripts/tracetool.py index 31146242b7..31146242b7 100644..100755 --- a/scripts/tracetool.py +++ b/scripts/tracetool.py diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index f410ec5996..f2142fbd3c 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -464,6 +464,10 @@ static void migrate_postcopy_start(QTestState *from, QTestState *to) } typedef struct { + /* + * QTEST_LOG=1 may override this. When QTEST_LOG=1, we always dump errors + * unconditionally, because it means the user would like to be verbose. 
+ */ bool hide_stderr; bool use_shmem; /* only launch the target process */ @@ -557,7 +561,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, g_free(bootpath); - if (args->hide_stderr) { + if (!getenv("QTEST_LOG") && args->hide_stderr) { ignore_stderr = "2>/dev/null"; } else { ignore_stderr = ""; diff --git a/trace/control.c b/trace/control.c index b35e512dce..5669db7eea 100644 --- a/trace/control.c +++ b/trace/control.c @@ -39,6 +39,7 @@ static TraceEventGroup *event_groups; static size_t nevent_groups; static uint32_t next_id; static uint32_t next_vcpu_id; +static bool init_trace_on_startup; QemuOptsList qemu_trace_opts = { .name = "trace", @@ -225,7 +226,9 @@ void trace_init_file(const char *file) { #ifdef CONFIG_TRACE_SIMPLE st_set_trace_file(file); - st_set_trace_file_enabled(true); + if (init_trace_on_startup) { + st_set_trace_file_enabled(true); + } #elif defined CONFIG_TRACE_LOG /* * If both the simple and the log backends are enabled, "--trace file" @@ -299,6 +302,7 @@ char *trace_opt_parse(const char *optarg) } trace_init_events(qemu_opt_get(opts, "events")); trace_file = g_strdup(qemu_opt_get(opts, "file")); + init_trace_on_startup = true; qemu_opts_del(opts); return trace_file; |
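
The new page_requested bookkeeping spread across migration.c, migration.h, postcopy-ram.c and savevm.c follows one pattern: a GTree keyed by the faulting host address, protected by page_request_mutex, suppresses duplicate page requests, drops an entry once UFFDIO_COPY has placed the page, and lets the destination re-send every still-outstanding request after a postcopy recovery. Below is a minimal standalone sketch of that pattern against plain GLib. The names track_fault, page_resolved, resend_one, pending and pending_lock are illustrative stand-ins only; the patch itself works on MigrationIncomingState fields and uses WITH_QEMU_LOCK_GUARD rather than explicit lock/unlock calls.

    #include <glib.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * Order raw pointer keys.  Same shape as the patch's
     * page_request_addr_cmp(): subtraction is avoided because a pointer
     * difference does not fit in the gint the comparator must return.
     */
    static gint addr_cmp(gconstpointer ap, gconstpointer bp)
    {
        uintptr_t a = (uintptr_t)ap, b = (uintptr_t)bp;

        return (a > b) - (a < b);
    }

    static GTree *pending;       /* key: faulting host address, value: (gpointer)1 */
    static GMutex pending_lock;  /* stand-in for page_request_mutex */

    /* Record a fault; returns TRUE when a request should go to the source. */
    static gboolean track_fault(void *addr)
    {
        gboolean need_request = FALSE;

        g_mutex_lock(&pending_lock);
        if (!g_tree_lookup(pending, addr)) {
            /* Value 1 so that g_tree_lookup() reports the entry as present. */
            g_tree_insert(pending, addr, (gpointer)1);
            need_request = TRUE;
        }
        g_mutex_unlock(&pending_lock);

        return need_request;
    }

    /* Drop the entry once the page content has actually been placed. */
    static void page_resolved(void *addr)
    {
        g_mutex_lock(&pending_lock);
        g_tree_remove(pending, addr);
        g_mutex_unlock(&pending_lock);
    }

    /*
     * GTraverseFunc for re-requesting whatever is still outstanding, e.g.
     * right after a postcopy recovery.  Returning FALSE keeps
     * g_tree_foreach() walking the remaining entries.
     */
    static gboolean resend_one(gpointer key, gpointer value, gpointer data)
    {
        (void)value;
        (void)data;
        printf("re-request page at %p\n", key);
        return FALSE;
    }

    int main(void)
    {
        int dummy;
        void *addr = &dummy;   /* stands in for a page-aligned guest RAM address */

        pending = g_tree_new(addr_cmp);

        if (track_fault(addr)) {
            printf("first fault at %p -> send a request\n", addr);
        }
        if (!track_fault(addr)) {
            printf("second fault at %p -> suppressed\n", addr);
        }

        g_mutex_lock(&pending_lock);
        g_tree_foreach(pending, resend_one, NULL);
        g_mutex_unlock(&pending_lock);

        page_resolved(addr);
        g_tree_destroy(pending);

        return 0;
    }

The sketch builds with gcc and pkg-config's glib-2.0 flags. In the patch proper, the same mutex also covers the ramblock_recv_bitmap_set_range() update inside qemu_ufd_copy_ioctl(), which is what keeps the receivedmap and the tree coherent and makes the early return in migrate_send_rp_req_pages() safe once a page has already arrived.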
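
The block-dirty-bitmap.c change to sectors_per_chunk addresses 32-bit overflow: in the old expression the whole product CHUNK_SIZE * 8 * granularity was evaluated in 32-bit arithmetic and only then shifted, so a large bitmap granularity could wrap (to zero in the worst case), which is exactly what the newly added assert guards against. The fixed form forces 64-bit arithmetic with 8LLU and shifts the granularity down to sectors before multiplying. The snippet below reproduces the difference with assumed values: CHUNK_SIZE is taken to be 1 << 10 here and the 1 GiB granularity is deliberately extreme; only BDRV_SECTOR_BITS == 9 (512-byte sectors) matches QEMU.

    #include <inttypes.h>
    #include <stdio.h>

    #define CHUNK_SIZE       (1 << 10)   /* assumed for illustration only */
    #define BDRV_SECTOR_BITS 9           /* 512-byte sectors, as in QEMU */

    int main(void)
    {
        uint32_t granularity = 1u << 30;   /* deliberately huge: 1 GiB */

        /* Old shape: the product is computed in 32-bit arithmetic and
         * wraps before the shift is ever applied. */
        uint64_t old_form = CHUNK_SIZE * 8 * granularity >> BDRV_SECTOR_BITS;

        /* New shape: 8LLU promotes the whole expression to 64-bit, and the
         * granularity is reduced to sectors before the multiply. */
        uint64_t new_form = CHUNK_SIZE * 8LLU *
                            (granularity >> BDRV_SECTOR_BITS);

        printf("old: %" PRIu64 "  new: %" PRIu64 "\n", old_form, new_form);
        return 0;
    }

With these numbers the old form prints 0 while the new form prints 17179869184 (2^34) sectors per chunk; with realistic granularities the two agree, the fix only matters near the overflow boundary.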
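
The block.c and page_cache.c hunks are a routine conversion from DPRINTF macros, which compiled to nothing unless a local DEBUG define was enabled, to trace points that are always built and can be toggled at runtime. The mode flip on scripts/tracetool.py is related housekeeping, since that script is what turns the declarations into the generated trace_*() helpers. The pairing looks like this, using one of the events this series adds; both lines are taken from the diff above, nothing here is new API.

    # migration/trace-events declares the event name, its typed arguments
    # and a printf-style format string:
    migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"

    /* The C code then calls the generated helper, which is always the
     * event name with a trace_ prefix: */
    trace_migration_block_save("iterate", block_mig_state.submitted,
                               block_mig_state.transferred);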