 MAINTAINERS                       |  10
 accel/tcg/tb-maint.c              |   4
 docs/devel/kconfig.rst            |  16
 hw/vfio/common.c                  |   3
 hw/vfio/pci.c                     |  67
 hw/vfio/pci.h                     |   1
 include/qemu/osdep.h              |   2
 include/qemu/rcu.h                |   5
 meson.build                       |   4
 migration/meson.build             |   6
 migration/migration.c             |   9
 migration/qemu-file.c             |  11
 migration/qemu-file.h             |   1
 migration/ram-compress.c          | 485
 migration/ram-compress.h          |  70
 migration/ram.c                   | 490
 target/i386/cpu.c                 | 375
 target/i386/cpu.h                 |  15
 target/loongarch/machine.c        |   1
 tests/qtest/migration-test.c      | 134
 tests/unit/test-aio-multithread.c |  30
 util/rcu.c                        |  69
 22 files changed, 1300 insertions, 508 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 55102f4761..f757369373 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3910,6 +3910,16 @@ F: configure
 F: scripts/mtest2make.py
 F: tests/Makefile.include
 
+Kconfig
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: scripts/minikconf.py
+F: docs/devel/kconfig.rst
+F: Kconfig*
+F: */Kconfig*
+F: hw/*/Kconfig*
+F: target/*/Kconfig*
+
 GIT submodules
 M: Daniel P. Berrange <berrange@redhat.com>
 S: Odd Fixes
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index cb1f806f00..0dd173fbf0 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -746,7 +746,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     tcg_region_reset_all();
 
     /* XXX: flush processor icache at this point if cache flush is expensive */
-    qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
+    qatomic_inc(&tb_ctx.tb_flush_count);
 
 done:
     mmap_unlock();
@@ -758,7 +758,7 @@ done:
 void tb_flush(CPUState *cpu)
 {
     if (tcg_enabled()) {
-        unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
+        unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
 
         if (cpu_in_exclusive_context(cpu)) {
             do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
diff --git a/docs/devel/kconfig.rst b/docs/devel/kconfig.rst
index ac9453eba9..e3a544e463 100644
--- a/docs/devel/kconfig.rst
+++ b/docs/devel/kconfig.rst
@@ -282,9 +282,19 @@ want to change some lines in the first group, for example like this::
    CONFIG_PCI_DEVICES=y
    #CONFIG_TEST_DEVICES=n
 
-and/or pick a subset of the devices in those device groups. Right now
-there is no single place that lists all the optional devices for
-``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future,
+and/or pick a subset of the devices in those device groups. Without
+further modifications to ``configs/devices/``, a system emulator built
+without default devices might not do much more than start an empty
+machine, and even then only if ``--nodefaults`` is specified on the
+command line. Starting a VM *without* ``--nodefaults`` is allowed to
+fail, but should never abort. Failures in ``make check`` with
+``--without-default-devices`` are considered bugs in the test code:
+the tests should either use ``--nodefaults`` or be skipped if a
+necessary device is not present in the build. Such failures should
+not be worked around with ``select`` directives.
+
+Right now there is no single place that lists all the optional devices
+for ``CONFIG_PCI_DEVICES`` and ``CONFIG_TEST_DEVICES``. In the future,
 we expect that ``.mak`` files will be automatically generated, so that
 they will include all these symbols and some help text on what they do.
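Editor's note: the tb-maint.c hunks above replace the last seq-cst accessors with plain atomics. What makes this safe is that tb_flush_count is only a coalescing ticket: a requester snapshots the counter, the flush runs later in an exclusive section and re-checks the snapshot, so several pending requests collapse into one flush and a stale read merely produces a request that turns into a no-op. The following is a minimal standalone sketch of that snapshot-and-recheck pattern in C11 atomics; the names (flush_count, do_flush) are hypothetical, and QEMU's deferral through async_safe_run_on_cpu() is elided:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint flush_count;

    /* In QEMU the equivalent runs with all vCPUs quiesced. */
    static void do_flush(unsigned requested_at)
    {
        /*
         * A flush already ran after this requester took its snapshot:
         * the request is stale, so coalesce it into the earlier flush.
         */
        if (atomic_load(&flush_count) != requested_at) {
            return;
        }
        /* ... the actual expensive flush would happen here ... */
        atomic_fetch_add(&flush_count, 1);      /* cf. qatomic_inc() */
    }

    int main(void)
    {
        /* Two requesters snapshot the counter before any flush runs. */
        unsigned a = atomic_load(&flush_count); /* cf. qatomic_read() */
        unsigned b = atomic_load(&flush_count);

        do_flush(a);    /* performs the flush and bumps the counter */
        do_flush(b);    /* stale snapshot: coalesced, no second flush */
        printf("flushes performed: %u\n", atomic_load(&flush_count));
        return 0;
    }

This prints "flushes performed: 1": the second request is absorbed by the first flush, which is why a plain atomic read of the counter suffices here.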
diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 4d01ea3515..78358ede27 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) VFIODevice *vbasedev; MigrationState *ms = migrate_get_current(); - if (!migration_is_setup_or_active(ms->state)) { + if (ms->state != MIGRATION_STATUS_ACTIVE && + ms->state != MIGRATION_STATUS_DEVICE) { return false; } diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index ec9a854361..bf27a39905 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2066,6 +2066,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) return 0; } +static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) +{ + uint32_t ctrl; + int i, nbar; + + ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); + nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; + + for (i = 0; i < nbar; i++) { + uint32_t cap; + int size; + + ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); + size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; + + /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ + cap = size <= 27 ? 1U << (size + 4) : 0; + + /* + * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one + * size in the range 1MB to 512GB. We intend to mask all sizes except + * the one currently enabled in the size field, therefore if it's + * outside the range, hide the whole capability as this virtualization + * trick won't work. If >512GB resizable BARs start to appear, we + * might need an opt-in or reservation scheme in the kernel. + */ + if (!(cap & PCI_REBAR_CAP_SIZES)) { + return -EINVAL; + } + + /* Hide all sizes reported in the ctrl reg per above requirement. */ + ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | + PCI_REBAR_CTRL_NBAR_MASK | + PCI_REBAR_CTRL_BAR_IDX); + + /* + * The BAR size field is RW, however we've mangled the capability + * register such that we only report a single size, ie. the current + * BAR size. A write of an unsupported value is undefined, therefore + * the register field is essentially RO. 
+ */ + vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); + vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); + } + + return 0; +} + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) { PCIDevice *pdev = &vdev->pdev; @@ -2139,9 +2187,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) case 0: /* kernel masked capability */ case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ - case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); break; + case PCI_EXT_CAP_ID_REBAR: + if (!vfio_setup_rebar_ecap(vdev, next)) { + pcie_add_capability(pdev, cap_id, cap_ver, next, size); + } + break; default: pcie_add_capability(pdev, cap_id, cap_ver, next, size); } @@ -2856,6 +2908,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) int groupid; int i, ret; bool is_mdev; + char uuid[UUID_FMT_LEN]; + char *name; if (!vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -2936,7 +2990,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); + if (!qemu_uuid_is_null(&vdev->vf_token)) { + qemu_uuid_unparse(&vdev->vf_token, uuid); + name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); + } else { + name = vbasedev->name; + } + + ret = vfio_get_device(group, name, vbasedev, errp); + g_free(name); if (ret) { vfio_put_group(group); goto error; @@ -3268,6 +3330,7 @@ static void vfio_instance_init(Object *obj) static Property vfio_pci_dev_properties[] = { DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, vbasedev.pre_copy_dirty_page_tracking, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 177abcc8fb..2674476d6c 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -137,6 +137,7 @@ struct VFIOPCIDevice { VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ void *igd_opregion; PCIHostDeviceAddress host; + QemuUUID vf_token; EventNotifier err_notifier; EventNotifier req_notifier; int (*resetfn)(struct VFIOPCIDevice *); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 9eff0be95b..cc61b00ba9 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -75,7 +75,7 @@ QEMU_EXTERN_C int daemon(int, int); #ifdef _WIN32 /* as defined in sdkddkver.h */ #ifndef _WIN32_WINNT -#define _WIN32_WINNT 0x0601 /* Windows 7 API (should be in sync with glib) */ +#define _WIN32_WINNT 0x0602 /* Windows 8 API (should be >= the one from glib) */ #endif /* reduces the number of implicitly included headers */ #ifndef WIN32_LEAN_AND_MEAN diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index 313fc414bc..661c1a1468 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -87,7 +87,10 @@ static inline void rcu_read_lock(void) ctr = qatomic_read(&rcu_gp_ctr); qatomic_set(&p_rcu_reader->ctr, ctr); - /* Write p_rcu_reader->ctr before reading RCU-protected pointers. */ + /* + * Read rcu_gp_ptr and write p_rcu_reader->ctr before reading + * RCU-protected pointers. + */ smp_mb_placeholder(); } diff --git a/meson.build b/meson.build index a43f9d0012..0149bd634f 100644 --- a/meson.build +++ b/meson.build @@ -3217,6 +3217,10 @@ modinfo_files = [] block_mods = [] softmmu_mods = [] foreach d, list : modules + if not (d == 'block' ? 
have_block : have_system) + continue + endif + foreach m, module_ss : list if enable_modules and targetos != 'windows' module_ss = module_ss.apply(config_all, strict: false) diff --git a/migration/meson.build b/migration/meson.build index da1897fadf..75de868bb7 100644 --- a/migration/meson.build +++ b/migration/meson.build @@ -23,6 +23,8 @@ softmmu_ss.add(files( 'migration.c', 'multifd.c', 'multifd-zlib.c', + 'multifd-zlib.c', + 'ram-compress.c', 'options.c', 'postcopy-ram.c', 'savevm.c', @@ -38,4 +40,6 @@ endif softmmu_ss.add(when: zstd, if_true: files('multifd-zstd.c')) specific_ss.add(when: 'CONFIG_SOFTMMU', - if_true: files('dirtyrate.c', 'ram.c', 'target.c')) + if_true: files('dirtyrate.c', + 'ram.c', + 'target.c')) diff --git a/migration/migration.c b/migration/migration.c index 232e387109..0ee07802a5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -26,6 +26,7 @@ #include "sysemu/cpu-throttle.h" #include "rdma.h" #include "ram.h" +#include "ram-compress.h" #include "migration/global_state.h" #include "migration/misc.h" #include "migration.h" @@ -228,6 +229,7 @@ void migration_incoming_state_destroy(void) struct MigrationIncomingState *mis = migration_incoming_get_current(); multifd_load_cleanup(); + compress_threads_load_cleanup(); if (mis->to_src_file) { /* Tell source that we are done */ @@ -500,6 +502,12 @@ process_incoming_migration_co(void *opaque) Error *local_err = NULL; assert(mis->from_src_file); + + if (compress_threads_load_setup(mis->from_src_file)) { + error_report("Failed to setup decompress threads"); + goto fail; + } + mis->migration_incoming_co = qemu_coroutine_self(); mis->largest_page_size = qemu_ram_pagesize_largest(); postcopy_state_set(POSTCOPY_INCOMING_NONE); @@ -565,6 +573,7 @@ fail: qemu_fclose(mis->from_src_file); multifd_load_cleanup(); + compress_threads_load_cleanup(); exit(EXIT_FAILURE); } diff --git a/migration/qemu-file.c b/migration/qemu-file.c index f4cfd05c67..61fb580342 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -871,6 +871,17 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src) } /* + * Check if the writable buffer is empty + */ + +bool qemu_file_buffer_empty(QEMUFile *file) +{ + assert(qemu_file_is_writable(file)); + + return !file->iovcnt; +} + +/* * Get a string whose length is determined by a single preceding byte * A preallocated 256 byte buffer must be passed in. 
* Returns: len on success and a 0 terminated string in the buffer diff --git a/migration/qemu-file.h b/migration/qemu-file.h index 4f26bf6961..4ee58a87dd 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -113,6 +113,7 @@ size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, s ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, const uint8_t *p, size_t size); int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); +bool qemu_file_buffer_empty(QEMUFile *file); /* * Note that you can only peek continuous bytes from where the current pointer diff --git a/migration/ram-compress.c b/migration/ram-compress.c new file mode 100644 index 0000000000..06254d8c69 --- /dev/null +++ b/migration/ram-compress.c @@ -0,0 +1,485 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2011-2015 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/cutils.h" + +#include "ram-compress.h" + +#include "qemu/error-report.h" +#include "migration.h" +#include "options.h" +#include "io/channel-null.h" +#include "exec/target_page.h" +#include "exec/ramblock.h" + +CompressionStats compression_counters; + +static CompressParam *comp_param; +static QemuThread *compress_threads; +/* comp_done_cond is used to wake up the migration thread when + * one of the compression threads has finished the compression. + * comp_done_lock is used to co-work with comp_done_cond. 
+ */ +static QemuMutex comp_done_lock; +static QemuCond comp_done_cond; + +struct DecompressParam { + bool done; + bool quit; + QemuMutex mutex; + QemuCond cond; + void *des; + uint8_t *compbuf; + int len; + z_stream stream; +}; +typedef struct DecompressParam DecompressParam; + +static QEMUFile *decomp_file; +static DecompressParam *decomp_param; +static QemuThread *decompress_threads; +static QemuMutex decomp_done_lock; +static QemuCond decomp_done_cond; + +static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, + RAMBlock *block, ram_addr_t offset, + uint8_t *source_buf); + +static void *do_data_compress(void *opaque) +{ + CompressParam *param = opaque; + RAMBlock *block; + ram_addr_t offset; + CompressResult result; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->trigger) { + block = param->block; + offset = param->offset; + param->trigger = false; + qemu_mutex_unlock(¶m->mutex); + + result = do_compress_ram_page(param->file, ¶m->stream, + block, offset, param->originbuf); + + qemu_mutex_lock(&comp_done_lock); + param->done = true; + param->result = result; + qemu_cond_signal(&comp_done_cond); + qemu_mutex_unlock(&comp_done_lock); + + qemu_mutex_lock(¶m->mutex); + } else { + qemu_cond_wait(¶m->cond, ¶m->mutex); + } + } + qemu_mutex_unlock(¶m->mutex); + + return NULL; +} + +void compress_threads_save_cleanup(void) +{ + int i, thread_count; + + if (!migrate_compress() || !comp_param) { + return; + } + + thread_count = migrate_compress_threads(); + for (i = 0; i < thread_count; i++) { + /* + * we use it as a indicator which shows if the thread is + * properly init'd or not + */ + if (!comp_param[i].file) { + break; + } + + qemu_mutex_lock(&comp_param[i].mutex); + comp_param[i].quit = true; + qemu_cond_signal(&comp_param[i].cond); + qemu_mutex_unlock(&comp_param[i].mutex); + + qemu_thread_join(compress_threads + i); + qemu_mutex_destroy(&comp_param[i].mutex); + qemu_cond_destroy(&comp_param[i].cond); + deflateEnd(&comp_param[i].stream); + g_free(comp_param[i].originbuf); + qemu_fclose(comp_param[i].file); + comp_param[i].file = NULL; + } + qemu_mutex_destroy(&comp_done_lock); + qemu_cond_destroy(&comp_done_cond); + g_free(compress_threads); + g_free(comp_param); + compress_threads = NULL; + comp_param = NULL; +} + +int compress_threads_save_setup(void) +{ + int i, thread_count; + + if (!migrate_compress()) { + return 0; + } + thread_count = migrate_compress_threads(); + compress_threads = g_new0(QemuThread, thread_count); + comp_param = g_new0(CompressParam, thread_count); + qemu_cond_init(&comp_done_cond); + qemu_mutex_init(&comp_done_lock); + for (i = 0; i < thread_count; i++) { + comp_param[i].originbuf = g_try_malloc(qemu_target_page_size()); + if (!comp_param[i].originbuf) { + goto exit; + } + + if (deflateInit(&comp_param[i].stream, + migrate_compress_level()) != Z_OK) { + g_free(comp_param[i].originbuf); + goto exit; + } + + /* comp_param[i].file is just used as a dummy buffer to save data, + * set its ops to empty. 
+ */ + comp_param[i].file = qemu_file_new_output( + QIO_CHANNEL(qio_channel_null_new())); + comp_param[i].done = true; + comp_param[i].quit = false; + qemu_mutex_init(&comp_param[i].mutex); + qemu_cond_init(&comp_param[i].cond); + qemu_thread_create(compress_threads + i, "compress", + do_data_compress, comp_param + i, + QEMU_THREAD_JOINABLE); + } + return 0; + +exit: + compress_threads_save_cleanup(); + return -1; +} + +static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, + RAMBlock *block, ram_addr_t offset, + uint8_t *source_buf) +{ + uint8_t *p = block->host + offset; + size_t page_size = qemu_target_page_size(); + int ret; + + assert(qemu_file_buffer_empty(f)); + + if (buffer_is_zero(p, page_size)) { + return RES_ZEROPAGE; + } + + /* + * copy it to a internal buffer to avoid it being modified by VM + * so that we can catch up the error during compression and + * decompression + */ + memcpy(source_buf, p, page_size); + ret = qemu_put_compression_data(f, stream, source_buf, page_size); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); + qemu_fflush(f); + return RES_NONE; + } + return RES_COMPRESS; +} + +static inline void compress_reset_result(CompressParam *param) +{ + param->result = RES_NONE; + param->block = NULL; + param->offset = 0; +} + +void flush_compressed_data(int (send_queued_data(CompressParam *))) +{ + int idx, thread_count; + + thread_count = migrate_compress_threads(); + + qemu_mutex_lock(&comp_done_lock); + for (idx = 0; idx < thread_count; idx++) { + while (!comp_param[idx].done) { + qemu_cond_wait(&comp_done_cond, &comp_done_lock); + } + } + qemu_mutex_unlock(&comp_done_lock); + + for (idx = 0; idx < thread_count; idx++) { + qemu_mutex_lock(&comp_param[idx].mutex); + if (!comp_param[idx].quit) { + CompressParam *param = &comp_param[idx]; + send_queued_data(param); + assert(qemu_file_buffer_empty(param->file)); + compress_reset_result(param); + } + qemu_mutex_unlock(&comp_param[idx].mutex); + } +} + +static inline void set_compress_params(CompressParam *param, RAMBlock *block, + ram_addr_t offset) +{ + param->block = block; + param->offset = offset; + param->trigger = true; +} + +int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + int (send_queued_data(CompressParam *))) +{ + int idx, thread_count, pages = -1; + bool wait = migrate_compress_wait_thread(); + + thread_count = migrate_compress_threads(); + qemu_mutex_lock(&comp_done_lock); +retry: + for (idx = 0; idx < thread_count; idx++) { + if (comp_param[idx].done) { + CompressParam *param = &comp_param[idx]; + qemu_mutex_lock(¶m->mutex); + param->done = false; + send_queued_data(param); + assert(qemu_file_buffer_empty(param->file)); + compress_reset_result(param); + set_compress_params(param, block, offset); + + qemu_cond_signal(¶m->cond); + qemu_mutex_unlock(¶m->mutex); + pages = 1; + break; + } + } + + /* + * wait for the free thread if the user specifies 'compress-wait-thread', + * otherwise we will post the page out in the main thread as normal page. 
+ */ + if (pages < 0 && wait) { + qemu_cond_wait(&comp_done_cond, &comp_done_lock); + goto retry; + } + qemu_mutex_unlock(&comp_done_lock); + + return pages; +} + +/* return the size after decompression, or negative value on error */ +static int +qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, + const uint8_t *source, size_t source_len) +{ + int err; + + err = inflateReset(stream); + if (err != Z_OK) { + return -1; + } + + stream->avail_in = source_len; + stream->next_in = (uint8_t *)source; + stream->avail_out = dest_len; + stream->next_out = dest; + + err = inflate(stream, Z_NO_FLUSH); + if (err != Z_STREAM_END) { + return -1; + } + + return stream->total_out; +} + +static void *do_data_decompress(void *opaque) +{ + DecompressParam *param = opaque; + unsigned long pagesize; + uint8_t *des; + int len, ret; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->des) { + des = param->des; + len = param->len; + param->des = 0; + qemu_mutex_unlock(¶m->mutex); + + pagesize = qemu_target_page_size(); + + ret = qemu_uncompress_data(¶m->stream, des, pagesize, + param->compbuf, len); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); + } + + qemu_mutex_lock(&decomp_done_lock); + param->done = true; + qemu_cond_signal(&decomp_done_cond); + qemu_mutex_unlock(&decomp_done_lock); + + qemu_mutex_lock(¶m->mutex); + } else { + qemu_cond_wait(¶m->cond, ¶m->mutex); + } + } + qemu_mutex_unlock(¶m->mutex); + + return NULL; +} + +int wait_for_decompress_done(void) +{ + int idx, thread_count; + + if (!migrate_compress()) { + return 0; + } + + thread_count = migrate_decompress_threads(); + qemu_mutex_lock(&decomp_done_lock); + for (idx = 0; idx < thread_count; idx++) { + while (!decomp_param[idx].done) { + qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); + } + } + qemu_mutex_unlock(&decomp_done_lock); + return qemu_file_get_error(decomp_file); +} + +void compress_threads_load_cleanup(void) +{ + int i, thread_count; + + if (!migrate_compress()) { + return; + } + thread_count = migrate_decompress_threads(); + for (i = 0; i < thread_count; i++) { + /* + * we use it as a indicator which shows if the thread is + * properly init'd or not + */ + if (!decomp_param[i].compbuf) { + break; + } + + qemu_mutex_lock(&decomp_param[i].mutex); + decomp_param[i].quit = true; + qemu_cond_signal(&decomp_param[i].cond); + qemu_mutex_unlock(&decomp_param[i].mutex); + } + for (i = 0; i < thread_count; i++) { + if (!decomp_param[i].compbuf) { + break; + } + + qemu_thread_join(decompress_threads + i); + qemu_mutex_destroy(&decomp_param[i].mutex); + qemu_cond_destroy(&decomp_param[i].cond); + inflateEnd(&decomp_param[i].stream); + g_free(decomp_param[i].compbuf); + decomp_param[i].compbuf = NULL; + } + g_free(decompress_threads); + g_free(decomp_param); + decompress_threads = NULL; + decomp_param = NULL; + decomp_file = NULL; +} + +int compress_threads_load_setup(QEMUFile *f) +{ + int i, thread_count; + + if (!migrate_compress()) { + return 0; + } + + thread_count = migrate_decompress_threads(); + decompress_threads = g_new0(QemuThread, thread_count); + decomp_param = g_new0(DecompressParam, thread_count); + qemu_mutex_init(&decomp_done_lock); + qemu_cond_init(&decomp_done_cond); + decomp_file = f; + for (i = 0; i < thread_count; i++) { + if (inflateInit(&decomp_param[i].stream) != Z_OK) { + goto exit; + } + + size_t compbuf_size = compressBound(qemu_target_page_size()); + decomp_param[i].compbuf = 
g_malloc0(compbuf_size); + qemu_mutex_init(&decomp_param[i].mutex); + qemu_cond_init(&decomp_param[i].cond); + decomp_param[i].done = true; + decomp_param[i].quit = false; + qemu_thread_create(decompress_threads + i, "decompress", + do_data_decompress, decomp_param + i, + QEMU_THREAD_JOINABLE); + } + return 0; +exit: + compress_threads_load_cleanup(); + return -1; +} + +void decompress_data_with_multi_threads(QEMUFile *f, void *host, int len) +{ + int idx, thread_count; + + thread_count = migrate_decompress_threads(); + QEMU_LOCK_GUARD(&decomp_done_lock); + while (true) { + for (idx = 0; idx < thread_count; idx++) { + if (decomp_param[idx].done) { + decomp_param[idx].done = false; + qemu_mutex_lock(&decomp_param[idx].mutex); + qemu_get_buffer(f, decomp_param[idx].compbuf, len); + decomp_param[idx].des = host; + decomp_param[idx].len = len; + qemu_cond_signal(&decomp_param[idx].cond); + qemu_mutex_unlock(&decomp_param[idx].mutex); + break; + } + } + if (idx < thread_count) { + break; + } else { + qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); + } + } +} diff --git a/migration/ram-compress.h b/migration/ram-compress.h new file mode 100644 index 0000000000..6f7fe2f472 --- /dev/null +++ b/migration/ram-compress.h @@ -0,0 +1,70 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2011-2015 Red Hat Inc + * + * Authors: + * Juan Quintela <quintela@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef QEMU_MIGRATION_COMPRESS_H +#define QEMU_MIGRATION_COMPRESS_H + +#include "qemu-file.h" + +enum CompressResult { + RES_NONE = 0, + RES_ZEROPAGE = 1, + RES_COMPRESS = 2 +}; +typedef enum CompressResult CompressResult; + +struct CompressParam { + bool done; + bool quit; + bool trigger; + CompressResult result; + QEMUFile *file; + QemuMutex mutex; + QemuCond cond; + RAMBlock *block; + ram_addr_t offset; + + /* internally used fields */ + z_stream stream; + uint8_t *originbuf; +}; +typedef struct CompressParam CompressParam; + +void compress_threads_save_cleanup(void); +int compress_threads_save_setup(void); + +void flush_compressed_data(int (send_queued_data(CompressParam *))); +int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + int (send_queued_data(CompressParam *))); + +int wait_for_decompress_done(void); +void compress_threads_load_cleanup(void); +int compress_threads_load_setup(QEMUFile *f); +void decompress_data_with_multi_threads(QEMUFile *f, void *host, int len); + +#endif diff --git a/migration/ram.c b/migration/ram.c index 5e7bf20ca5..f78e9912cd 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -32,8 +32,8 @@ #include "qemu/bitmap.h" #include "qemu/madvise.h" #include "qemu/main-loop.h" -#include "io/channel-null.h" #include "xbzrle.h" +#include "ram-compress.h" #include "ram.h" #include "migration.h" #include "migration-stats.h" @@ -480,56 +480,8 @@ typedef struct MigrationOps MigrationOps; MigrationOps *migration_ops; -CompressionStats compression_counters; - -struct CompressParam { - bool done; - bool quit; - bool zero_page; - QEMUFile *file; - QemuMutex mutex; - QemuCond cond; - RAMBlock *block; - ram_addr_t offset; - - /* internally used fields */ - z_stream stream; - uint8_t *originbuf; -}; -typedef struct CompressParam CompressParam; - -struct DecompressParam { - bool done; - bool quit; - QemuMutex mutex; - QemuCond cond; - void *des; - uint8_t *compbuf; - int len; - z_stream stream; -}; -typedef struct DecompressParam DecompressParam; - -static CompressParam *comp_param; -static QemuThread *compress_threads; -/* comp_done_cond is used to wake up the migration thread when - * one of the compression threads has finished the compression. - * comp_done_lock is used to co-work with comp_done_cond. - */ -static QemuMutex comp_done_lock; -static QemuCond comp_done_cond; - -static QEMUFile *decomp_file; -static DecompressParam *decomp_param; -static QemuThread *decompress_threads; -static QemuMutex decomp_done_lock; -static QemuCond decomp_done_cond; - static int ram_save_host_page_urgent(PageSearchStatus *pss); -static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, - ram_addr_t offset, uint8_t *source_buf); - /* NOTE: page is the PFN not real ram_addr_t. 
*/ static void pss_init(PageSearchStatus *pss, RAMBlock *rb, ram_addr_t page) { @@ -548,123 +500,6 @@ static bool pss_overlap(PageSearchStatus *pss1, PageSearchStatus *pss2) (pss1->host_page_start == pss2->host_page_start); } -static void *do_data_compress(void *opaque) -{ - CompressParam *param = opaque; - RAMBlock *block; - ram_addr_t offset; - bool zero_page; - - qemu_mutex_lock(¶m->mutex); - while (!param->quit) { - if (param->block) { - block = param->block; - offset = param->offset; - param->block = NULL; - qemu_mutex_unlock(¶m->mutex); - - zero_page = do_compress_ram_page(param->file, ¶m->stream, - block, offset, param->originbuf); - - qemu_mutex_lock(&comp_done_lock); - param->done = true; - param->zero_page = zero_page; - qemu_cond_signal(&comp_done_cond); - qemu_mutex_unlock(&comp_done_lock); - - qemu_mutex_lock(¶m->mutex); - } else { - qemu_cond_wait(¶m->cond, ¶m->mutex); - } - } - qemu_mutex_unlock(¶m->mutex); - - return NULL; -} - -static void compress_threads_save_cleanup(void) -{ - int i, thread_count; - - if (!migrate_compress() || !comp_param) { - return; - } - - thread_count = migrate_compress_threads(); - for (i = 0; i < thread_count; i++) { - /* - * we use it as a indicator which shows if the thread is - * properly init'd or not - */ - if (!comp_param[i].file) { - break; - } - - qemu_mutex_lock(&comp_param[i].mutex); - comp_param[i].quit = true; - qemu_cond_signal(&comp_param[i].cond); - qemu_mutex_unlock(&comp_param[i].mutex); - - qemu_thread_join(compress_threads + i); - qemu_mutex_destroy(&comp_param[i].mutex); - qemu_cond_destroy(&comp_param[i].cond); - deflateEnd(&comp_param[i].stream); - g_free(comp_param[i].originbuf); - qemu_fclose(comp_param[i].file); - comp_param[i].file = NULL; - } - qemu_mutex_destroy(&comp_done_lock); - qemu_cond_destroy(&comp_done_cond); - g_free(compress_threads); - g_free(comp_param); - compress_threads = NULL; - comp_param = NULL; -} - -static int compress_threads_save_setup(void) -{ - int i, thread_count; - - if (!migrate_compress()) { - return 0; - } - thread_count = migrate_compress_threads(); - compress_threads = g_new0(QemuThread, thread_count); - comp_param = g_new0(CompressParam, thread_count); - qemu_cond_init(&comp_done_cond); - qemu_mutex_init(&comp_done_lock); - for (i = 0; i < thread_count; i++) { - comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE); - if (!comp_param[i].originbuf) { - goto exit; - } - - if (deflateInit(&comp_param[i].stream, - migrate_compress_level()) != Z_OK) { - g_free(comp_param[i].originbuf); - goto exit; - } - - /* comp_param[i].file is just used as a dummy buffer to save data, - * set its ops to empty. 
- */ - comp_param[i].file = qemu_file_new_output( - QIO_CHANNEL(qio_channel_null_new())); - comp_param[i].done = true; - comp_param[i].quit = false; - qemu_mutex_init(&comp_param[i].mutex); - qemu_cond_init(&comp_param[i].cond); - qemu_thread_create(compress_threads + i, "compress", - do_data_compress, comp_param + i, - QEMU_THREAD_JOINABLE); - } - return 0; - -exit: - compress_threads_save_cleanup(); - return -1; -} - /** * save_page_header: write page header to wire * @@ -1452,40 +1287,12 @@ static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, return 1; } -static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, - ram_addr_t offset, uint8_t *source_buf) -{ - RAMState *rs = ram_state; - PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; - uint8_t *p = block->host + offset; - int ret; - - if (save_zero_page_to_file(pss, f, block, offset)) { - return true; - } - - save_page_header(pss, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); - - /* - * copy it to a internal buffer to avoid it being modified by VM - * so that we can catch up the error during compression and - * decompression - */ - memcpy(source_buf, p, TARGET_PAGE_SIZE); - ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE); - if (ret < 0) { - qemu_file_set_error(migrate_get_current()->to_dst_file, ret); - error_report("compressed data failed!"); - } - return false; -} - static void update_compress_thread_counts(const CompressParam *param, int bytes_xmit) { ram_transferred_add(bytes_xmit); - if (param->zero_page) { + if (param->result == RES_ZEROPAGE) { stat64_add(&mig_stats.zero_pages, 1); return; } @@ -1497,81 +1304,49 @@ update_compress_thread_counts(const CompressParam *param, int bytes_xmit) static bool save_page_use_compression(RAMState *rs); -static void flush_compressed_data(RAMState *rs) +static int send_queued_data(CompressParam *param) { + PageSearchStatus *pss = &ram_state->pss[RAM_CHANNEL_PRECOPY]; MigrationState *ms = migrate_get_current(); - int idx, len, thread_count; + QEMUFile *file = ms->to_dst_file; + int len = 0; - if (!save_page_use_compression(rs)) { - return; - } - thread_count = migrate_compress_threads(); + RAMBlock *block = param->block; + ram_addr_t offset = param->offset; - qemu_mutex_lock(&comp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - while (!comp_param[idx].done) { - qemu_cond_wait(&comp_done_cond, &comp_done_lock); - } + if (param->result == RES_NONE) { + return 0; } - qemu_mutex_unlock(&comp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - qemu_mutex_lock(&comp_param[idx].mutex); - if (!comp_param[idx].quit) { - len = qemu_put_qemu_file(ms->to_dst_file, comp_param[idx].file); - /* - * it's safe to fetch zero_page without holding comp_done_lock - * as there is no further request submitted to the thread, - * i.e, the thread should be waiting for a request at this point. 
- */ - update_compress_thread_counts(&comp_param[idx], len); - } - qemu_mutex_unlock(&comp_param[idx].mutex); + assert(block == pss->last_sent_block); + + if (param->result == RES_ZEROPAGE) { + assert(qemu_file_buffer_empty(param->file)); + len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO); + qemu_put_byte(file, 0); + len += 1; + ram_release_page(block->idstr, offset); + } else if (param->result == RES_COMPRESS) { + assert(!qemu_file_buffer_empty(param->file)); + len += save_page_header(pss, file, block, + offset | RAM_SAVE_FLAG_COMPRESS_PAGE); + len += qemu_put_qemu_file(file, param->file); + } else { + abort(); } -} -static inline void set_compress_params(CompressParam *param, RAMBlock *block, - ram_addr_t offset) -{ - param->block = block; - param->offset = offset; + update_compress_thread_counts(param, len); + + return len; } -static int compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset) +static void ram_flush_compressed_data(RAMState *rs) { - int idx, thread_count, bytes_xmit = -1, pages = -1; - bool wait = migrate_compress_wait_thread(); - MigrationState *ms = migrate_get_current(); - - thread_count = migrate_compress_threads(); - qemu_mutex_lock(&comp_done_lock); -retry: - for (idx = 0; idx < thread_count; idx++) { - if (comp_param[idx].done) { - comp_param[idx].done = false; - bytes_xmit = qemu_put_qemu_file(ms->to_dst_file, - comp_param[idx].file); - qemu_mutex_lock(&comp_param[idx].mutex); - set_compress_params(&comp_param[idx], block, offset); - qemu_cond_signal(&comp_param[idx].cond); - qemu_mutex_unlock(&comp_param[idx].mutex); - pages = 1; - update_compress_thread_counts(&comp_param[idx], bytes_xmit); - break; - } - } - - /* - * wait for the free thread if the user specifies 'compress-wait-thread', - * otherwise we will post the page out in the main thread as normal page. - */ - if (pages < 0 && wait) { - qemu_cond_wait(&comp_done_cond, &comp_done_lock); - goto retry; + if (!save_page_use_compression(rs)) { + return; } - qemu_mutex_unlock(&comp_done_lock); - return pages; + flush_compressed_data(send_queued_data); } #define PAGE_ALL_CLEAN 0 @@ -1628,7 +1403,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) * Also If xbzrle is on, stop using the data compression at this * point. In theory, xbzrle can do better than compression. */ - flush_compressed_data(rs); + ram_flush_compressed_data(rs); /* Hit the end of the list */ pss->block = QLIST_FIRST_RCU(&ram_list.blocks); @@ -2318,11 +2093,11 @@ static bool save_compress_page(RAMState *rs, PageSearchStatus *pss, * much CPU resource. */ if (block != pss->last_sent_block) { - flush_compressed_data(rs); + ram_flush_compressed_data(rs); return false; } - if (compress_page_with_multi_thread(block, offset) > 0) { + if (compress_page_with_multi_thread(block, offset, send_queued_data) > 0) { return true; } @@ -3368,7 +3143,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) * page is sent in one chunk. 
*/ if (migrate_postcopy_ram()) { - flush_compressed_data(rs); + ram_flush_compressed_data(rs); } /* @@ -3463,7 +3238,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) } qemu_mutex_unlock(&rs->bitmap_mutex); - flush_compressed_data(rs); + ram_flush_compressed_data(rs); ram_control_after_iterate(f, RAM_CONTROL_FINISH); } @@ -3674,192 +3449,6 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size) } } -/* return the size after decompression, or negative value on error */ -static int -qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len, - const uint8_t *source, size_t source_len) -{ - int err; - - err = inflateReset(stream); - if (err != Z_OK) { - return -1; - } - - stream->avail_in = source_len; - stream->next_in = (uint8_t *)source; - stream->avail_out = dest_len; - stream->next_out = dest; - - err = inflate(stream, Z_NO_FLUSH); - if (err != Z_STREAM_END) { - return -1; - } - - return stream->total_out; -} - -static void *do_data_decompress(void *opaque) -{ - DecompressParam *param = opaque; - unsigned long pagesize; - uint8_t *des; - int len, ret; - - qemu_mutex_lock(¶m->mutex); - while (!param->quit) { - if (param->des) { - des = param->des; - len = param->len; - param->des = 0; - qemu_mutex_unlock(¶m->mutex); - - pagesize = TARGET_PAGE_SIZE; - - ret = qemu_uncompress_data(¶m->stream, des, pagesize, - param->compbuf, len); - if (ret < 0 && migrate_get_current()->decompress_error_check) { - error_report("decompress data failed"); - qemu_file_set_error(decomp_file, ret); - } - - qemu_mutex_lock(&decomp_done_lock); - param->done = true; - qemu_cond_signal(&decomp_done_cond); - qemu_mutex_unlock(&decomp_done_lock); - - qemu_mutex_lock(¶m->mutex); - } else { - qemu_cond_wait(¶m->cond, ¶m->mutex); - } - } - qemu_mutex_unlock(¶m->mutex); - - return NULL; -} - -static int wait_for_decompress_done(void) -{ - int idx, thread_count; - - if (!migrate_compress()) { - return 0; - } - - thread_count = migrate_decompress_threads(); - qemu_mutex_lock(&decomp_done_lock); - for (idx = 0; idx < thread_count; idx++) { - while (!decomp_param[idx].done) { - qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); - } - } - qemu_mutex_unlock(&decomp_done_lock); - return qemu_file_get_error(decomp_file); -} - -static void compress_threads_load_cleanup(void) -{ - int i, thread_count; - - if (!migrate_compress()) { - return; - } - thread_count = migrate_decompress_threads(); - for (i = 0; i < thread_count; i++) { - /* - * we use it as a indicator which shows if the thread is - * properly init'd or not - */ - if (!decomp_param[i].compbuf) { - break; - } - - qemu_mutex_lock(&decomp_param[i].mutex); - decomp_param[i].quit = true; - qemu_cond_signal(&decomp_param[i].cond); - qemu_mutex_unlock(&decomp_param[i].mutex); - } - for (i = 0; i < thread_count; i++) { - if (!decomp_param[i].compbuf) { - break; - } - - qemu_thread_join(decompress_threads + i); - qemu_mutex_destroy(&decomp_param[i].mutex); - qemu_cond_destroy(&decomp_param[i].cond); - inflateEnd(&decomp_param[i].stream); - g_free(decomp_param[i].compbuf); - decomp_param[i].compbuf = NULL; - } - g_free(decompress_threads); - g_free(decomp_param); - decompress_threads = NULL; - decomp_param = NULL; - decomp_file = NULL; -} - -static int compress_threads_load_setup(QEMUFile *f) -{ - int i, thread_count; - - if (!migrate_compress()) { - return 0; - } - - thread_count = migrate_decompress_threads(); - decompress_threads = g_new0(QemuThread, thread_count); - decomp_param = g_new0(DecompressParam, thread_count); - 
qemu_mutex_init(&decomp_done_lock); - qemu_cond_init(&decomp_done_cond); - decomp_file = f; - for (i = 0; i < thread_count; i++) { - if (inflateInit(&decomp_param[i].stream) != Z_OK) { - goto exit; - } - - decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE)); - qemu_mutex_init(&decomp_param[i].mutex); - qemu_cond_init(&decomp_param[i].cond); - decomp_param[i].done = true; - decomp_param[i].quit = false; - qemu_thread_create(decompress_threads + i, "decompress", - do_data_decompress, decomp_param + i, - QEMU_THREAD_JOINABLE); - } - return 0; -exit: - compress_threads_load_cleanup(); - return -1; -} - -static void decompress_data_with_multi_threads(QEMUFile *f, - void *host, int len) -{ - int idx, thread_count; - - thread_count = migrate_decompress_threads(); - QEMU_LOCK_GUARD(&decomp_done_lock); - while (true) { - for (idx = 0; idx < thread_count; idx++) { - if (decomp_param[idx].done) { - decomp_param[idx].done = false; - qemu_mutex_lock(&decomp_param[idx].mutex); - qemu_get_buffer(f, decomp_param[idx].compbuf, len); - decomp_param[idx].des = host; - decomp_param[idx].len = len; - qemu_cond_signal(&decomp_param[idx].cond); - qemu_mutex_unlock(&decomp_param[idx].mutex); - break; - } - } - if (idx < thread_count) { - break; - } else { - qemu_cond_wait(&decomp_done_cond, &decomp_done_lock); - } - } -} - static void colo_init_ram_state(void) { ram_state_init(&ram_state); @@ -3969,10 +3558,6 @@ void colo_release_ram_cache(void) */ static int ram_load_setup(QEMUFile *f, void *opaque) { - if (compress_threads_load_setup(f)) { - return -1; - } - xbzrle_load_setup(); ramblock_recv_map_init(); @@ -3988,7 +3573,6 @@ static int ram_load_cleanup(void *opaque) } xbzrle_load_cleanup(); - compress_threads_load_cleanup(); RAMBLOCK_FOREACH_NOT_IGNORED(rb) { g_free(rb->receivedmap); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 823320fe42..4187759f10 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -809,7 +809,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "pfthreshold", "avic", NULL, "v-vmsave-vmload", "vgif", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "vnmi", NULL, NULL, "svme-addr-chk", NULL, NULL, NULL, }, .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, @@ -933,15 +933,31 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, "wbnoinvd", NULL, NULL, "ibpb", NULL, "ibrs", "amd-stibp", - NULL, NULL, NULL, NULL, + NULL, "stibp-always-on", NULL, NULL, NULL, NULL, NULL, NULL, "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, - NULL, NULL, NULL, NULL, + "amd-psfd", NULL, NULL, NULL, }, .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, .tcg_features = 0, .unmigratable_flags = 0, }, + [FEAT_8000_0021_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + NULL, NULL, "null-sel-clr-base", NULL, + "auto-ibrs", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_XSAVE] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1620,6 +1636,7 @@ typedef struct X86CPUVersionDefinition { const char *alias; const char *note; PropValue *props; + const CPUCaches *const cache_info; } X86CPUVersionDefinition; /* Base definition for a CPU model */ @@ -1728,6 +1745,56 @@ static const CPUCaches epyc_cache_info = { }, }; +static CPUCaches epyc_v4_cache_info = { + 
.l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -1778,6 +1845,56 @@ static const CPUCaches epyc_rome_cache_info = { }, }; +static const CPUCaches epyc_rome_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -1828,6 +1945,106 @@ static const CPUCaches epyc_milan_cache_info = { }, }; +static const CPUCaches epyc_milan_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + +static const CPUCaches epyc_genoa_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + 
.no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -4112,6 +4329,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 4, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-v4 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v4_cache_info + }, { /* end of list */ } } }, @@ -4231,6 +4457,15 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Rome-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v3_cache_info + }, { /* end of list */ } } }, @@ -4288,6 +4523,98 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x8000001E, .model_id = "AMD EPYC-Milan Processor", .cache_info = &epyc_milan_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Milan-v2 Processor" }, + { "vaes", "on" }, + { "vpclmulqdq", "on" }, + { "stibp-always-on", "on" }, + { "amd-psfd", "on" }, + { "no-nested-data-bp", "on" }, + { "lfence-always-serializing", "on" }, + { "null-sel-clr-base", "on" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v2_cache_info + }, + { /* end of list */ } + } + }, + { + .name = "EPYC-Genoa", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 25, + .model = 17, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | 
CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_No_NESTED_DATA_BP | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | + CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Genoa Processor", + .cache_info = &epyc_genoa_cache_info, }, }; @@ -5225,6 +5552,31 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) assert(vdef->version == version); } +static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, + X86CPUModel *model) +{ + const X86CPUVersionDefinition *vdef; + X86CPUVersion version = x86_cpu_model_resolve_version(model); + const CPUCaches *cache_info = model->cpudef->cache_info; + + if (version == CPU_VERSION_LEGACY) { + return cache_info; + } + + for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { + if (vdef->cache_info) { + cache_info = vdef->cache_info; + } + + if (vdef->version == version) { + break; + } + } + + assert(vdef->version == version); + return cache_info; +} + /* * Load data from X86CPUDefinition into a X86CPU object. * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
@@ -5257,7 +5609,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) } /* legacy-cache defaults to 'off' if CPU model provides cache info */ - cpu->legacy_cache = !def->cache_info; + cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; @@ -6024,6 +6376,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } break; + case 0x80000021: + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = *ecx = *edx = 0; + break; default: /* reserved values: zero */ *eax = 0; @@ -6453,6 +6809,10 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); } + if (env->features[FEAT_8000_0021_EAX]) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); + } + /* SGX requires CPUID[0x12] for EPC enumeration */ if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); @@ -6736,14 +7096,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) /* Cache information initialization */ if (!cpu->legacy_cache) { - if (!xcc->model || !xcc->model->cpudef->cache_info) { + const CPUCaches *cache_info = + x86_cpu_get_versioned_cache_info(cpu, xcc->model); + + if (!xcc->model || !cache_info) { g_autofree char *name = x86_cpu_class_get_model_name(xcc); error_setg(errp, "CPU model '%s' doesn't support legacy-cache=off", name); return; } env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *xcc->model->cpudef->cache_info; + *cache_info; } else { /* Build legacy cache information */ env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8504aaac68..8ade71ab55 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -600,6 +600,7 @@ typedef enum FeatureWord { FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ + FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ @@ -773,6 +774,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_SVM_AVIC (1U << 13) #define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) #define CPUID_SVM_VGIF (1U << 16) +#define CPUID_SVM_VNMI (1U << 25) #define CPUID_SVM_SVME_ADDR_CHK (1U << 28) /* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ @@ -946,8 +948,21 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_8000_0008_EBX_IBRS (1U << 14) /* Single Thread Indirect Branch Predictors */ #define CPUID_8000_0008_EBX_STIBP (1U << 15) +/* STIBP mode has enhanced performance and may be left always on */ +#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) /* Speculative Store Bypass Disable */ #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Predictive Store Forwarding Disable */ +#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + +/* Processor ignores nested data breakpoints */ +#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) +/* LFENCE is always serializing */ +#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Null Selector Clears Base */ +#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) +/* Automatic IBRS */ +#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) 
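Editor's note: the new FEAT_8000_0021_EAX feature word mirrors host CPUID leaf 0x80000021, EAX. As a quick way to see which of these bits a host (or a guest booted with one of the EPYC models above) actually reports, here is a small user-space probe; it is a sketch that assumes GCC/Clang's <cpuid.h> helpers and reuses the bit positions from the CPUID_8000_0021_EAX_* defines above (bits 0, 2, 6 and 8):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;

        /* The leaf only exists if the maximum extended leaf reaches it. */
        if (__get_cpuid_max(0x80000000, 0) < 0x80000021) {
            puts("CPUID leaf 0x80000021 not available");
            return 0;
        }
        __get_cpuid_count(0x80000021, 0, &eax, &ebx, &ecx, &edx);

        /* Bit numbers match the CPUID_8000_0021_EAX_* defines in cpu.h. */
        printf("no-nested-data-bp:         %d\n", !!(eax & (1U << 0)));
        printf("lfence-always-serializing: %d\n", !!(eax & (1U << 2)));
        printf("null-sel-clr-base:         %d\n", !!(eax & (1U << 6)));
        printf("auto-ibrs:                 %d\n", !!(eax & (1U << 8)));
        return 0;
    }

This mirrors how cpu_x86_cpuid() in the cpu.c hunk fills *eax from env->features[FEAT_8000_0021_EAX] and zeroes EBX/ECX/EDX for the leaf.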
diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c
index 7adc1bdff9..d8ac99c9a4 100644
--- a/target/loongarch/machine.c
+++ b/target/loongarch/machine.c
@@ -163,5 +163,6 @@ const VMStateDescription vmstate_loongarch_cpu = {
     .subsections = (const VMStateDescription*[]) {
         &vmstate_fpu,
         &vmstate_lsx,
+        NULL
     }
 };
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index be73ec3c06..8a5df84624 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -406,6 +406,41 @@ static void migrate_set_parameter_str(QTestState *who, const char *parameter,
     migrate_check_parameter_str(who, parameter, value);
 }
 
+static long long migrate_get_parameter_bool(QTestState *who,
+                                            const char *parameter)
+{
+    QDict *rsp;
+    int result;
+
+    rsp = wait_command(who, "{ 'execute': 'query-migrate-parameters' }");
+    result = qdict_get_bool(rsp, parameter);
+    qobject_unref(rsp);
+    return !!result;
+}
+
+static void migrate_check_parameter_bool(QTestState *who, const char *parameter,
+                                         int value)
+{
+    int result;
+
+    result = migrate_get_parameter_bool(who, parameter);
+    g_assert_cmpint(result, ==, value);
+}
+
+static void migrate_set_parameter_bool(QTestState *who, const char *parameter,
+                                       int value)
+{
+    QDict *rsp;
+
+    rsp = qtest_qmp(who,
+                    "{ 'execute': 'migrate-set-parameters',"
+                    "'arguments': { %s: %i } }",
+                    parameter, value);
+    g_assert(qdict_haskey(rsp, "return"));
+    qobject_unref(rsp);
+    migrate_check_parameter_bool(who, parameter, value);
+}
+
 static void migrate_ensure_non_converge(QTestState *who)
 {
     /* Can't converge with 1ms downtime + 3 mbs bandwidth limit */
@@ -1092,6 +1127,36 @@ test_migrate_tls_x509_finish(QTestState *from,
 #endif /* CONFIG_TASN1 */
 #endif /* CONFIG_GNUTLS */
 
+static void *
+test_migrate_compress_start(QTestState *from,
+                            QTestState *to)
+{
+    migrate_set_parameter_int(from, "compress-level", 1);
+    migrate_set_parameter_int(from, "compress-threads", 4);
+    migrate_set_parameter_bool(from, "compress-wait-thread", true);
+    migrate_set_parameter_int(to, "decompress-threads", 4);
+
+    migrate_set_capability(from, "compress", true);
+    migrate_set_capability(to, "compress", true);
+
+    return NULL;
+}
+
+static void *
+test_migrate_compress_nowait_start(QTestState *from,
+                                   QTestState *to)
+{
+    migrate_set_parameter_int(from, "compress-level", 9);
+    migrate_set_parameter_int(from, "compress-threads", 1);
+    migrate_set_parameter_bool(from, "compress-wait-thread", false);
+    migrate_set_parameter_int(to, "decompress-threads", 1);
+
+    migrate_set_capability(from, "compress", true);
+    migrate_set_capability(to, "compress", true);
+
+    return NULL;
+}
+
 static int migrate_postcopy_prepare(QTestState **from_ptr,
                                     QTestState **to_ptr,
                                     MigrateCommon *args)
@@ -1169,6 +1234,15 @@ static void test_postcopy(void)
     test_postcopy_common(&args);
 }
 
+static void test_postcopy_compress(void)
+{
+    MigrateCommon args = {
+        .start_hook = test_migrate_compress_start
+    };
+
+    test_postcopy_common(&args);
+}
+
 static void test_postcopy_preempt(void)
 {
     MigrateCommon args = {
@@ -1270,6 +1344,15 @@ static void test_postcopy_recovery(void)
     test_postcopy_recovery_common(&args);
 }
 
+static void test_postcopy_recovery_compress(void)
+{
+    MigrateCommon args = {
+        .start_hook = test_migrate_compress_start
+    };
+
+    test_postcopy_recovery_common(&args);
+}
+
 #ifdef CONFIG_GNUTLS
 static void test_postcopy_recovery_tls_psk(void)
 {
@@ -1303,6 +1386,7 @@ static void test_postcopy_preempt_all(void)
     test_postcopy_recovery_common(&args);
 }
 
+
 #endif
 
 static void test_baddest(void)
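The bool helpers above follow the existing _int and _str variants: set the
parameter over QMP, then read query-migrate-parameters back to verify that it
stuck. For reference, a sketch of the one-off equivalent of
migrate_set_parameter_bool(from, "compress-wait-thread", true), using the same
libqtest calls and JSON shape as the helper itself (hypothetical wrapper name):

/* Sketch only: one-off equivalent of the helper above.  'who' is a
 * connected QTestState; %i interpolation matches the helper's usage. */
static void set_compress_wait_thread(QTestState *who, bool value)
{
    QDict *rsp;

    rsp = qtest_qmp(who,
                    "{ 'execute': 'migrate-set-parameters',"
                    "'arguments': { 'compress-wait-thread': %i } }",
                    value);
    g_assert(qdict_haskey(rsp, "return"));
    qobject_unref(rsp);
}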
@@ -1524,6 +1608,40 @@ static void test_precopy_unix_xbzrle(void)
     test_precopy_common(&args);
 }
 
+static void test_precopy_unix_compress(void)
+{
+    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+    MigrateCommon args = {
+        .connect_uri = uri,
+        .listen_uri = uri,
+        .start_hook = test_migrate_compress_start,
+        /*
+         * Test that no invalid thread state is left over from
+         * the previous iteration.
+         */
+        .iterations = 2,
+    };
+
+    test_precopy_common(&args);
+}
+
+static void test_precopy_unix_compress_nowait(void)
+{
+    g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
+    MigrateCommon args = {
+        .connect_uri = uri,
+        .listen_uri = uri,
+        .start_hook = test_migrate_compress_nowait_start,
+        /*
+         * Test that no invalid thread state is left over from
+         * the previous iteration.
+         */
+        .iterations = 2,
+    };
+
+    test_precopy_common(&args);
+}
+
 static void test_precopy_tcp_plain(void)
 {
     MigrateCommon args = {
@@ -2532,11 +2650,27 @@ int main(int argc, char **argv)
         qtest_add_func("/migration/postcopy/preempt/plain", test_postcopy_preempt);
         qtest_add_func("/migration/postcopy/preempt/recovery/plain",
                        test_postcopy_preempt_recovery);
+        if (getenv("QEMU_TEST_FLAKY_TESTS")) {
+            qtest_add_func("/migration/postcopy/compress/plain",
+                           test_postcopy_compress);
+            qtest_add_func("/migration/postcopy/recovery/compress/plain",
+                           test_postcopy_recovery_compress);
+        }
     }
 
     qtest_add_func("/migration/bad_dest", test_baddest);
     qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
     qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle);
+    /*
+     * Compression fails from time to time.
+     * Put test here but don't enable it until everything is fixed.
+     */
+    if (getenv("QEMU_TEST_FLAKY_TESTS")) {
+        qtest_add_func("/migration/precopy/unix/compress/wait",
+                       test_precopy_unix_compress);
+        qtest_add_func("/migration/precopy/unix/compress/nowait",
+                       test_precopy_unix_compress_nowait);
+    }
 #ifdef CONFIG_GNUTLS
     qtest_add_func("/migration/precopy/unix/tls/psk",
                    test_precopy_unix_tls_psk);
diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c
index a555cc8835..80c5d4e2e6 100644
--- a/tests/unit/test-aio-multithread.c
+++ b/tests/unit/test-aio-multithread.c
@@ -107,8 +107,7 @@ static void test_lifecycle(void)
 /* aio_co_schedule test.  */
 
 static Coroutine *to_schedule[NUM_CONTEXTS];
-
-static bool now_stopping;
+static bool stop[NUM_CONTEXTS];
 
 static int count_retry;
 static int count_here;
@@ -136,6 +135,7 @@ static bool schedule_next(int n)
 
 static void finish_cb(void *opaque)
 {
+    stop[id] = true;
     schedule_next(id);
 }
 
@@ -143,13 +143,19 @@ static coroutine_fn void test_multi_co_schedule_entry(void *opaque)
 {
     g_assert(to_schedule[id] == NULL);
 
-    while (!qatomic_mb_read(&now_stopping)) {
+    /*
+     * The next iteration will set to_schedule[id] again, but once finish_cb
+     * is scheduled there is no guarantee that it will actually be woken up,
+     * so at that point it must not go to sleep.
+     */
+    while (!stop[id]) {
         int n;
 
         n = g_test_rand_int_range(0, NUM_CONTEXTS);
         schedule_next(n);
 
         qatomic_mb_set(&to_schedule[id], qemu_coroutine_self());
+        /* finish_cb can run here.  */
         qemu_coroutine_yield();
         g_assert(to_schedule[id] == NULL);
     }
@@ -161,7 +167,6 @@ static void test_multi_co_schedule(int seconds)
     int i;
 
     count_here = count_other = count_retry = 0;
-    now_stopping = false;
 
     create_aio_contexts();
     for (i = 0; i < NUM_CONTEXTS; i++) {
@@ -171,10 +176,10 @@ static void test_multi_co_schedule(int seconds)
 
     g_usleep(seconds * 1000000);
 
-    qatomic_mb_set(&now_stopping, true);
+    /* Guarantee that each AioContext is woken up from its last wait.  */
     for (i = 0; i < NUM_CONTEXTS; i++) {
         ctx_run(i, finish_cb, NULL);
-        to_schedule[i] = NULL;
+        g_assert(to_schedule[i] == NULL);
     }
 
     join_aio_contexts();
@@ -199,10 +204,11 @@ static uint32_t atomic_counter;
 static uint32_t running;
 static uint32_t counter;
 static CoMutex comutex;
+static bool now_stopping;
 
 static void coroutine_fn test_multi_co_mutex_entry(void *opaque)
 {
-    while (!qatomic_mb_read(&now_stopping)) {
+    while (!qatomic_read(&now_stopping)) {
         qemu_co_mutex_lock(&comutex);
         counter++;
         qemu_co_mutex_unlock(&comutex);
@@ -236,7 +242,7 @@ static void test_multi_co_mutex(int threads, int seconds)
 
     g_usleep(seconds * 1000000);
 
-    qatomic_mb_set(&now_stopping, true);
+    qatomic_set(&now_stopping, true);
     while (running > 0) {
         g_usleep(100000);
     }
@@ -327,7 +333,7 @@ static void mcs_mutex_unlock(void)
 
 static void test_multi_fair_mutex_entry(void *opaque)
 {
-    while (!qatomic_mb_read(&now_stopping)) {
+    while (!qatomic_read(&now_stopping)) {
         mcs_mutex_lock();
         counter++;
         mcs_mutex_unlock();
@@ -355,7 +361,7 @@ static void test_multi_fair_mutex(int threads, int seconds)
 
     g_usleep(seconds * 1000000);
 
-    qatomic_mb_set(&now_stopping, true);
+    qatomic_set(&now_stopping, true);
     while (running > 0) {
         g_usleep(100000);
     }
@@ -383,7 +389,7 @@ static QemuMutex mutex;
 
 static void test_multi_mutex_entry(void *opaque)
 {
-    while (!qatomic_mb_read(&now_stopping)) {
+    while (!qatomic_read(&now_stopping)) {
         qemu_mutex_lock(&mutex);
         counter++;
         qemu_mutex_unlock(&mutex);
@@ -411,7 +417,7 @@ static void test_multi_mutex(int threads, int seconds)
 
     g_usleep(seconds * 1000000);
 
-    qatomic_mb_set(&now_stopping, true);
+    qatomic_set(&now_stopping, true);
     while (running > 0) {
         g_usleep(100000);
     }
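A recurring theme in these hunks: qatomic_mb_read()/qatomic_mb_set() pairs
become plain qatomic_read()/qatomic_set() wherever a mutex or an event-loop
handoff already orders the protected data, so the flag itself only needs
atomicity. In ISO C11 terms the surviving pattern looks like the following
standalone sketch (pthreads stand in for QEMU's thread and coroutine APIs):

/* Standalone sketch of the relaxed stop-flag pattern: the flag needs no
 * barriers because the mutex already orders accesses to counter. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool now_stopping;
static long counter;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void *worker(void *arg)
{
    while (!atomic_load_explicit(&now_stopping, memory_order_relaxed)) {
        pthread_mutex_lock(&lock);
        counter++;
        pthread_mutex_unlock(&lock);
    }
    return NULL;
}

int main(void)
{
    pthread_t th;

    pthread_create(&th, NULL, worker, NULL);
    sleep(1);
    /* Relaxed is enough here too: pthread_join() synchronizes with the
     * worker before counter is read below. */
    atomic_store_explicit(&now_stopping, true, memory_order_relaxed);
    pthread_join(th, NULL);
    printf("%ld iterations\n", counter);
    return 0;
}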
diff --git a/util/rcu.c b/util/rcu.c
index b6d6c71cff..30a7e22026 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -83,12 +83,6 @@ static void wait_for_readers(void)
          */
         qemu_event_reset(&rcu_gp_event);
 
-        /* Instead of using qatomic_mb_set for index->waiting, and
-         * qatomic_mb_read for index->ctr, memory barriers are placed
-         * manually since writes to different threads are independent.
-         * qemu_event_reset has acquire semantics, so no memory barrier
-         * is needed here.
-         */
         QLIST_FOREACH(index, &registry, node) {
             qatomic_set(&index->waiting, true);
         }
@@ -96,6 +90,10 @@ static void wait_for_readers(void)
         /* Here, order the stores to index->waiting before the loads of
          * index->ctr.  Pairs with smp_mb_placeholder() in rcu_read_unlock(),
          * ensuring that the loads of index->ctr are sequentially consistent.
+         *
+         * If this is the last iteration, this barrier also prevents
+         * frees from seeping upwards, and orders the two wait phases
+         * on architectures with 32-bit longs; see synchronize_rcu().
          */
         smp_mb_global();
 
@@ -104,7 +102,7 @@ static void wait_for_readers(void)
             QLIST_REMOVE(index, node);
             QLIST_INSERT_HEAD(&qsreaders, index, node);
 
-            /* No need for mb_set here, worst of all we
+            /* No need for memory barriers here, worst of all we
              * get some extra futex wakeups.
              */
             qatomic_set(&index->waiting, false);
@@ -149,26 +147,26 @@ void synchronize_rcu(void)
 
     /* Write RCU-protected pointers before reading p_rcu_reader->ctr.
      * Pairs with smp_mb_placeholder() in rcu_read_lock().
+     *
+     * Also orders write to RCU-protected pointers before
+     * write to rcu_gp_ctr.
      */
     smp_mb_global();
 
     QEMU_LOCK_GUARD(&rcu_registry_lock);
     if (!QLIST_EMPTY(&registry)) {
-        /* In either case, the qatomic_mb_set below blocks stores that free
-         * old RCU-protected pointers.
-         */
         if (sizeof(rcu_gp_ctr) < 8) {
             /* For architectures with 32-bit longs, a two-subphases algorithm
              * ensures we do not encounter overflow bugs.
              *
             * Switch parity: 0 -> 1, 1 -> 0.
              */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
             wait_for_readers();
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
         } else {
             /* Increment current grace period.  */
-            qatomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+            qatomic_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
         }
 
         wait_for_readers();
@@ -191,8 +189,22 @@ static void enqueue(struct rcu_head *node)
     struct rcu_head **old_tail;
 
     node->next = NULL;
+
+    /*
+     * Make this node the tail of the list.  The node will be
+     * used by further enqueue operations, but it will not
+     * be dequeued yet...
+     */
     old_tail = qatomic_xchg(&tail, &node->next);
-    qatomic_mb_set(old_tail, node);
+
+    /*
+     * ... until it is pointed to from another item in the list.
+     * In the meantime, try_dequeue() will find a NULL next pointer
+     * and loop.
+     *
+     * Synchronizes with qatomic_load_acquire() in try_dequeue().
+     */
+    qatomic_store_release(old_tail, node);
 }
 
 static struct rcu_head *try_dequeue(void)
@@ -200,26 +212,31 @@ static struct rcu_head *try_dequeue(void)
     struct rcu_head *node, *next;
 
 retry:
-    /* Test for an empty list, which we do not expect.  Note that for
+    /* Head is only written by this thread, so no need for barriers.  */
+    node = head;
+
+    /*
+     * If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    next = qatomic_load_acquire(&node->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /*
+     * Test for an empty list, which we do not expect.  Note that for
      * the consumer head and tail are always consistent.  The head
      * is consistent because only the consumer reads/writes it.
      * The tail, because it is the first step in the enqueuing.
      * It is only the next pointers that might be inconsistent.
      */
-    if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
+    if (head == &dummy && qatomic_read(&tail) == &dummy.next) {
         abort();
     }
 
-    /* If the head node has NULL in its next pointer, the value is
-     * wrong and we need to wait until its enqueuer finishes the update.
-     */
-    node = head;
-    next = qatomic_mb_read(&head->next);
-    if (!next) {
-        return NULL;
-    }
-
-    /* Since we are the sole consumer, and we excluded the empty case
+    /*
+     * Since we are the sole consumer, and we excluded the empty case
      * above, the queue will always have at least two nodes: the
      * dummy node, and the one being removed.  So we do not need to update
      * the tail pointer.
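The enqueue()/try_dequeue() pair above is a Vyukov-style multi-producer
single-consumer queue: qatomic_xchg() claims the tail, the release store links
the new node in, and the consumer's load-acquire pairs with it. A simplified
standalone C11 sketch of the same protocol (here tail points at the last node
rather than at its next field, and the dummy-node recycling that rcu.c relies
on is omitted):

/* Simplified MPSC queue sketch in ISO C11; illustrative, not QEMU API. */
#include <stdatomic.h>
#include <stddef.h>

struct node {
    _Atomic(struct node *) next;
};

static struct node dummy;                     /* queue is never empty */
static _Atomic(struct node *) tail = &dummy;  /* shared among producers */
static struct node *head = &dummy;            /* consumer-private */

static void enqueue(struct node *n)           /* callable from any thread */
{
    struct node *old_tail;

    atomic_store_explicit(&n->next, NULL, memory_order_relaxed);
    /* Claim the tail; old_tail->next is still NULL at this point. */
    old_tail = atomic_exchange(&tail, n);
    /* Publish n; pairs with the load-acquire in try_dequeue(). */
    atomic_store_explicit(&old_tail->next, n, memory_order_release);
}

static struct node *try_dequeue(void)         /* single consumer only */
{
    struct node *next = atomic_load_explicit(&head->next,
                                             memory_order_acquire);
    if (!next) {
        /* Empty, or a producer has exchanged but not yet linked. */
        return NULL;
    }
    head = next;
    return next;
}

Note that the returned node stays referenced as the new head until the next
dequeue, which is why rcu.c keeps a dummy element it can re-enqueue instead of
handing the head straight back to the allocator.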