summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS11
-rw-r--r--accel/kvm/kvm-all.c2
-rwxr-xr-xconfigure27
-rw-r--r--cpus-common.c4
-rw-r--r--cpus.c186
-rw-r--r--disas/m68k.c1
-rw-r--r--dump.c2
-rw-r--r--hmp-commands-info.hx22
-rw-r--r--hmp-commands.hx15
-rw-r--r--hmp.c24
-rw-r--r--hmp.h1
-rw-r--r--hw/acpi/piix4.c2
-rw-r--r--hw/arm/pxa2xx.c2
-rw-r--r--hw/audio/cs4231a.c1
-rw-r--r--hw/audio/es1370.c235
-rw-r--r--hw/audio/gusemu_hal.c1
-rw-r--r--hw/audio/sb16.c11
-rw-r--r--hw/display/cg3.c1
-rw-r--r--hw/display/cirrus_vga.c3
-rw-r--r--hw/i2c/pm_smbus.c254
-rw-r--r--hw/i2c/smbus.c37
-rw-r--r--hw/i2c/smbus_ich9.c26
-rw-r--r--hw/i386/pc.c16
-rw-r--r--hw/intc/apic.c42
-rw-r--r--hw/ipmi/isa_ipmi_bt.c68
-rw-r--r--hw/isa/vt82c686.c2
-rw-r--r--hw/mem/pc-dimm.c61
-rw-r--r--hw/misc/vmcoreinfo.c6
-rw-r--r--hw/ppc/prep.c3
-rw-r--r--hw/ppc/spapr.c30
-rw-r--r--hw/scsi/lsi53c895a.c4
-rw-r--r--hw/scsi/megasas.c2
-rw-r--r--hw/scsi/mptsas.c1
-rw-r--r--hw/scsi/vhost-scsi-common.c3
-rw-r--r--hw/scsi/vhost-scsi.c3
-rw-r--r--hw/scsi/vhost-user-scsi.c28
-rw-r--r--hw/timer/mc146818rtc.c20
-rw-r--r--hw/timer/sh_timer.c1
-rw-r--r--include/chardev/char-fe.h10
-rw-r--r--include/hw/i2c/pm_smbus.h24
-rw-r--r--include/hw/i2c/smbus.h17
-rw-r--r--include/hw/mem/pc-dimm.h5
-rw-r--r--include/hw/misc/vmcoreinfo.h12
-rw-r--r--include/hw/nvram/fw_cfg.h18
-rw-r--r--include/hw/nvram/fw_cfg_keys.h45
-rw-r--r--include/hw/virtio/vhost-scsi-common.h1
-rw-r--r--include/hw/virtio/vhost-user-scsi.h1
-rw-r--r--include/qemu/main-loop.h4
-rw-r--r--include/qemu/qht.h1
-rw-r--r--include/qemu/qsp.h29
-rw-r--r--include/qemu/rcu_queue.h135
-rw-r--r--include/qemu/seqlock.h22
-rw-r--r--include/qemu/thread-posix.h4
-rw-r--r--include/qemu/thread-win32.h5
-rw-r--r--include/qemu/thread.h66
-rw-r--r--include/qom/cpu.h11
-rw-r--r--include/standard-headers/linux/qemu_fw_cfg.h97
-rw-r--r--linux-user/main.c2
-rw-r--r--linux-user/syscall.c2
-rw-r--r--monitor.c11
-rw-r--r--pc-bios/optionrom/linuxboot_dma.c4
-rw-r--r--pc-bios/optionrom/optionrom.h15
-rw-r--r--qemu-options.hx10
-rwxr-xr-xscripts/checkpatch.pl7
-rwxr-xr-xscripts/qemu-guest-agent/fsfreeze-hook2
-rwxr-xr-xscripts/update-linux-headers.sh4
-rw-r--r--scsi/qemu-pr-helper.c4
-rw-r--r--stubs/iothread-lock.c2
-rw-r--r--target/arm/helper.c1
-rw-r--r--target/i386/cpu.c9
-rw-r--r--target/i386/cpu.h7
-rw-r--r--target/i386/kvm.c8
-rw-r--r--target/i386/seg_helper.c196
-rw-r--r--target/i386/translate.c2
-rw-r--r--target/s390x/cpu_models.c2
-rw-r--r--tests/Makefile.include15
-rw-r--r--tests/atomic_add-bench.c6
-rw-r--r--tests/boot-order-test.c2
-rw-r--r--tests/cpu-plug-test.c6
-rw-r--r--tests/device-introspect-test.c55
-rw-r--r--tests/fw_cfg-test.c2
-rw-r--r--tests/libqos/malloc-pc.c2
-rw-r--r--tests/libqtest.c52
-rw-r--r--tests/libqtest.h4
-rw-r--r--tests/migration-test.c20
-rw-r--r--tests/qom-test.c2
-rw-r--r--tests/test-char.c4
-rw-r--r--tests/test-hmp.c2
-rw-r--r--tests/test-rcu-list.c92
-rw-r--r--tests/test-rcu-simpleq.c2
-rw-r--r--tests/test-rcu-tailq.c2
-rw-r--r--tests/test-x86-cpuid-compat.c6
-rw-r--r--tests/vhost-user-test.c4
-rw-r--r--tests/virtio-ccw-test.c110
-rw-r--r--util/Makefile.objs1
-rw-r--r--util/module.c22
-rw-r--r--util/oslib-win32.c15
-rw-r--r--util/qemu-thread-win32.c4
-rw-r--r--util/qht.c47
-rw-r--r--util/qsp.c828
-rw-r--r--vl.c7
101 files changed, 2529 insertions, 736 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 6902a568f4..68bc92eef9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1696,7 +1696,6 @@ F: qom/
 X: qom/cpu.c
 F: tests/check-qom-interface.c
 F: tests/check-qom-proplist.c
-F: tests/qom-test.c
 
 QMP
 M: Markus Armbruster <armbru@redhat.com>
@@ -1708,6 +1707,16 @@ F: scripts/qmp/
 F: tests/qmp-test.c
 T: git git://repo.or.cz/qemu/armbru.git qapi-next
 
+qtest
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Thomas Huth <thuth@redhat.com>
+M: Laurent Vivier <lvivier@redhat.com>
+S: Maintained
+F: qtest.c
+F: tests/libqtest.*
+F: tests/libqos/
+F: tests/*-test.c
+
 Register API
 M: Alistair Francis <alistair@alistair23.me>
 S: Maintained
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 38f468d8e2..de12f78eb8 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1639,10 +1639,8 @@ static int kvm_init(MachineState *ms)
         s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
     }
 
-#ifdef KVM_CAP_READONLY_MEM
     kvm_readonly_mem_allowed =
         (kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
-#endif
 
     kvm_eventfds_allowed =
         (kvm_check_extension(s, KVM_CAP_IOEVENTFD) > 0);
diff --git a/configure b/configure
index e7bddc04b0..2ff272de79 100755
--- a/configure
+++ b/configure
@@ -3612,6 +3612,7 @@ fi
 # libmpathpersist probe
 
 if test "$mpath" != "no" ; then
+  # probe for the new API
   cat > $TMPC <<EOF
 #include <libudev.h>
 #include <mpath_persist.h>
@@ -3633,8 +3634,26 @@ int main(void) {
 EOF
   if compile_prog "" "-ludev -lmultipath -lmpathpersist" ; then
     mpathpersist=yes
+    mpathpersist_new_api=yes
   else
-    mpathpersist=no
+    # probe for the old API
+    cat > $TMPC <<EOF
+#include <libudev.h>
+#include <mpath_persist.h>
+unsigned mpath_mx_alloc_len = 1024;
+int logsink;
+int main(void) {
+    struct udev *udev = udev_new();
+    mpath_lib_init(udev);
+    return 0;
+}
+EOF
+    if compile_prog "" "-ludev -lmultipath -lmpathpersist" ; then
+      mpathpersist=yes
+      mpathpersist_new_api=no
+    else
+      mpathpersist=no
+    fi
   fi
 else
   mpathpersist=no
@@ -6409,9 +6428,6 @@ if test "$bluez" = "yes" ; then
   echo "CONFIG_BLUEZ=y" >> $config_host_mak
   echo "BLUEZ_CFLAGS=$bluez_cflags" >> $config_host_mak
 fi
-if test "$glib_subprocess" = "yes" ; then
-  echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak
-fi
 if test "$gtk" = "yes" ; then
   echo "CONFIG_GTK=m" >> $config_host_mak
   echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
@@ -6495,6 +6511,9 @@ if test "$virtfs" = "yes" ; then
 fi
 if test "$mpath" = "yes" ; then
   echo "CONFIG_MPATH=y" >> $config_host_mak
+  if test "$mpathpersist_new_api" = "yes"; then
+    echo "CONFIG_MPATH_NEW_API=y" >> $config_host_mak
+  fi
 fi
 if test "$vhost_scsi" = "yes" ; then
   echo "CONFIG_VHOST_SCSI=y" >> $config_host_mak
diff --git a/cpus-common.c b/cpus-common.c
index 59f751ecf9..98dd8c6ff1 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -84,7 +84,7 @@ void cpu_list_add(CPUState *cpu)
     } else {
         assert(!cpu_index_auto_assigned);
     }
-    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
+    QTAILQ_INSERT_TAIL_RCU(&cpus, cpu, node);
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 
     finish_safe_work(cpu);
@@ -101,7 +101,7 @@ void cpu_list_remove(CPUState *cpu)
 
     assert(!(cpu_index_auto_assigned && cpu != QTAILQ_LAST(&cpus, CPUTailQ)));
 
-    QTAILQ_REMOVE(&cpus, cpu, node);
+    QTAILQ_REMOVE_RCU(&cpus, cpu, node);
     cpu->cpu_index = UNASSIGNED_CPU_INDEX;
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 }
diff --git a/cpus.c b/cpus.c
index b5844b7103..8ee6e5db93 100644
--- a/cpus.c
+++ b/cpus.c
@@ -121,8 +121,6 @@ static bool all_cpu_threads_idle(void)
 /* Protected by TimersState seqlock */
 
 static bool icount_sleep = true;
-/* Conversion factor from emulated instructions to virtual clock ticks.  */
-static int icount_time_shift;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10
 
@@ -131,20 +129,27 @@ typedef struct TimersState {
     int64_t cpu_ticks_prev;
     int64_t cpu_ticks_offset;
 
-    /* cpu_clock_offset can be read out of BQL, so protect it with
-     * this lock.
+    /* Protect fields that can be read outside the BQL (seqlock) and
+     * fields that can be written from multiple threads (spinlock).
      */
     QemuSeqLock vm_clock_seqlock;
-    int64_t cpu_clock_offset;
-    int32_t cpu_ticks_enabled;
-    int64_t dummy;
+    QemuSpin vm_clock_lock;
+
+    int16_t cpu_ticks_enabled;
+
+    /* Conversion factor from emulated instructions to virtual clock ticks.  */
+    int16_t icount_time_shift;
 
     /* Compensate for varying guest execution speed.  */
     int64_t qemu_icount_bias;
+
+    int64_t vm_clock_warp_start;
+    int64_t cpu_clock_offset;
+
     /* Only written by TCG thread */
     int64_t qemu_icount;
+
     /* for adjusting icount */
-    int64_t vm_clock_warp_start;
     QEMUTimer *icount_rt_timer;
     QEMUTimer *icount_vm_timer;
     QEMUTimer *icount_warp_timer;
@@ -245,16 +250,19 @@ void cpu_update_icount(CPUState *cpu)
     int64_t executed = cpu_get_icount_executed(cpu);
     cpu->icount_budget -= executed;
 
-#ifdef CONFIG_ATOMIC64
+#ifndef CONFIG_ATOMIC64
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
+#endif
     atomic_set__nocheck(&timers_state.qemu_icount,
-                        atomic_read__nocheck(&timers_state.qemu_icount) +
-                        executed);
-#else /* FIXME: we need 64bit atomics to do this safely */
-    timers_state.qemu_icount += executed;
+                        timers_state.qemu_icount + executed);
+#ifndef CONFIG_ATOMIC64
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 #endif
 }
 
-int64_t cpu_get_icount_raw(void)
+static int64_t cpu_get_icount_raw_locked(void)
 {
     CPUState *cpu = current_cpu;
 
@@ -266,20 +274,30 @@ int64_t cpu_get_icount_raw(void)
         /* Take into account what has run */
         cpu_update_icount(cpu);
     }
-#ifdef CONFIG_ATOMIC64
+    /* The read is protected by the seqlock, so __nocheck is okay.  */
     return atomic_read__nocheck(&timers_state.qemu_icount);
-#else /* FIXME: we need 64bit atomics to do this safely */
-    return timers_state.qemu_icount;
-#endif
 }
 
-/* Return the virtual CPU time, based on the instruction counter.  */
 static int64_t cpu_get_icount_locked(void)
 {
-    int64_t icount = cpu_get_icount_raw();
-    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
+    int64_t icount = cpu_get_icount_raw_locked();
+    return atomic_read__nocheck(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount);
 }
 
+int64_t cpu_get_icount_raw(void)
+{
+    int64_t icount;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        icount = cpu_get_icount_raw_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return icount;
+}
+
+/* Return the virtual CPU time, based on the instruction counter.  */
 int64_t cpu_get_icount(void)
 {
     int64_t icount;
@@ -295,14 +313,29 @@ int64_t cpu_get_icount(void)
 
 int64_t cpu_icount_to_ns(int64_t icount)
 {
-    return icount << icount_time_shift;
+    return icount << atomic_read(&timers_state.icount_time_shift);
+}
+
+static int64_t cpu_get_ticks_locked(void)
+{
+    int64_t ticks = timers_state.cpu_ticks_offset;
+    if (timers_state.cpu_ticks_enabled) {
+        ticks += cpu_get_host_ticks();
+    }
+
+    if (timers_state.cpu_ticks_prev > ticks) {
+        /* Non increasing ticks may happen if the host uses software suspend.  */
+        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
+        ticks = timers_state.cpu_ticks_prev;
+    }
+
+    timers_state.cpu_ticks_prev = ticks;
+    return ticks;
 }
 
 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
  * counter.
- *
- * Caller must hold the BQL
  */
 int64_t cpu_get_ticks(void)
 {
@@ -312,19 +345,9 @@ int64_t cpu_get_ticks(void)
         return cpu_get_icount();
     }
 
-    ticks = timers_state.cpu_ticks_offset;
-    if (timers_state.cpu_ticks_enabled) {
-        ticks += cpu_get_host_ticks();
-    }
-
-    if (timers_state.cpu_ticks_prev > ticks) {
-        /* Note: non increasing ticks may happen if the host uses
-           software suspend */
-        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
-        ticks = timers_state.cpu_ticks_prev;
-    }
-
-    timers_state.cpu_ticks_prev = ticks;
+    qemu_spin_lock(&timers_state.vm_clock_lock);
+    ticks = cpu_get_ticks_locked();
+    qemu_spin_unlock(&timers_state.vm_clock_lock);
     return ticks;
 }
 
@@ -361,14 +384,15 @@ int64_t cpu_get_clock(void)
  */
 void cpu_enable_ticks(void)
 {
-    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -377,14 +401,15 @@ void cpu_enable_ticks(void)
  */
 void cpu_disable_ticks(void)
 {
-    /* Here, the really thing protected by seqlock is cpu_clock_offset. */
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
         timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -407,7 +432,8 @@ static void icount_adjust(void)
         return;
     }
 
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     cur_time = cpu_get_clock_locked();
     cur_icount = cpu_get_icount_locked();
 
@@ -415,20 +441,24 @@ static void icount_adjust(void)
     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
     if (delta > 0
         && last_delta + ICOUNT_WOBBLE < delta * 2
-        && icount_time_shift > 0) {
+        && timers_state.icount_time_shift > 0) {
         /* The guest is getting too far ahead.  Slow time down.  */
-        icount_time_shift--;
+        atomic_set(&timers_state.icount_time_shift,
+                   timers_state.icount_time_shift - 1);
     }
     if (delta < 0
         && last_delta - ICOUNT_WOBBLE > delta * 2
-        && icount_time_shift < MAX_ICOUNT_SHIFT) {
+        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
         /* The guest is getting too far behind.  Speed time up.  */
-        icount_time_shift++;
+        atomic_set(&timers_state.icount_time_shift,
+                   timers_state.icount_time_shift + 1);
     }
     last_delta = delta;
-    timers_state.qemu_icount_bias = cur_icount
-                              - (timers_state.qemu_icount << icount_time_shift);
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    atomic_set__nocheck(&timers_state.qemu_icount_bias,
+                        cur_icount - (timers_state.qemu_icount
+                                      << timers_state.icount_time_shift));
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                         &timers_state.vm_clock_lock);
 }
 
 static void icount_adjust_rt(void *opaque)
@@ -448,7 +478,8 @@ static void icount_adjust_vm(void *opaque)
 
 static int64_t qemu_icount_round(int64_t count)
 {
-    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
+    int shift = atomic_read(&timers_state.icount_time_shift);
+    return (count + (1 << shift) - 1) >> shift;
 }
 
 static void icount_warp_rt(void)
@@ -468,7 +499,8 @@ static void icount_warp_rt(void)
         return;
     }
 
-    seqlock_write_begin(&timers_state.vm_clock_seqlock);
+    seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
     if (runstate_is_running()) {
         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                      cpu_get_clock_locked());
@@ -484,10 +516,12 @@ static void icount_warp_rt(void)
             int64_t delta = clock - cur_icount;
             warp_delta = MIN(warp_delta, delta);
         }
-        timers_state.qemu_icount_bias += warp_delta;
+        atomic_set__nocheck(&timers_state.qemu_icount_bias,
+                            timers_state.qemu_icount_bias + warp_delta);
     }
     timers_state.vm_clock_warp_start = -1;
-    seqlock_write_end(&timers_state.vm_clock_seqlock);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                       &timers_state.vm_clock_lock);
 
     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -512,9 +546,12 @@ void qtest_clock_warp(int64_t dest)
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
 
-        seqlock_write_begin(&timers_state.vm_clock_seqlock);
-        timers_state.qemu_icount_bias += warp;
-        seqlock_write_end(&timers_state.vm_clock_seqlock);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                           &timers_state.vm_clock_lock);
+        atomic_set__nocheck(&timers_state.qemu_icount_bias,
+                            timers_state.qemu_icount_bias + warp);
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                             &timers_state.vm_clock_lock);
 
         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
@@ -581,9 +618,12 @@ void qemu_start_warp_timer(void)
              * It is useful when we want a deterministic execution time,
              * isolated from host latencies.
              */
-            seqlock_write_begin(&timers_state.vm_clock_seqlock);
-            timers_state.qemu_icount_bias += deadline;
-            seqlock_write_end(&timers_state.vm_clock_seqlock);
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
+            atomic_set__nocheck(&timers_state.qemu_icount_bias,
+                                timers_state.qemu_icount_bias + deadline);
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
         } else {
             /*
@@ -594,12 +634,14 @@ void qemu_start_warp_timer(void)
              * you will not be sending network packets continuously instead of
              * every 100ms.
              */
-            seqlock_write_begin(&timers_state.vm_clock_seqlock);
+            seqlock_write_lock(&timers_state.vm_clock_seqlock,
+                               &timers_state.vm_clock_lock);
             if (timers_state.vm_clock_warp_start == -1
                 || timers_state.vm_clock_warp_start > clock) {
                 timers_state.vm_clock_warp_start = clock;
             }
-            seqlock_write_end(&timers_state.vm_clock_seqlock);
+            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+                                 &timers_state.vm_clock_lock);
             timer_mod_anticipate(timers_state.icount_warp_timer,
                                  clock + deadline);
         }
@@ -700,7 +742,7 @@ static const VMStateDescription vmstate_timers = {
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
         VMSTATE_INT64(cpu_ticks_offset, TimersState),
-        VMSTATE_INT64(dummy, TimersState),
+        VMSTATE_UNUSED(8),
         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
         VMSTATE_END_OF_LIST()
     },
@@ -812,7 +854,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
     }
     if (strcmp(option, "auto") != 0) {
         errno = 0;
-        icount_time_shift = strtol(option, &rem_str, 0);
+        timers_state.icount_time_shift = strtol(option, &rem_str, 0);
         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
             error_setg(errp, "icount: Invalid shift value");
         }
@@ -828,7 +870,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
 
     /* 125MIPS seems a reasonable initial guess at the guest speed.
        It will be corrected fairly quickly anyway.  */
-    icount_time_shift = 3;
+    timers_state.icount_time_shift = 3;
 
     /* Have both realtime and virtual time triggers for speed adjustment.
        The realtime trigger catches emulated time passing too slowly,
@@ -1491,7 +1533,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
             atomic_mb_set(&cpu->exit_request, 0);
         }
 
-        qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
+        qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
         deal_with_unplugged_cpus();
     }
 
@@ -1762,10 +1804,16 @@ bool qemu_mutex_iothread_locked(void)
     return iothread_locked;
 }
 
-void qemu_mutex_lock_iothread(void)
+/*
+ * The BQL is taken from so many places that it is worth profiling the
+ * callers directly, instead of funneling them all through a single function.
+ */
+void qemu_mutex_lock_iothread_impl(const char *file, int line)
 {
+    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
+
     g_assert(!qemu_mutex_iothread_locked());
-    qemu_mutex_lock(&qemu_global_mutex);
+    bql_lock(&qemu_global_mutex, file, line);
     iothread_locked = true;
 }
 
diff --git a/disas/m68k.c b/disas/m68k.c
index a687df437c..0dc8aa1a3c 100644
--- a/disas/m68k.c
+++ b/disas/m68k.c
@@ -1623,6 +1623,7 @@ print_insn_arg (const char *d,
 
     case 'X':
       place = '8';
+      /* fall through */
     case 'Y':
     case 'Z':
     case 'W':
diff --git a/dump.c b/dump.c
index 04467b353e..500b554523 100644
--- a/dump.c
+++ b/dump.c
@@ -1742,7 +1742,7 @@ static void dump_init(DumpState *s, int fd, bool has_format,
             warn_report("guest note is not present");
         } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) {
             warn_report("guest note size is invalid: %" PRIu32, size);
-        } else if (format != VMCOREINFO_FORMAT_ELF) {
+        } else if (format != FW_CFG_VMCOREINFO_FORMAT_ELF) {
             warn_report("guest note format is unsupported: %" PRIu16, format);
         } else {
             s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 70639f656a..cbee8b944d 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -300,6 +300,28 @@ Show dynamic compiler opcode counters
 ETEXI
 
     {
+        .name       = "sync-profile",
+        .args_type  = "mean:-m,no_coalesce:-n,max:i?",
+        .params     = "[-m] [-n] [max]",
+        .help       = "show synchronization profiling info, up to max entries "
+                      "(default: 10), sorted by total wait time. (-m: sort by "
+                      "mean wait time; -n: do not coalesce objects with the "
+                      "same call site)",
+        .cmd        = hmp_info_sync_profile,
+    },
+
+STEXI
+@item info sync-profile [-m] [-n] [@var{max}]
+@findex info sync-profile
+Show synchronization profiling info, up to @var{max} entries (default: 10),
+sorted by total wait time.
+        -m: sort by mean wait time
+        -n: do not coalesce objects with the same call site
+When different objects that share the same call site are coalesced, the "Object"
+field shows---enclosed in brackets---the number of objects being coalesced.
+ETEXI
+
+    {
         .name       = "kvm",
         .args_type  = "",
         .params     = "",
diff --git a/hmp-commands.hx b/hmp-commands.hx
index c1fc747403..db0c681f74 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -644,6 +644,21 @@ sendkey ctrl-alt-f1
 This command is useful to send keys that your graphical user interface
 intercepts at low level, such as @code{ctrl-alt-f1} in X Window.
 ETEXI
+    {
+        .name       = "sync-profile",
+        .args_type  = "op:s?",
+        .params     = "[on|off|reset]",
+        .help       = "enable, disable or reset synchronization profiling. "
+                      "With no arguments, prints whether profiling is on or off.",
+        .cmd        = hmp_sync_profile,
+    },
+
+STEXI
+@item sync-profile [on|off|reset]
+@findex sync-profile
+Enable, disable or reset synchronization profiling. With no arguments, prints
+whether profiling is on or off.
+ETEXI
 
     {
         .name       = "system_reset",
diff --git a/hmp.c b/hmp.c
index 2aafb50e8e..d94a47f7c7 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1062,6 +1062,30 @@ void hmp_stop(Monitor *mon, const QDict *qdict)
     qmp_stop(NULL);
 }
 
+void hmp_sync_profile(Monitor *mon, const QDict *qdict)
+{
+    const char *op = qdict_get_try_str(qdict, "op");
+
+    if (op == NULL) {
+        bool on = qsp_is_enabled();
+
+        monitor_printf(mon, "sync-profile is %s\n", on ? "on" : "off");
+        return;
+    }
+    if (!strcmp(op, "on")) {
+        qsp_enable();
+    } else if (!strcmp(op, "off")) {
+        qsp_disable();
+    } else if (!strcmp(op, "reset")) {
+        qsp_reset();
+    } else {
+        Error *err = NULL;
+
+        error_setg(&err, QERR_INVALID_PARAMETER, op);
+        hmp_handle_error(mon, &err);
+    }
+}
+
 void hmp_system_reset(Monitor *mon, const QDict *qdict)
 {
     qmp_system_reset(NULL);
diff --git a/hmp.h b/hmp.h
index 33354f1bdd..5f1addcca2 100644
--- a/hmp.h
+++ b/hmp.h
@@ -42,6 +42,7 @@ void hmp_info_tpm(Monitor *mon, const QDict *qdict);
 void hmp_info_iothreads(Monitor *mon, const QDict *qdict);
 void hmp_quit(Monitor *mon, const QDict *qdict);
 void hmp_stop(Monitor *mon, const QDict *qdict);
+void hmp_sync_profile(Monitor *mon, const QDict *qdict);
 void hmp_system_reset(Monitor *mon, const QDict *qdict);
 void hmp_system_powerdown(Monitor *mon, const QDict *qdict);
 void hmp_exit_preconfig(Monitor *mon, const QDict *qdict);
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index 6404af5f33..e330f24c71 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -512,7 +512,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
     pci_conf[0x90] = s->smb_io_base | 1;
     pci_conf[0x91] = s->smb_io_base >> 8;
     pci_conf[0xd2] = 0x09;
-    pm_smbus_init(DEVICE(dev), &s->smb);
+    pm_smbus_init(DEVICE(dev), &s->smb, true);
     memory_region_set_enabled(&s->smb.io, pci_conf[0xd2] & 1);
     memory_region_add_subregion(pci_address_space_io(dev),
                                 s->smb_io_base, &s->smb.io);
diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index b67b0cefb6..f598a1c053 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -409,7 +409,7 @@ static uint64_t pxa2xx_mm_read(void *opaque, hwaddr addr,
     case MDCNFG ... SA1110:
         if ((addr & 3) == 0)
             return s->mm_regs[addr >> 2];
-
+        /* fall through */
     default:
         printf("%s: Bad register " REG_FMT "\n", __func__, addr);
         break;
diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c
index aaebec1839..9089dcb47e 100644
--- a/hw/audio/cs4231a.c
+++ b/hw/audio/cs4231a.c
@@ -305,6 +305,7 @@ static void cs_reset_voices (CSState *s, uint32_t val)
 
     case 6:
         as.endianness = 1;
+        /* fall through */
     case 2:
         as.fmt = AUD_FMT_S16;
         s->shift = as.nchannels;
diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index 59cf252754..dd75c9e8f5 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -474,82 +474,7 @@ static inline uint32_t es1370_fixup (ES1370State *s, uint32_t addr)
     return addr;
 }
 
-static void es1370_writeb(void *opaque, uint32_t addr, uint32_t val)
-{
-    ES1370State *s = opaque;
-    uint32_t shift, mask;
-
-    addr = es1370_fixup (s, addr);
-
-    switch (addr) {
-    case ES1370_REG_CONTROL:
-    case ES1370_REG_CONTROL + 1:
-    case ES1370_REG_CONTROL + 2:
-    case ES1370_REG_CONTROL + 3:
-        shift = (addr - ES1370_REG_CONTROL) << 3;
-        mask = 0xff << shift;
-        val = (s->ctl & ~mask) | ((val & 0xff) << shift);
-        es1370_update_voices (s, val, s->sctl);
-        print_ctl (val);
-        break;
-    case ES1370_REG_MEMPAGE:
-        s->mempage = val;
-        break;
-    case ES1370_REG_SERIAL_CONTROL:
-    case ES1370_REG_SERIAL_CONTROL + 1:
-    case ES1370_REG_SERIAL_CONTROL + 2:
-    case ES1370_REG_SERIAL_CONTROL + 3:
-        shift = (addr - ES1370_REG_SERIAL_CONTROL) << 3;
-        mask = 0xff << shift;
-        val = (s->sctl & ~mask) | ((val & 0xff) << shift);
-        es1370_maybe_lower_irq (s, val);
-        es1370_update_voices (s, s->ctl, val);
-        print_sctl (val);
-        break;
-    default:
-        lwarn ("writeb %#x <- %#x\n", addr, val);
-        break;
-    }
-}
-
-static void es1370_writew(void *opaque, uint32_t addr, uint32_t val)
-{
-    ES1370State *s = opaque;
-    addr = es1370_fixup (s, addr);
-    uint32_t shift, mask;
-    struct chan *d = &s->chan[0];
-
-    switch (addr) {
-    case ES1370_REG_CODEC:
-        dolog ("ignored codec write address %#x, data %#x\n",
-               (val >> 8) & 0xff, val & 0xff);
-        s->codec = val;
-        break;
-
-    case ES1370_REG_CONTROL:
-    case ES1370_REG_CONTROL + 2:
-        shift = (addr != ES1370_REG_CONTROL) << 4;
-        mask = 0xffff << shift;
-        val = (s->ctl & ~mask) | ((val & 0xffff) << shift);
-        es1370_update_voices (s, val, s->sctl);
-        print_ctl (val);
-        break;
-
-    case ES1370_REG_ADC_SCOUNT:
-        d++;
-    case ES1370_REG_DAC2_SCOUNT:
-        d++;
-    case ES1370_REG_DAC1_SCOUNT:
-        d->scount = (d->scount & ~0xffff) | (val & 0xffff);
-        break;
-
-    default:
-        lwarn ("writew %#x <- %#x\n", addr, val);
-        break;
-    }
-}
-
-static void es1370_writel(void *opaque, uint32_t addr, uint32_t val)
+static void es1370_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 {
     ES1370State *s = opaque;
     struct chan *d = &s->chan[0];
@@ -572,21 +497,19 @@ static void es1370_writel(void *opaque, uint32_t addr, uint32_t val)
         print_sctl (val);
         break;
 
-    case ES1370_REG_ADC_SCOUNT:
-        d++;
-    case ES1370_REG_DAC2_SCOUNT:
-        d++;
     case ES1370_REG_DAC1_SCOUNT:
+    case ES1370_REG_DAC2_SCOUNT:
+    case ES1370_REG_ADC_SCOUNT:
+        d += (addr - ES1370_REG_DAC1_SCOUNT) >> 2;
         d->scount = (val & 0xffff) | (d->scount & ~0xffff);
         ldebug ("chan %td CURR_SAMP_CT %d, SAMP_CT %d\n",
                 d - &s->chan[0], val >> 16, (val & 0xffff));
         break;
 
-    case ES1370_REG_ADC_FRAMEADR:
-        d++;
-    case ES1370_REG_DAC2_FRAMEADR:
-        d++;
     case ES1370_REG_DAC1_FRAMEADR:
+    case ES1370_REG_DAC2_FRAMEADR:
+    case ES1370_REG_ADC_FRAMEADR:
+        d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3;
         d->frame_addr = val;
         ldebug ("chan %td frame address %#x\n", d - &s->chan[0], val);
         break;
@@ -598,11 +521,10 @@ static void es1370_writel(void *opaque, uint32_t addr, uint32_t val)
         lwarn ("writing to phantom frame address %#x\n", val);
         break;
 
-    case ES1370_REG_ADC_FRAMECNT:
-        d++;
-    case ES1370_REG_DAC2_FRAMECNT:
-        d++;
     case ES1370_REG_DAC1_FRAMECNT:
+    case ES1370_REG_DAC2_FRAMECNT:
+    case ES1370_REG_ADC_FRAMECNT:
+        d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3;
         d->frame_cnt = val;
         d->leftover = 0;
         ldebug ("chan %td frame count %d, buffer size %d\n",
@@ -615,84 +537,7 @@ static void es1370_writel(void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t es1370_readb(void *opaque, uint32_t addr)
-{
-    ES1370State *s = opaque;
-    uint32_t val;
-
-    addr = es1370_fixup (s, addr);
-
-    switch (addr) {
-    case 0x1b:                  /* Legacy */
-        lwarn ("Attempt to read from legacy register\n");
-        val = 5;
-        break;
-    case ES1370_REG_MEMPAGE:
-        val = s->mempage;
-        break;
-    case ES1370_REG_CONTROL + 0:
-    case ES1370_REG_CONTROL + 1:
-    case ES1370_REG_CONTROL + 2:
-    case ES1370_REG_CONTROL + 3:
-        val = s->ctl >> ((addr - ES1370_REG_CONTROL) << 3);
-        break;
-    case ES1370_REG_STATUS + 0:
-    case ES1370_REG_STATUS + 1:
-    case ES1370_REG_STATUS + 2:
-    case ES1370_REG_STATUS + 3:
-        val = s->status >> ((addr - ES1370_REG_STATUS) << 3);
-        break;
-    default:
-        val = ~0;
-        lwarn ("readb %#x -> %#x\n", addr, val);
-        break;
-    }
-    return val;
-}
-
-static uint32_t es1370_readw(void *opaque, uint32_t addr)
-{
-    ES1370State *s = opaque;
-    struct chan *d = &s->chan[0];
-    uint32_t val;
-
-    addr = es1370_fixup (s, addr);
-
-    switch (addr) {
-    case ES1370_REG_ADC_SCOUNT + 2:
-        d++;
-    case ES1370_REG_DAC2_SCOUNT + 2:
-        d++;
-    case ES1370_REG_DAC1_SCOUNT + 2:
-        val = d->scount >> 16;
-        break;
-
-    case ES1370_REG_ADC_FRAMECNT:
-        d++;
-    case ES1370_REG_DAC2_FRAMECNT:
-        d++;
-    case ES1370_REG_DAC1_FRAMECNT:
-        val = d->frame_cnt & 0xffff;
-        break;
-
-    case ES1370_REG_ADC_FRAMECNT + 2:
-        d++;
-    case ES1370_REG_DAC2_FRAMECNT + 2:
-        d++;
-    case ES1370_REG_DAC1_FRAMECNT + 2:
-        val = d->frame_cnt >> 16;
-        break;
-
-    default:
-        val = ~0;
-        lwarn ("readw %#x -> %#x\n", addr, val);
-        break;
-    }
-
-    return val;
-}
-
-static uint32_t es1370_readl(void *opaque, uint32_t addr)
+static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size)
 {
     ES1370State *s = opaque;
     uint32_t val;
@@ -717,11 +562,10 @@ static uint32_t es1370_readl(void *opaque, uint32_t addr)
         val = s->sctl;
         break;
 
-    case ES1370_REG_ADC_SCOUNT:
-        d++;
-    case ES1370_REG_DAC2_SCOUNT:
-        d++;
     case ES1370_REG_DAC1_SCOUNT:
+    case ES1370_REG_DAC2_SCOUNT:
+    case ES1370_REG_ADC_SCOUNT:
+        d += (addr - ES1370_REG_DAC1_SCOUNT) >> 2;
         val = d->scount;
 #ifdef DEBUG_ES1370
         {
@@ -735,11 +579,10 @@ static uint32_t es1370_readl(void *opaque, uint32_t addr)
 #endif
         break;
 
-    case ES1370_REG_ADC_FRAMECNT:
-        d++;
-    case ES1370_REG_DAC2_FRAMECNT:
-        d++;
     case ES1370_REG_DAC1_FRAMECNT:
+    case ES1370_REG_DAC2_FRAMECNT:
+    case ES1370_REG_ADC_FRAMECNT:
+        d += (addr - ES1370_REG_DAC1_FRAMECNT) >> 3;
         val = d->frame_cnt;
 #ifdef DEBUG_ES1370
         {
@@ -753,11 +596,10 @@ static uint32_t es1370_readl(void *opaque, uint32_t addr)
 #endif
         break;
 
-    case ES1370_REG_ADC_FRAMEADR:
-        d++;
-    case ES1370_REG_DAC2_FRAMEADR:
-        d++;
     case ES1370_REG_DAC1_FRAMEADR:
+    case ES1370_REG_DAC2_FRAMEADR:
+    case ES1370_REG_ADC_FRAMEADR:
+        d += (addr - ES1370_REG_DAC1_FRAMEADR) >> 3;
         val = d->frame_addr;
         break;
 
@@ -908,44 +750,17 @@ static void es1370_adc_callback (void *opaque, int avail)
     es1370_run_channel (s, ADC_CHANNEL, avail);
 }
 
-static uint64_t es1370_read(void *opaque, hwaddr addr,
-                            unsigned size)
-{
-    switch (size) {
-    case 1:
-        return es1370_readb(opaque, addr);
-    case 2:
-        return es1370_readw(opaque, addr);
-    case 4:
-        return es1370_readl(opaque, addr);
-    default:
-        return -1;
-    }
-}
-
-static void es1370_write(void *opaque, hwaddr addr, uint64_t val,
-                      unsigned size)
-{
-    switch (size) {
-    case 1:
-        es1370_writeb(opaque, addr, val);
-        break;
-    case 2:
-        es1370_writew(opaque, addr, val);
-        break;
-    case 4:
-        es1370_writel(opaque, addr, val);
-        break;
-    }
-}
-
 static const MemoryRegionOps es1370_io_ops = {
     .read = es1370_read,
     .write = es1370_write,
-    .impl = {
+    .valid = {
         .min_access_size = 1,
         .max_access_size = 4,
     },
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
diff --git a/hw/audio/gusemu_hal.c b/hw/audio/gusemu_hal.c
index 1150fc4426..ae40ca341c 100644
--- a/hw/audio/gusemu_hal.c
+++ b/hw/audio/gusemu_hal.c
@@ -261,6 +261,7 @@ void gus_write(GUSEmuState * state, int port, int size, unsigned int data)
             GUSregb(IRQStatReg2x6) = 0x10;
             GUS_irqrequest(state, state->gusirq, 1);
         }
+        /* fall through */
     case 0x20D:                /* SB2xCd no IRQ */
         GUSregb(SB2xCd) = (uint8_t) data;
         break;
diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c
index 5a4d32364e..c5b9bf79e8 100644
--- a/hw/audio/sb16.c
+++ b/hw/audio/sb16.c
@@ -741,10 +741,15 @@ static void complete (SB16State *s)
             ldebug ("set time const %d\n", s->time_const);
             break;
 
-        case 0x42:              /* FT2 sets output freq with this, go figure */
-            qemu_log_mask(LOG_UNIMP, "cmd 0x42 might not do what it think it"
-                          " should\n");
         case 0x41:
+        case 0x42:
+            /*
+             * 0x41 is documented as setting the output sample rate,
+             * and 0x42 the input sample rate, but in fact SB16 hardware
+             * seems to have only a single sample rate under the hood,
+             * and FT2 sets output freq with this (go figure).  Compare:
+             * http://homepages.cae.wisc.edu/~brodskye/sb16doc/sb16doc.html#SamplingRate
+             */
             s->freq = dsp_get_hilo (s);
             ldebug ("set freq %d\n", s->freq);
             break;
diff --git a/hw/display/cg3.c b/hw/display/cg3.c
index 6fff4852c5..1c199ab369 100644
--- a/hw/display/cg3.c
+++ b/hw/display/cg3.c
@@ -232,6 +232,7 @@ static void cg3_reg_write(void *opaque, hwaddr addr, uint64_t val,
                 s->b[s->dac_index] = regval;
                 /* Index autoincrement */
                 s->dac_index = (s->dac_index + 1) & 0xff;
+                /* fall through */
             default:
                 s->dac_state = 0;
                 break;
diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c
index 7583b18c29..04c87c8e8d 100644
--- a/hw/display/cirrus_vga.c
+++ b/hw/display/cirrus_vga.c
@@ -1426,7 +1426,8 @@ static void cirrus_vga_write_sr(CirrusVGAState * s, uint32_t val)
         s->vga.hw_cursor_y = (val << 3) | (s->vga.sr_index >> 5);
 	break;
     case 0x07:			// Extended Sequencer Mode
-    cirrus_update_memory_access(s);
+        cirrus_update_memory_access(s);
+        /* fall through */
     case 0x08:			// EEPROM Control
     case 0x09:			// Scratch Register 0
     case 0x0a:			// Scratch Register 1
diff --git a/hw/i2c/pm_smbus.c b/hw/i2c/pm_smbus.c
index 0d26e0f6b5..685a2378ed 100644
--- a/hw/i2c/pm_smbus.c
+++ b/hw/i2c/pm_smbus.c
@@ -22,8 +22,6 @@
 #include "hw/i2c/pm_smbus.h"
 #include "hw/i2c/smbus.h"
 
-/* no save/load? */
-
 #define SMBHSTSTS       0x00
 #define SMBHSTCNT       0x02
 #define SMBHSTCMD       0x03
@@ -31,20 +29,40 @@
 #define SMBHSTDAT0      0x05
 #define SMBHSTDAT1      0x06
 #define SMBBLKDAT       0x07
+#define SMBAUXCTL       0x0d
 
-#define STS_HOST_BUSY   (1)
-#define STS_INTR        (1<<1)
-#define STS_DEV_ERR     (1<<2)
-#define STS_BUS_ERR     (1<<3)
-#define STS_FAILED      (1<<4)
-#define STS_SMBALERT    (1<<5)
-#define STS_INUSE_STS   (1<<6)
-#define STS_BYTE_DONE   (1<<7)
+#define STS_HOST_BUSY   (1 << 0)
+#define STS_INTR        (1 << 1)
+#define STS_DEV_ERR     (1 << 2)
+#define STS_BUS_ERR     (1 << 3)
+#define STS_FAILED      (1 << 4)
+#define STS_SMBALERT    (1 << 5)
+#define STS_INUSE_STS   (1 << 6)
+#define STS_BYTE_DONE   (1 << 7)
 /* Signs of successfully transaction end :
 *  ByteDoneStatus = 1 (STS_BYTE_DONE) and INTR = 1 (STS_INTR )
 */
 
-//#define DEBUG
+#define CTL_INTREN      (1 << 0)
+#define CTL_KILL        (1 << 1)
+#define CTL_LAST_BYTE   (1 << 5)
+#define CTL_START       (1 << 6)
+#define CTL_PEC_EN      (1 << 7)
+#define CTL_RETURN_MASK 0x1f
+
+#define PROT_QUICK          0
+#define PROT_BYTE           1
+#define PROT_BYTE_DATA      2
+#define PROT_WORD_DATA      3
+#define PROT_PROC_CALL      4
+#define PROT_BLOCK_DATA     5
+#define PROT_I2C_BLOCK_READ 6
+
+#define AUX_PEC       (1 << 0)
+#define AUX_BLK       (1 << 1)
+#define AUX_MASK      0x3
+
+/*#define DEBUG*/
 
 #ifdef DEBUG
 # define SMBUS_DPRINTF(format, ...)     printf(format, ## __VA_ARGS__)
@@ -62,19 +80,17 @@ static void smb_transaction(PMSMBus *s)
     I2CBus *bus = s->smbus;
     int ret;
 
-    assert(s->smb_stat & STS_HOST_BUSY);
-    s->smb_stat &= ~STS_HOST_BUSY;
-
     SMBUS_DPRINTF("SMBus trans addr=0x%02x prot=0x%02x\n", addr, prot);
     /* Transaction isn't exec if STS_DEV_ERR bit set */
     if ((s->smb_stat & STS_DEV_ERR) != 0)  {
         goto error;
     }
+
     switch(prot) {
-    case 0x0:
+    case PROT_QUICK:
         ret = smbus_quick_command(bus, addr, read);
         goto done;
-    case 0x1:
+    case PROT_BYTE:
         if (read) {
             ret = smbus_receive_byte(bus, addr);
             goto data8;
@@ -82,7 +98,7 @@ static void smb_transaction(PMSMBus *s)
             ret = smbus_send_byte(bus, addr, cmd);
             goto done;
         }
-    case 0x2:
+    case PROT_BYTE_DATA:
         if (read) {
             ret = smbus_read_byte(bus, addr, cmd);
             goto data8;
@@ -91,22 +107,73 @@ static void smb_transaction(PMSMBus *s)
             goto done;
         }
         break;
-    case 0x3:
+    case PROT_WORD_DATA:
         if (read) {
             ret = smbus_read_word(bus, addr, cmd);
             goto data16;
         } else {
-            ret = smbus_write_word(bus, addr, cmd, (s->smb_data1 << 8) | s->smb_data0);
+            ret = smbus_write_word(bus, addr, cmd,
+                                   (s->smb_data1 << 8) | s->smb_data0);
             goto done;
         }
         break;
-    case 0x5:
+    case PROT_I2C_BLOCK_READ:
         if (read) {
-            ret = smbus_read_block(bus, addr, cmd, s->smb_data);
+            int xfersize = s->smb_data0;
+            if (xfersize > sizeof(s->smb_data)) {
+                xfersize = sizeof(s->smb_data);
+            }
+            ret = smbus_read_block(bus, addr, s->smb_data1, s->smb_data,
+                                   xfersize, false, true);
             goto data8;
         } else {
-            ret = smbus_write_block(bus, addr, cmd, s->smb_data, s->smb_data0);
-            goto done;
+            /* The manual says the behavior is undefined, just set DEV_ERR. */
+            goto error;
+        }
+        break;
+    case PROT_BLOCK_DATA:
+        if (read) {
+            ret = smbus_read_block(bus, addr, cmd, s->smb_data,
+                                   sizeof(s->smb_data), !s->i2c_enable,
+                                   !s->i2c_enable);
+            if (ret < 0) {
+                goto error;
+            }
+            s->smb_index = 0;
+            s->op_done = false;
+            if (s->smb_auxctl & AUX_BLK) {
+                s->smb_stat |= STS_INTR;
+            } else {
+                s->smb_blkdata = s->smb_data[0];
+                s->smb_stat |= STS_HOST_BUSY | STS_BYTE_DONE;
+            }
+            s->smb_data0 = ret;
+            goto out;
+        } else {
+            if (s->smb_auxctl & AUX_BLK) {
+                if (s->smb_index != s->smb_data0) {
+                    s->smb_index = 0;
+                    goto error;
+                }
+                /* Data is already all written to the queue, just do
+                   the operation. */
+                s->smb_index = 0;
+                ret = smbus_write_block(bus, addr, cmd, s->smb_data,
+                                        s->smb_data0, !s->i2c_enable);
+                if (ret < 0) {
+                    goto error;
+                }
+                s->op_done = true;
+                s->smb_stat |= STS_INTR;
+                s->smb_stat &= ~STS_HOST_BUSY;
+            } else {
+                s->op_done = false;
+                s->smb_stat |= STS_HOST_BUSY | STS_BYTE_DONE;
+                s->smb_data[0] = s->smb_blkdata;
+                s->smb_index = 0;
+                ret = 0;
+            }
+            goto out;
         }
         break;
     default:
@@ -128,20 +195,35 @@ done:
     if (ret < 0) {
         goto error;
     }
-    s->smb_stat |= STS_BYTE_DONE | STS_INTR;
+    s->smb_stat |= STS_INTR;
+out:
     return;
 
 error:
     s->smb_stat |= STS_DEV_ERR;
     return;
-
 }
 
 static void smb_transaction_start(PMSMBus *s)
 {
-    /* Do not execute immediately the command ; it will be
-     * executed when guest will read SMB_STAT register */
-    s->smb_stat |= STS_HOST_BUSY;
+    if (s->smb_ctl & CTL_INTREN) {
+        smb_transaction(s);
+    } else {
+        /* Do not execute the command immediately; it will be
+         * executed when the guest reads the SMB_STAT register.  This
+         * is to work around a bug in AMIBIOS (which is itself working
+         * around another bug in some specific hardware) where
+         * it waits for STS_HOST_BUSY to be set before
+         * checking for status.  If STS_HOST_BUSY doesn't get
+         * set, it gets stuck. */
+        s->smb_stat |= STS_HOST_BUSY;
+    }
+}
+
+/* IRQ level: any status bit except HOST_BUSY, gated by CTL_INTREN. */
+static bool smb_irq_value(PMSMBus *s)
+{
+    return (s->smb_ctl & CTL_INTREN) && (s->smb_stat & ~STS_HOST_BUSY);
+}
 
 static void smb_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
@@ -153,13 +235,61 @@ static void smb_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
                   " val=0x%02" PRIx64 "\n", addr, val);
     switch(addr) {
     case SMBHSTSTS:
-        s->smb_stat = (~(val & 0xff)) & s->smb_stat;
-        s->smb_index = 0;
+        s->smb_stat &= ~(val & ~STS_HOST_BUSY);
+        if (!s->op_done && !(s->smb_auxctl & AUX_BLK)) {
+            uint8_t read = s->smb_addr & 0x01;
+
+            s->smb_index++;
+            if (!read && s->smb_index == s->smb_data0) {
+                uint8_t prot = (s->smb_ctl >> 2) & 0x07;
+                uint8_t cmd = s->smb_cmd;
+                uint8_t addr = s->smb_addr >> 1;
+                int ret;
+
+                if (prot == PROT_I2C_BLOCK_READ) {
+                    s->smb_stat |= STS_DEV_ERR;
+                    goto out;
+                }
+
+                ret = smbus_write_block(s->smbus, addr, cmd, s->smb_data,
+                                        s->smb_data0, !s->i2c_enable);
+                if (ret < 0) {
+                    s->smb_stat |= STS_DEV_ERR;
+                    goto out;
+                }
+                s->op_done = true;
+                s->smb_stat |= STS_INTR;
+                s->smb_stat &= ~STS_HOST_BUSY;
+            } else if (!read) {
+                s->smb_data[s->smb_index] = s->smb_blkdata;
+                s->smb_stat |= STS_BYTE_DONE;
+            } else if (s->smb_ctl & CTL_LAST_BYTE) {
+                s->op_done = true;
+                s->smb_blkdata = s->smb_data[s->smb_index];
+                s->smb_index = 0;
+                s->smb_stat |= STS_INTR;
+                s->smb_stat &= ~STS_HOST_BUSY;
+            } else {
+                s->smb_blkdata = s->smb_data[s->smb_index];
+                s->smb_stat |= STS_BYTE_DONE;
+            }
+        }
         break;
     case SMBHSTCNT:
-        s->smb_ctl = val;
-        if (val & 0x40)
+        s->smb_ctl = val & ~CTL_START; /* CTL_START always reads 0 */
+        if (val & CTL_START) {
+            if (!s->op_done) {
+                s->smb_index = 0;
+                s->op_done = true;
+            }
             smb_transaction_start(s);
+        }
+        if (s->smb_ctl & CTL_KILL) {
+            s->op_done = true;
+            s->smb_index = 0;
+            s->smb_stat |= STS_FAILED;
+            s->smb_stat &= ~STS_HOST_BUSY;
+        }
         break;
     case SMBHSTCMD:
         s->smb_cmd = val;
@@ -174,13 +304,26 @@ static void smb_ioport_writeb(void *opaque, hwaddr addr, uint64_t val,
         s->smb_data1 = val;
         break;
     case SMBBLKDAT:
-        s->smb_data[s->smb_index++] = val;
-        if (s->smb_index > 31)
+        if (s->smb_index >= PM_SMBUS_MAX_MSG_SIZE) {
             s->smb_index = 0;
+        }
+        if (s->smb_auxctl & AUX_BLK) {
+            s->smb_data[s->smb_index++] = val;
+        } else {
+            s->smb_blkdata = val;
+        }
+        break;
+    case SMBAUXCTL:
+        s->smb_auxctl = val & AUX_MASK;
         break;
     default:
         break;
     }
+
+ out:
+    if (s->set_irq) {
+        s->set_irq(s, smb_irq_value(s));
+    }
 }
 
 static uint64_t smb_ioport_readb(void *opaque, hwaddr addr, unsigned width)
@@ -193,12 +336,12 @@ static uint64_t smb_ioport_readb(void *opaque, hwaddr addr, unsigned width)
         val = s->smb_stat;
         if (s->smb_stat & STS_HOST_BUSY) {
             /* execute command now */
+            s->smb_stat &= ~STS_HOST_BUSY;
             smb_transaction(s);
         }
         break;
     case SMBHSTCNT:
-        s->smb_index = 0;
-        val = s->smb_ctl & 0x1f;
+        val = s->smb_ctl & CTL_RETURN_MASK;
         break;
     case SMBHSTCMD:
         val = s->smb_cmd;
@@ -213,18 +356,44 @@ static uint64_t smb_ioport_readb(void *opaque, hwaddr addr, unsigned width)
         val = s->smb_data1;
         break;
     case SMBBLKDAT:
-        val = s->smb_data[s->smb_index++];
-        if (s->smb_index > 31)
+        if (s->smb_index >= PM_SMBUS_MAX_MSG_SIZE) {
             s->smb_index = 0;
+        }
+        if (s->smb_auxctl & AUX_BLK) {
+            val = s->smb_data[s->smb_index++];
+            if (!s->op_done && s->smb_index == s->smb_data0) {
+                s->op_done = true;
+                s->smb_index = 0;
+                s->smb_stat &= ~STS_HOST_BUSY;
+            }
+        } else {
+            val = s->smb_blkdata;
+        }
+        break;
+    case SMBAUXCTL:
+        val = s->smb_auxctl;
         break;
     default:
         val = 0;
         break;
     }
-    SMBUS_DPRINTF("SMB readb port=0x%04" HWADDR_PRIx " val=0x%02x\n", addr, val);
+    SMBUS_DPRINTF("SMB readb port=0x%04" HWADDR_PRIx " val=0x%02x\n",
+                  addr, val);
+
+    if (s->set_irq) {
+        s->set_irq(s, smb_irq_value(s));
+    }
+
     return val;
 }
 
+static void pm_smbus_reset(PMSMBus *s)
+{
+    s->smb_stat = 0;        /* drop every pending host status bit */
+    s->smb_index = 0;       /* rewind the block data index */
+    s->op_done = true;      /* no block transfer left in flight */
+}
+
 static const MemoryRegionOps pm_smbus_ops = {
     .read = smb_ioport_readb,
     .write = smb_ioport_writeb,
@@ -233,9 +402,14 @@ static const MemoryRegionOps pm_smbus_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-void pm_smbus_init(DeviceState *parent, PMSMBus *smb)
+void pm_smbus_init(DeviceState *parent, PMSMBus *smb, bool force_aux_blk)
 {
+    smb->op_done = true;
+    smb->reset = pm_smbus_reset;
     smb->smbus = i2c_init_bus(parent, "i2c");
+    if (force_aux_blk) {
+        smb->smb_auxctl |= AUX_BLK;
+    }
     memory_region_init_io(&smb->io, OBJECT(parent), &pm_smbus_ops, smb,
                           "pm-smbus", 64);
 }
diff --git a/hw/i2c/smbus.c b/hw/i2c/smbus.c
index 587ce1ab7f..6ff77c582f 100644
--- a/hw/i2c/smbus.c
+++ b/hw/i2c/smbus.c
@@ -293,33 +293,42 @@ int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data)
     return 0;
 }
 
-int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data)
+int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
+                     int len, bool recv_len, bool send_cmd)
 {
-    int len;
+    int rlen;
     int i;
 
-    if (i2c_start_transfer(bus, addr, 0)) {
-        return -1;
+    if (send_cmd) {
+        if (i2c_start_transfer(bus, addr, 0)) {
+            return -1;
+        }
+        i2c_send(bus, command);
     }
-    i2c_send(bus, command);
     if (i2c_start_transfer(bus, addr, 1)) {
-        i2c_end_transfer(bus);
+        if (send_cmd) {
+            i2c_end_transfer(bus);
+        }
         return -1;
     }
-    len = i2c_recv(bus);
-    if (len > 32) {
-        len = 0;
+    if (recv_len) {
+        rlen = i2c_recv(bus);
+    } else {
+        rlen = len;
     }
-    for (i = 0; i < len; i++) {
+    if (rlen > len) {
+        rlen = 0;
+    }
+    for (i = 0; i < rlen; i++) {
         data[i] = i2c_recv(bus);
     }
     i2c_nack(bus);
     i2c_end_transfer(bus);
-    return len;
+    return rlen;
 }
 
 int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
-                      int len)
+                      int len, bool send_len)
 {
     int i;
 
@@ -330,7 +339,9 @@ int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
         return -1;
     }
     i2c_send(bus, command);
-    i2c_send(bus, len);
+    if (send_len) {
+        i2c_send(bus, len);
+    }
     for (i = 0; i < len; i++) {
         i2c_send(bus, data[i]);
     }
diff --git a/hw/i2c/smbus_ich9.c b/hw/i2c/smbus_ich9.c
index 007cb6701d..2a8b49e02f 100644
--- a/hw/i2c/smbus_ich9.c
+++ b/hw/i2c/smbus_ich9.c
@@ -40,6 +40,8 @@
 typedef struct ICH9SMBState {
     PCIDevice dev;
 
+    bool irq_enabled;
+
     PMSMBus smb;
 } ICH9SMBState;
 
@@ -61,12 +63,16 @@ static void ich9_smbus_write_config(PCIDevice *d, uint32_t address,
     pci_default_write_config(d, address, val, len);
     if (range_covers_byte(address, len, ICH9_SMB_HOSTC)) {
         uint8_t hostc = s->dev.config[ICH9_SMB_HOSTC];
-        if ((hostc & ICH9_SMB_HOSTC_HST_EN) &&
-            !(hostc & ICH9_SMB_HOSTC_I2C_EN)) {
+        if (hostc & ICH9_SMB_HOSTC_HST_EN) {
             memory_region_set_enabled(&s->smb.io, true);
         } else {
             memory_region_set_enabled(&s->smb.io, false);
         }
+        s->smb.i2c_enable = (hostc & ICH9_SMB_HOSTC_I2C_EN) != 0;
+        if (hostc & ICH9_SMB_HOSTC_SSRESET) {
+            s->smb.reset(&s->smb);
+            s->dev.config[ICH9_SMB_HOSTC] &= ~ICH9_SMB_HOSTC_SSRESET;
+        }
     }
 }
 
@@ -80,7 +86,7 @@ static void ich9_smbus_realize(PCIDevice *d, Error **errp)
     pci_set_byte(d->config + ICH9_SMB_HOSTC, 0);
     /* TODO bar0, bar1: 64bit BAR support*/
 
-    pm_smbus_init(&d->qdev, &s->smb);
+    pm_smbus_init(&d->qdev, &s->smb, false);
     pci_register_bar(d, ICH9_SMB_SMB_BASE_BAR, PCI_BASE_ADDRESS_SPACE_IO,
                      &s->smb.io);
 }
@@ -105,11 +111,25 @@ static void ich9_smb_class_init(ObjectClass *klass, void *data)
     dc->user_creatable = false;
 }
 
+/* set_irq hook: forward an SMBus interrupt level to the ICH9 PCI device. */
+static void ich9_smb_set_irq(PMSMBus *pmsmb, bool enabled)
+{
+    ICH9SMBState *ich9 = pmsmb->opaque;
+
+    /* Only touch the PCI interrupt line when the level actually changes. */
+    if (ich9->irq_enabled != enabled) {
+        ich9->irq_enabled = enabled;
+        pci_set_irq(&ich9->dev, enabled);
+    }
+}
+
 I2CBus *ich9_smb_init(PCIBus *bus, int devfn, uint32_t smb_io_base)
 {
     PCIDevice *d =
         pci_create_simple_multifunction(bus, devfn, true, TYPE_ICH9_SMB_DEVICE);
     ICH9SMBState *s = ICH9_SMB_DEVICE(d);
+    s->smb.set_irq = ich9_smb_set_irq;
+    s->smb.opaque = s;
     return s->smb.smbus;
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 83a444472b..03148450c8 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1679,7 +1679,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                                Error **errp)
 {
     const PCMachineState *pcms = PC_MACHINE(hotplug_dev);
+    const PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
     const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
+    const uint64_t legacy_align = TARGET_PAGE_SIZE;
 
     /*
      * When -no-acpi is used with Q35 machine type, no ACPI is built,
@@ -1696,6 +1698,9 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         error_setg(errp, "nvdimm is not enabled: missing 'nvdimm' in '-M'");
         return;
     }
+
+    pc_dimm_pre_plug(dev, MACHINE(hotplug_dev),
+                     pcmc->enforce_aligned_dimm ? NULL : &legacy_align, errp);
 }
 
 static void pc_memory_plug(HotplugHandler *hotplug_dev,
@@ -1704,18 +1709,9 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev,
     HotplugHandlerClass *hhc;
     Error *local_err = NULL;
     PCMachineState *pcms = PC_MACHINE(hotplug_dev);
-    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
-    PCDIMMDevice *dimm = PC_DIMM(dev);
-    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
-    uint64_t align = TARGET_PAGE_SIZE;
     bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
 
-    if (memory_region_get_alignment(mr) && pcmc->enforce_aligned_dimm) {
-        align = memory_region_get_alignment(mr);
-    }
-
-    pc_dimm_plug(dev, MACHINE(pcms), align, &local_err);
+    pc_dimm_plug(dev, MACHINE(pcms), &local_err);
     if (local_err) {
         goto out;
     }
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 6fda52b86c..97ffdd820f 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -650,31 +650,17 @@ static void apic_timer(void *opaque)
     apic_timer_update(s, s->next_time);
 }
 
-static uint32_t apic_mem_readb(void *opaque, hwaddr addr)
-{
-    return 0;
-}
-
-static uint32_t apic_mem_readw(void *opaque, hwaddr addr)
-{
-    return 0;
-}
-
-static void apic_mem_writeb(void *opaque, hwaddr addr, uint32_t val)
-{
-}
-
-static void apic_mem_writew(void *opaque, hwaddr addr, uint32_t val)
-{
-}
-
-static uint32_t apic_mem_readl(void *opaque, hwaddr addr)
+static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size)
 {
     DeviceState *dev;
     APICCommonState *s;
     uint32_t val;
     int index;
 
+    if (size < 4) {
+        return 0;
+    }
+
     dev = cpu_get_current_apic();
     if (!dev) {
         return 0;
@@ -765,11 +751,17 @@ static void apic_send_msi(MSIMessage *msi)
     apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
 }
 
-static void apic_mem_writel(void *opaque, hwaddr addr, uint32_t val)
+static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+                           unsigned size)
 {
     DeviceState *dev;
     APICCommonState *s;
     int index = (addr >> 4) & 0xff;
+
+    if (size < 4) {
+        return;
+    }
+
     if (addr > 0xfff || !index) {
         /* MSI and MMIO APIC are at the same memory location,
          * but actually not on the global bus: MSI is on PCI bus
@@ -880,10 +872,12 @@ static void apic_post_load(APICCommonState *s)
 }
 
 static const MemoryRegionOps apic_io_ops = {
-    .old_mmio = {
-        .read = { apic_mem_readb, apic_mem_readw, apic_mem_readl, },
-        .write = { apic_mem_writeb, apic_mem_writew, apic_mem_writel, },
-    },
+    .read = apic_mem_read,
+    .write = apic_mem_write,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
diff --git a/hw/ipmi/isa_ipmi_bt.c b/hw/ipmi/isa_ipmi_bt.c
index e946030e84..8bbb1fa785 100644
--- a/hw/ipmi/isa_ipmi_bt.c
+++ b/hw/ipmi/isa_ipmi_bt.c
@@ -22,6 +22,7 @@
  * THE SOFTWARE.
  */
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/error.h"
 #include "hw/hw.h"
 #include "hw/ipmi/ipmi.h"
@@ -450,22 +451,63 @@ static void isa_ipmi_bt_realize(DeviceState *dev, Error **errp)
     isa_register_ioport(isadev, &iib->bt.io, iib->bt.io_base);
 }
 
-static const VMStateDescription vmstate_ISAIPMIBTDevice = {
-    .name = TYPE_IPMI_INTERFACE,
+/* post_load hook: reset out-of-range BT message cursors; always returns 0. */
+static int ipmi_bt_vmstate_post_load(void *opaque, int version)
+{
+    IPMIBT *ib = opaque;
+
+    /* Migration data is untrusted.  Fields are uint32_t, hence %u not %d. */
+    if (ib->outpos >= MAX_IPMI_MSG_SIZE || ib->outlen >= MAX_IPMI_MSG_SIZE ||
+        ib->outpos >= ib->outlen) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "ipmi:bt: vmstate transfer received bad out values: %u %u\n",
+                      ib->outpos, ib->outlen);
+        ib->outpos = 0;
+        ib->outlen = 0;
+    }
+
+    if (ib->inlen >= MAX_IPMI_MSG_SIZE) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "ipmi:bt: vmstate transfer received bad in value: %u\n",
+                      ib->inlen);
+        ib->inlen = 0;
+    }
+    return 0;
+}
+
+const VMStateDescription vmstate_IPMIBT = {
+    .name = TYPE_IPMI_INTERFACE_PREFIX "bt",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .post_load = ipmi_bt_vmstate_post_load, /* checks outpos/outlen/inlen */
+    .fields      = (VMStateField[]) {
+        VMSTATE_BOOL(obf_irq_set, IPMIBT),
+        VMSTATE_BOOL(atn_irq_set, IPMIBT),
+        VMSTATE_BOOL(irqs_enabled, IPMIBT),
+        VMSTATE_UINT32(outpos, IPMIBT), /* range-checked in post_load */
+        VMSTATE_UINT32(outlen, IPMIBT), /* range-checked in post_load */
+        VMSTATE_UINT8_ARRAY(outmsg, IPMIBT, MAX_IPMI_MSG_SIZE),
+        VMSTATE_UINT32(inlen, IPMIBT),  /* range-checked in post_load */
+        VMSTATE_UINT8_ARRAY(inmsg, IPMIBT, MAX_IPMI_MSG_SIZE),
+        VMSTATE_UINT8(control_reg, IPMIBT),
+        VMSTATE_UINT8(mask_reg, IPMIBT),
+        VMSTATE_UINT8(waiting_rsp, IPMIBT),
+        VMSTATE_UINT8(waiting_seq, IPMIBT),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_ISAIPMIBTDevice = {
+    .name = TYPE_IPMI_INTERFACE_PREFIX "isa-bt",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    /*
+     * Version 1 had messed up the array transfer, it's not even usable
+     * because it used VMSTATE_VBUFFER_UINT32, but it did not transfer
+     * the buffer length, so random things would happen.
+     */
     .fields      = (VMStateField[]) {
-        VMSTATE_BOOL(bt.obf_irq_set, ISAIPMIBTDevice),
-        VMSTATE_BOOL(bt.atn_irq_set, ISAIPMIBTDevice),
-        VMSTATE_BOOL(bt.use_irq, ISAIPMIBTDevice),
-        VMSTATE_BOOL(bt.irqs_enabled, ISAIPMIBTDevice),
-        VMSTATE_UINT32(bt.outpos, ISAIPMIBTDevice),
-        VMSTATE_VBUFFER_UINT32(bt.outmsg, ISAIPMIBTDevice, 1, NULL, bt.outlen),
-        VMSTATE_VBUFFER_UINT32(bt.inmsg, ISAIPMIBTDevice, 1, NULL, bt.inlen),
-        VMSTATE_UINT8(bt.control_reg, ISAIPMIBTDevice),
-        VMSTATE_UINT8(bt.mask_reg, ISAIPMIBTDevice),
-        VMSTATE_UINT8(bt.waiting_rsp, ISAIPMIBTDevice),
-        VMSTATE_UINT8(bt.waiting_seq, ISAIPMIBTDevice),
+        VMSTATE_STRUCT(bt, ISAIPMIBTDevice, 1, vmstate_IPMIBT, IPMIBT),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c
index cff1946232..7302f6d74b 100644
--- a/hw/isa/vt82c686.c
+++ b/hw/isa/vt82c686.c
@@ -370,7 +370,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error **errp)
     pci_conf[0x90] = s->smb_io_base | 1;
     pci_conf[0x91] = s->smb_io_base >> 8;
     pci_conf[0xd2] = 0x90;
-    pm_smbus_init(&s->dev.qdev, &s->smb);
+    pm_smbus_init(&s->dev.qdev, &s->smb, false);
     memory_region_add_subregion(get_system_io(), s->smb_io_base, &s->smb.io);
 
     apm_init(dev, &s->apm, NULL, s);
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 65843bc52a..fb6bcaedc4 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -29,57 +29,60 @@
 
 static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp);
 
-void pc_dimm_plug(DeviceState *dev, MachineState *machine, uint64_t align,
-                  Error **errp)
+void pc_dimm_pre_plug(DeviceState *dev, MachineState *machine,
+                      const uint64_t *legacy_align, Error **errp)
 {
-    int slot;
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
-    MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm,
-                                                              &error_abort);
-    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
     Error *local_err = NULL;
-    uint64_t addr;
+    MemoryRegion *mr;
+    uint64_t addr, align;
+    int slot;
 
-    addr = object_property_get_uint(OBJECT(dimm),
-                                    PC_DIMM_ADDR_PROP, &local_err);
+    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP,
+                                   &error_abort);
+    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
+                                 machine->ram_slots, &local_err);
     if (local_err) {
         goto out;
     }
+    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &error_abort);
+    trace_mhp_pc_dimm_assigned_slot(slot);
 
-    addr = memory_device_get_free_addr(machine, !addr ? NULL : &addr, align,
-                                       memory_region_size(mr), &local_err);
+    mr = ddc->get_memory_region(dimm, &local_err);
     if (local_err) {
         goto out;
     }
 
-    object_property_set_uint(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
+    align = legacy_align ? *legacy_align : memory_region_get_alignment(mr);
+    addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP,
+                                    &error_abort);
+    addr = memory_device_get_free_addr(machine, !addr ? NULL : &addr, align,
+                                       memory_region_size(mr), &local_err);
     if (local_err) {
         goto out;
     }
     trace_mhp_pc_dimm_assigned_address(addr);
+    object_property_set_uint(OBJECT(dev), addr, PC_DIMM_ADDR_PROP,
+                             &error_abort);
+out:
+    error_propagate(errp, local_err);
+}
 
-    slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
+void pc_dimm_plug(DeviceState *dev, MachineState *machine, Error **errp)
+{
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    MemoryRegion *vmstate_mr = ddc->get_vmstate_memory_region(dimm,
+                                                              &error_abort);
+    MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
+    uint64_t addr;
 
-    slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
-                                 machine->ram_slots, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
-    if (local_err) {
-        goto out;
-    }
-    trace_mhp_pc_dimm_assigned_slot(slot);
+    addr = object_property_get_uint(OBJECT(dev), PC_DIMM_ADDR_PROP,
+                                    &error_abort);
 
     memory_device_plug_region(machine, mr, addr);
     vmstate_register_ram(vmstate_mr, dev);
-
-out:
-    error_propagate(errp, local_err);
 }
 
 void pc_dimm_unplug(DeviceState *dev, MachineState *machine)
diff --git a/hw/misc/vmcoreinfo.c b/hw/misc/vmcoreinfo.c
index a2805527cb..304c6287c7 100644
--- a/hw/misc/vmcoreinfo.c
+++ b/hw/misc/vmcoreinfo.c
@@ -19,7 +19,7 @@ static void fw_cfg_vmci_write(void *dev, off_t offset, size_t len)
     VMCoreInfoState *s = VMCOREINFO(dev);
 
     s->has_vmcoreinfo = offset == 0 && len == sizeof(s->vmcoreinfo)
-        && s->vmcoreinfo.guest_format != VMCOREINFO_FORMAT_NONE;
+        && s->vmcoreinfo.guest_format != FW_CFG_VMCOREINFO_FORMAT_NONE;
 }
 
 static void vmcoreinfo_reset(void *dev)
@@ -28,7 +28,7 @@ static void vmcoreinfo_reset(void *dev)
 
     s->has_vmcoreinfo = false;
     memset(&s->vmcoreinfo, 0, sizeof(s->vmcoreinfo));
-    s->vmcoreinfo.host_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF);
+    s->vmcoreinfo.host_format = cpu_to_le16(FW_CFG_VMCOREINFO_FORMAT_ELF);
 }
 
 static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
@@ -53,7 +53,7 @@ static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
         return;
     }
 
-    fw_cfg_add_file_callback(fw_cfg, "etc/vmcoreinfo",
+    fw_cfg_add_file_callback(fw_cfg, FW_CFG_VMCOREINFO_FILENAME,
                              NULL, fw_cfg_vmci_write, s,
                              &s->vmcoreinfo, sizeof(s->vmcoreinfo), false);
 
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 47146ba12a..162b27a3b8 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -608,6 +608,9 @@ static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
         rtc_set_memory(rtc, 0x3e, checksum & 0xff);
         rtc_set_memory(rtc, 0x2f, checksum >> 8);
         rtc_set_memory(rtc, 0x3f, checksum >> 8);
+
+        object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(rtc),
+                                  "date", NULL);
     }
     return 0;
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ddd4478a34..4edb6c7d16 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -562,9 +562,12 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset,
 
 static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
 {
+    CPUState **rev;
     CPUState *cs;
+    int n_cpus;
     int cpus_offset;
     char *nodename;
+    int i;
 
     cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
     _FDT(cpus_offset);
@@ -575,8 +578,19 @@ static void spapr_populate_cpus_dt_node(void *fdt, sPAPRMachineState *spapr)
      * We walk the CPUs in reverse order to ensure that CPU DT nodes
      * created by fdt_add_subnode() end up in the right order in FDT
      * for the guest kernel the enumerate the CPUs correctly.
+     *
+     * The CPU list cannot be traversed in reverse order, so we need
+     * to do extra work.
      */
-    CPU_FOREACH_REVERSE(cs) {
+    n_cpus = 0;
+    rev = NULL;
+    CPU_FOREACH(cs) {
+        rev = g_renew(CPUState *, rev, n_cpus + 1);
+        rev[n_cpus++] = cs;
+    }
+
+    for (i = n_cpus - 1; i >= 0; i--) {
+        CPUState *cs = rev[i];
         PowerPCCPU *cpu = POWERPC_CPU(cs);
         int index = spapr_get_vcpu_id(cpu);
         DeviceClass *dc = DEVICE_GET_CLASS(cs);
@@ -3113,13 +3127,12 @@ static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
     MemoryRegion *mr = ddc->get_memory_region(dimm, &error_abort);
-    uint64_t align, size, addr;
+    uint64_t size, addr;
     uint32_t node;
 
-    align = memory_region_get_alignment(mr);
     size = memory_region_size(mr);
 
-    pc_dimm_plug(dev, MACHINE(ms), align, &local_err);
+    pc_dimm_plug(dev, MACHINE(ms), &local_err);
     if (local_err) {
         goto out;
     }
@@ -3154,6 +3167,7 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
     PCDIMMDevice *dimm = PC_DIMM(dev);
     PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+    Error *local_err = NULL;
     MemoryRegion *mr;
     uint64_t size;
     Object *memdev;
@@ -3179,7 +3193,13 @@ static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
     memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
                                       &error_abort);
     pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev));
-    spapr_check_pagesize(spapr, pagesize, errp);
+    spapr_check_pagesize(spapr, pagesize, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    pc_dimm_pre_plug(dev, MACHINE(hotplug_dev), NULL, errp);
 }
 
 struct sPAPRDIMMState {
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index 160657f4b9..955ba94800 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -959,6 +959,10 @@ static void lsi_do_msgout(LSIState *s)
                 DPRINTF("WDTR (ignored)\n");
                 lsi_skip_msgbytes(s, 1);
                 break;
+            case 4:
+                DPRINTF("PPR (ignored)\n");
+                lsi_skip_msgbytes(s, 5);
+                break;
             default:
                 goto bad;
             }
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index ba1afa3c1e..a56317e026 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -464,6 +464,7 @@ static void megasas_unmap_frame(MegasasState *s, MegasasCmd *cmd)
     cmd->frame = NULL;
     cmd->pa = 0;
     cmd->pa_size = 0;
+    qemu_sglist_destroy(&cmd->qsg);
     clear_bit(cmd->index, s->frame_map);
 }
 
@@ -580,7 +581,6 @@ static void megasas_complete_frame(MegasasState *s, uint64_t context)
 
 static void megasas_complete_command(MegasasCmd *cmd)
 {
-    qemu_sglist_destroy(&cmd->qsg);
     cmd->iov_size = 0;
     cmd->iov_offset = 0;
 
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 4176e871e1..929404fb48 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -1431,6 +1431,7 @@ static void mptsas1068_class_init(ObjectClass *oc, void *data)
     dc->reset = mptsas_reset;
     dc->vmsd = &vmstate_mptsas;
     dc->desc = "LSI SAS 1068";
+    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 }
 
 static const TypeInfo mptsas_info = {
diff --git a/hw/scsi/vhost-scsi-common.c b/hw/scsi/vhost-scsi-common.c
index e2a5828af1..b7fbab65dd 100644
--- a/hw/scsi/vhost-scsi-common.c
+++ b/hw/scsi/vhost-scsi-common.c
@@ -96,6 +96,9 @@ uint64_t vhost_scsi_common_get_features(VirtIODevice *vdev, uint64_t features,
 {
     VHostSCSICommon *vsc = VHOST_SCSI_COMMON(vdev);
 
+    /* Turn on predefined features supported by this device */
+    features |= vsc->host_features;
+
     return vhost_get_features(&vsc->dev, vsc->feature_bits, features);
 }
 
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 9c1bea8ff3..becf550085 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -238,6 +238,9 @@ static Property vhost_scsi_properties[] = {
     DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
                        0xFFFF),
     DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
+    DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features,
+                                                 VIRTIO_SCSI_F_T10_PI,
+                                                 false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index 9355cfdf07..2e1ba4a87b 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -137,17 +137,6 @@ static void vhost_user_scsi_unrealize(DeviceState *dev, Error **errp)
     }
 }
 
-static uint64_t vhost_user_scsi_get_features(VirtIODevice *vdev,
-                                             uint64_t features, Error **errp)
-{
-    VHostUserSCSI *s = VHOST_USER_SCSI(vdev);
-
-    /* Turn on predefined features supported by this device */
-    features |= s->host_features;
-
-    return vhost_scsi_common_get_features(vdev, features, errp);
-}
-
 static Property vhost_user_scsi_properties[] = {
     DEFINE_PROP_CHR("chardev", VirtIOSCSICommon, conf.chardev),
     DEFINE_PROP_UINT32("boot_tpgt", VirtIOSCSICommon, conf.boot_tpgt, 0),
@@ -157,12 +146,15 @@ static Property vhost_user_scsi_properties[] = {
     DEFINE_PROP_UINT32("max_sectors", VirtIOSCSICommon, conf.max_sectors,
                        0xFFFF),
     DEFINE_PROP_UINT32("cmd_per_lun", VirtIOSCSICommon, conf.cmd_per_lun, 128),
-    DEFINE_PROP_BIT64("hotplug", VHostUserSCSI, host_features,
-                                                VIRTIO_SCSI_F_HOTPLUG,
-                                                true),
-    DEFINE_PROP_BIT64("param_change", VHostUserSCSI, host_features,
-                                                     VIRTIO_SCSI_F_CHANGE,
-                                                     true),
+    DEFINE_PROP_BIT64("hotplug", VHostSCSICommon, host_features,
+                                                  VIRTIO_SCSI_F_HOTPLUG,
+                                                  true),
+    DEFINE_PROP_BIT64("param_change", VHostSCSICommon, host_features,
+                                                       VIRTIO_SCSI_F_CHANGE,
+                                                       true),
+    DEFINE_PROP_BIT64("t10_pi", VHostSCSICommon, host_features,
+                                                 VIRTIO_SCSI_F_T10_PI,
+                                                 false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -187,7 +179,7 @@ static void vhost_user_scsi_class_init(ObjectClass *klass, void *data)
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     vdc->realize = vhost_user_scsi_realize;
     vdc->unrealize = vhost_user_scsi_unrealize;
-    vdc->get_features = vhost_user_scsi_get_features;
+    vdc->get_features = vhost_scsi_common_get_features;
     vdc->set_config = vhost_scsi_common_set_config;
     vdc->set_status = vhost_user_scsi_set_status;
     fwc->get_dev_path = vhost_scsi_common_get_fw_dev_path;
diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 6f1f723b1f..a504f0308d 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -120,7 +120,7 @@ static void rtc_coalesced_timer_update(RTCState *s)
         timer_del(s->coalesced_timer);
     } else {
         /* divide each RTC interval to 2 - 8 smaller intervals */
-        int c = MIN(s->irq_coalesced, 7) + 1; 
+        int c = MIN(s->irq_coalesced, 7) + 1;
         int64_t next_clock = qemu_clock_get_ns(rtc_clock) +
             periodic_clock_to_ns(s->period / c);
         timer_mod(s->coalesced_timer, next_clock);
@@ -485,7 +485,7 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
             s->cmos_data[s->cmos_index] = data;
             check_update_timer(s);
             break;
-	case RTC_IBM_PS2_CENTURY_BYTE:
+        case RTC_IBM_PS2_CENTURY_BYTE:
             s->cmos_index = RTC_CENTURY;
             /* fall through */
         case RTC_CENTURY:
@@ -713,7 +713,7 @@ static uint64_t cmos_ioport_read(void *opaque, hwaddr addr,
         return 0xff;
     } else {
         switch(s->cmos_index) {
-	case RTC_IBM_PS2_CENTURY_BYTE:
+        case RTC_IBM_PS2_CENTURY_BYTE:
             s->cmos_index = RTC_CENTURY;
             /* fall through */
         case RTC_CENTURY:
@@ -915,7 +915,7 @@ static void rtc_reset(void *opaque)
 
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
         s->irq_coalesced = 0;
-        s->irq_reinject_on_ack_count = 0;		
+        s->irq_reinject_on_ack_count = 0;
     }
 }
 
@@ -995,9 +995,6 @@ static void rtc_realizefn(DeviceState *dev, Error **errp)
 
     object_property_add_tm(OBJECT(s), "date", rtc_get_date, NULL);
 
-    object_property_add_alias(qdev_get_machine(), "rtc-time",
-                              OBJECT(s), "date", NULL);
-
     qdev_init_gpio_out(dev, &s->irq, 1);
 }
 
@@ -1019,6 +1016,9 @@ ISADevice *mc146818_rtc_init(ISABus *bus, int base_year, qemu_irq intercept_irq)
     }
     QLIST_INSERT_HEAD(&rtc_devices, s, link);
 
+    object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(s),
+                              "date", NULL);
+
     return isadev;
 }
 
@@ -1052,17 +1052,11 @@ static void rtc_class_initfn(ObjectClass *klass, void *data)
     dc->user_creatable = false;
 }
 
-static void rtc_finalize(Object *obj)
-{
-    object_property_del(qdev_get_machine(), "rtc", NULL);
-}
-
 static const TypeInfo mc146818rtc_info = {
     .name          = TYPE_MC146818_RTC,
     .parent        = TYPE_ISA_DEVICE,
     .instance_size = sizeof(RTCState),
     .class_init    = rtc_class_initfn,
-    .instance_finalize = rtc_finalize,
 };
 
 static void mc146818rtc_register_types(void)
diff --git a/hw/timer/sh_timer.c b/hw/timer/sh_timer.c
index 5f8736cf10..91b18ba312 100644
--- a/hw/timer/sh_timer.c
+++ b/hw/timer/sh_timer.c
@@ -74,6 +74,7 @@ static uint32_t sh_timer_read(void *opaque, hwaddr offset)
     case OFFSET_TCPR:
         if (s->feat & TIMER_FEAT_CAPT)
             return s->tcpr;
+        /* fall through */
     default:
         hw_error("sh_timer_read: Bad offset %x\n", (int)offset);
         return 0;
diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
index 71cd069478..c67271f1ba 100644
--- a/include/chardev/char-fe.h
+++ b/include/chardev/char-fe.h
@@ -113,7 +113,7 @@ void qemu_chr_fe_accept_input(CharBackend *be);
 /**
  * @qemu_chr_fe_disconnect:
  *
- * Close a fd accpeted by character backend.
+ * Close a fd accepted by character backend.
  * Without associated Chardev, do nothing.
  */
 void qemu_chr_fe_disconnect(CharBackend *be);
@@ -122,7 +122,7 @@ void qemu_chr_fe_disconnect(CharBackend *be);
  * @qemu_chr_fe_wait_connected:
  *
  * Wait for characted backend to be connected, return < 0 on error or
- * if no assicated Chardev.
+ * if no associated Chardev.
  */
 int qemu_chr_fe_wait_connected(CharBackend *be, Error **errp);
 
@@ -186,7 +186,7 @@ guint qemu_chr_fe_add_watch(CharBackend *be, GIOCondition cond,
  * @buf the data
  * @len the number of bytes to send
  *
- * Returns: the number of bytes consumed (0 if no assicated Chardev)
+ * Returns: the number of bytes consumed (0 if no associated Chardev)
  */
 int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
 
@@ -201,7 +201,7 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
  * @buf the data
  * @len the number of bytes to send
  *
- * Returns: the number of bytes consumed (0 if no assicated Chardev)
+ * Returns: the number of bytes consumed (0 if no associated Chardev)
  */
 int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
 
@@ -213,7 +213,7 @@ int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
  * @buf the data buffer
  * @len the number of bytes to read
  *
- * Returns: the number of bytes read (0 if no assicated Chardev)
+ * Returns: the number of bytes read (0 if no associated Chardev)
  */
 int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len);
 
diff --git a/include/hw/i2c/pm_smbus.h b/include/hw/i2c/pm_smbus.h
index 2a837afdcb..060d3c6ac0 100644
--- a/include/hw/i2c/pm_smbus.h
+++ b/include/hw/i2c/pm_smbus.h
@@ -1,6 +1,8 @@
 #ifndef PM_SMBUS_H
 #define PM_SMBUS_H
 
+#define PM_SMBUS_MAX_MSG_SIZE 32
+
 typedef struct PMSMBus {
     I2CBus *smbus;
     MemoryRegion io;
@@ -11,10 +13,26 @@ typedef struct PMSMBus {
     uint8_t smb_addr;
     uint8_t smb_data0;
     uint8_t smb_data1;
-    uint8_t smb_data[32];
-    uint8_t smb_index;
+    uint8_t smb_data[PM_SMBUS_MAX_MSG_SIZE];
+    uint8_t smb_blkdata;
+    uint8_t smb_auxctl;
+    uint32_t smb_index;
+
+    /* Set by pm_smbus.c */
+    void (*reset)(struct PMSMBus *s);
+
+    /* Set by the user. */
+    bool i2c_enable;
+    void (*set_irq)(struct PMSMBus *s, bool enabled);
+    void *opaque;
+
+    /* Internally used by pm_smbus. */
+
+    /* Set on block transfers after the last byte has been read, so the
+       INTR bit can be set at the right time. */
+    bool op_done;
 } PMSMBus;
 
-void pm_smbus_init(DeviceState *parent, PMSMBus *smb);
+void pm_smbus_init(DeviceState *parent, PMSMBus *smb, bool force_aux_blk);
 
 #endif /* PM_SMBUS_H */
diff --git a/include/hw/i2c/smbus.h b/include/hw/i2c/smbus.h
index 4fdba022c1..d8b1b9ee81 100644
--- a/include/hw/i2c/smbus.h
+++ b/include/hw/i2c/smbus.h
@@ -72,9 +72,22 @@ int smbus_read_byte(I2CBus *bus, uint8_t addr, uint8_t command);
 int smbus_write_byte(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t data);
 int smbus_read_word(I2CBus *bus, uint8_t addr, uint8_t command);
 int smbus_write_word(I2CBus *bus, uint8_t addr, uint8_t command, uint16_t data);
-int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data);
+
+/*
+ * Do a block transfer from an I2C device.  If recv_len is set, then the
+ * first received byte is a length field and is used to know how much data
+ * to receive.  Otherwise receive "len" bytes.  If send_cmd is set, send
+ * the command byte first before receiving the data.
+ */
+int smbus_read_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
+                     int len, bool recv_len, bool send_cmd);
+
+/*
+ * Do a block transfer to an I2C device.  If send_len is set, send the
+ * "len" value before the data.
+ */
 int smbus_write_block(I2CBus *bus, uint8_t addr, uint8_t command, uint8_t *data,
-                      int len);
+                      int len, bool send_len);
 
 void smbus_eeprom_init_one(I2CBus *smbus, uint8_t address, uint8_t *eeprom_buf);
 void smbus_eeprom_init(I2CBus *smbus, int nb_eeprom,
diff --git a/include/hw/mem/pc-dimm.h b/include/hw/mem/pc-dimm.h
index 26ebb7d5e9..b382eb4303 100644
--- a/include/hw/mem/pc-dimm.h
+++ b/include/hw/mem/pc-dimm.h
@@ -79,7 +79,8 @@ typedef struct PCDIMMDeviceClass {
                                                Error **errp);
 } PCDIMMDeviceClass;
 
-void pc_dimm_plug(DeviceState *dev, MachineState *machine, uint64_t align,
-                  Error **errp);
+void pc_dimm_pre_plug(DeviceState *dev, MachineState *machine,
+                      const uint64_t *legacy_align, Error **errp);
+void pc_dimm_plug(DeviceState *dev, MachineState *machine, Error **errp);
 void pc_dimm_unplug(DeviceState *dev, MachineState *machine);
 #endif
diff --git a/include/hw/misc/vmcoreinfo.h b/include/hw/misc/vmcoreinfo.h
index c3aa856545..0d11578059 100644
--- a/include/hw/misc/vmcoreinfo.h
+++ b/include/hw/misc/vmcoreinfo.h
@@ -13,20 +13,12 @@
 #define VMCOREINFO_H
 
 #include "hw/qdev.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #define VMCOREINFO_DEVICE "vmcoreinfo"
 #define VMCOREINFO(obj) OBJECT_CHECK(VMCoreInfoState, (obj), VMCOREINFO_DEVICE)
 
-#define VMCOREINFO_FORMAT_NONE 0x0
-#define VMCOREINFO_FORMAT_ELF 0x1
-
-/* all fields are little-endian */
-typedef struct FWCfgVMCoreInfo {
-    uint16_t host_format; /* set on reset */
-    uint16_t guest_format;
-    uint32_t size;
-    uint64_t paddr;
-} QEMU_PACKED FWCfgVMCoreInfo;
+typedef struct fw_cfg_vmcoreinfo FWCfgVMCoreInfo;
 
 typedef struct VMCoreInfoState {
     DeviceClass parent_obj;
diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
index b2259cc4a3..f5a6895a74 100644
--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -2,7 +2,7 @@
 #define FW_CFG_H
 
 #include "exec/hwaddr.h"
-#include "hw/nvram/fw_cfg_keys.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 #include "hw/sysbus.h"
 #include "sysemu/dma.h"
 
@@ -14,12 +14,7 @@
 #define FW_CFG_IO(obj)  OBJECT_CHECK(FWCfgIoState,  (obj), TYPE_FW_CFG_IO)
 #define FW_CFG_MEM(obj) OBJECT_CHECK(FWCfgMemState, (obj), TYPE_FW_CFG_MEM)
 
-typedef struct FWCfgFile {
-    uint32_t  size;        /* file size */
-    uint16_t  select;      /* write this to 0x510 to read it */
-    uint16_t  reserved;
-    char      name[FW_CFG_MAX_FILE_PATH];
-} FWCfgFile;
+typedef struct fw_cfg_file FWCfgFile;
 
 #define FW_CFG_ORDER_OVERRIDE_VGA    70
 #define FW_CFG_ORDER_OVERRIDE_NIC    80
@@ -34,14 +29,7 @@ typedef struct FWCfgFiles {
     FWCfgFile f[];
 } FWCfgFiles;
 
-/* Control as first field allows for different structures selected by this
- * field, which might be useful in the future
- */
-typedef struct FWCfgDmaAccess {
-    uint32_t control;
-    uint32_t length;
-    uint64_t address;
-} QEMU_PACKED FWCfgDmaAccess;
+typedef struct fw_cfg_dma_access FWCfgDmaAccess;
 
 typedef void (*FWCfgCallback)(void *opaque);
 typedef void (*FWCfgWriteCallback)(void *opaque, off_t start, size_t len);
diff --git a/include/hw/nvram/fw_cfg_keys.h b/include/hw/nvram/fw_cfg_keys.h
deleted file mode 100644
index b6919451f5..0000000000
--- a/include/hw/nvram/fw_cfg_keys.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef FW_CFG_KEYS_H
-#define FW_CFG_KEYS_H
-
-#define FW_CFG_SIGNATURE        0x00
-#define FW_CFG_ID               0x01
-#define FW_CFG_UUID             0x02
-#define FW_CFG_RAM_SIZE         0x03
-#define FW_CFG_NOGRAPHIC        0x04
-#define FW_CFG_NB_CPUS          0x05
-#define FW_CFG_MACHINE_ID       0x06
-#define FW_CFG_KERNEL_ADDR      0x07
-#define FW_CFG_KERNEL_SIZE      0x08
-#define FW_CFG_KERNEL_CMDLINE   0x09
-#define FW_CFG_INITRD_ADDR      0x0a
-#define FW_CFG_INITRD_SIZE      0x0b
-#define FW_CFG_BOOT_DEVICE      0x0c
-#define FW_CFG_NUMA             0x0d
-#define FW_CFG_BOOT_MENU        0x0e
-#define FW_CFG_MAX_CPUS         0x0f
-#define FW_CFG_KERNEL_ENTRY     0x10
-#define FW_CFG_KERNEL_DATA      0x11
-#define FW_CFG_INITRD_DATA      0x12
-#define FW_CFG_CMDLINE_ADDR     0x13
-#define FW_CFG_CMDLINE_SIZE     0x14
-#define FW_CFG_CMDLINE_DATA     0x15
-#define FW_CFG_SETUP_ADDR       0x16
-#define FW_CFG_SETUP_SIZE       0x17
-#define FW_CFG_SETUP_DATA       0x18
-#define FW_CFG_FILE_DIR         0x19
-
-#define FW_CFG_FILE_FIRST       0x20
-#define FW_CFG_FILE_SLOTS_MIN   0x10
-
-#define FW_CFG_WRITE_CHANNEL    0x4000
-#define FW_CFG_ARCH_LOCAL       0x8000
-#define FW_CFG_ENTRY_MASK       (~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
-
-#define FW_CFG_INVALID          0xffff
-
-/* width in bytes of fw_cfg control register */
-#define FW_CFG_CTL_SIZE         0x02
-
-#define FW_CFG_MAX_FILE_PATH    56
-
-#endif
diff --git a/include/hw/virtio/vhost-scsi-common.h b/include/hw/virtio/vhost-scsi-common.h
index 4553be4bc3..57fb1d87b5 100644
--- a/include/hw/virtio/vhost-scsi-common.h
+++ b/include/hw/virtio/vhost-scsi-common.h
@@ -35,6 +35,7 @@ typedef struct VHostSCSICommon {
     int channel;
     int target;
     int lun;
+    uint64_t host_features;
 } VHostSCSICommon;
 
 int vhost_scsi_common_start(VHostSCSICommon *vsc);
diff --git a/include/hw/virtio/vhost-user-scsi.h b/include/hw/virtio/vhost-user-scsi.h
index 3ec34ae867..e429cacd8e 100644
--- a/include/hw/virtio/vhost-user-scsi.h
+++ b/include/hw/virtio/vhost-user-scsi.h
@@ -30,7 +30,6 @@
 
 typedef struct VHostUserSCSI {
     VHostSCSICommon parent_obj;
-    uint64_t host_features;
     VhostUserState *vhost_user;
 } VHostUserSCSI;
 
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 721aa2416a..e59f9ae1e9 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -276,7 +276,9 @@ bool qemu_mutex_iothread_locked(void);
  * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread
  * is a no-op there.
  */
-void qemu_mutex_lock_iothread(void);
+#define qemu_mutex_lock_iothread()                      \
+    qemu_mutex_lock_iothread_impl(__FILE__, __LINE__)
+void qemu_mutex_lock_iothread_impl(const char *file, int line);
 
 /**
  * qemu_mutex_unlock_iothread: Unlock the main loop mutex.
diff --git a/include/qemu/qht.h b/include/qemu/qht.h
index 1fb9116fa0..c9a11cc29a 100644
--- a/include/qemu/qht.h
+++ b/include/qemu/qht.h
@@ -46,6 +46,7 @@ typedef bool (*qht_lookup_func_t)(const void *obj, const void *userp);
 typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up);
 
 #define QHT_MODE_AUTO_RESIZE 0x1 /* auto-resize when heavily loaded */
+#define QHT_MODE_RAW_MUTEXES 0x2 /* bypass the profiler (QSP) */
 
 /**
  * qht_init - Initialize a QHT
diff --git a/include/qemu/qsp.h b/include/qemu/qsp.h
new file mode 100644
index 0000000000..a94c464f90
--- /dev/null
+++ b/include/qemu/qsp.h
@@ -0,0 +1,29 @@
+/*
+ * qsp.h - QEMU Synchronization Profiler
+ *
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * Note: this header file can *only* be included from thread.h.
+ */
+#ifndef QEMU_QSP_H
+#define QEMU_QSP_H
+
+#include "qemu/fprintf-fn.h"
+
+enum QSPSortBy {
+    QSP_SORT_BY_TOTAL_WAIT_TIME,
+    QSP_SORT_BY_AVG_WAIT_TIME,
+};
+
+void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max,
+                enum QSPSortBy sort_by, bool callsite_coalesce);
+
+bool qsp_is_enabled(void);
+void qsp_enable(void);
+void qsp_disable(void);
+void qsp_reset(void);
+
+#endif /* QEMU_QSP_H */
diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
index 01be77407b..904b3372dc 100644
--- a/include/qemu/rcu_queue.h
+++ b/include/qemu/rcu_queue.h
@@ -36,7 +36,7 @@ extern "C" {
 /*
  * List access methods.
  */
-#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
+#define QLIST_EMPTY_RCU(head) (atomic_read(&(head)->lh_first) == NULL)
 #define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
 #define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
 
@@ -112,7 +112,7 @@ extern "C" {
        (elm)->field.le_next->field.le_prev =        \
         (elm)->field.le_prev;                       \
     }                                               \
-    *(elm)->field.le_prev =  (elm)->field.le_next;  \
+    atomic_set((elm)->field.le_prev, (elm)->field.le_next); \
 } while (/*CONSTCOND*/0)
 
 /* List traversal must occur within an RCU critical section.  */
@@ -128,6 +128,137 @@ extern "C" {
           ((next_var) = atomic_rcu_read(&(var)->field.le_next), 1);  \
            (var) = (next_var))
 
+/*
+ * RCU simple queue
+ */
+
+/* Simple queue access methods */
+#define QSIMPLEQ_EMPTY_RCU(head)      (atomic_read(&(head)->sqh_first) == NULL)
+#define QSIMPLEQ_FIRST_RCU(head)       atomic_rcu_read(&(head)->sqh_first)
+#define QSIMPLEQ_NEXT_RCU(elm, field)  atomic_rcu_read(&(elm)->field.sqe_next)
+
+/* Simple queue functions */
+#define QSIMPLEQ_INSERT_HEAD_RCU(head, elm, field) do {         \
+    (elm)->field.sqe_next = (head)->sqh_first;                  \
+    if ((elm)->field.sqe_next == NULL) {                        \
+        (head)->sqh_last = &(elm)->field.sqe_next;              \
+    }                                                           \
+    atomic_rcu_set(&(head)->sqh_first, (elm));                  \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_TAIL_RCU(head, elm, field) do {    \
+    (elm)->field.sqe_next = NULL;                          \
+    atomic_rcu_set((head)->sqh_last, (elm));               \
+    (head)->sqh_last = &(elm)->field.sqe_next;             \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_AFTER_RCU(head, listelm, elm, field) do {       \
+    (elm)->field.sqe_next = (listelm)->field.sqe_next;                  \
+    if ((elm)->field.sqe_next == NULL) {                                \
+        (head)->sqh_last = &(elm)->field.sqe_next;                      \
+    }                                                                   \
+    atomic_rcu_set(&(listelm)->field.sqe_next, (elm));                  \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE_HEAD_RCU(head, field) do {                     \
+    atomic_set(&(head)->sqh_first, (head)->sqh_first->field.sqe_next); \
+    if ((head)->sqh_first == NULL) {                                   \
+        (head)->sqh_last = &(head)->sqh_first;                         \
+    }                                                                  \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE_RCU(head, elm, type, field) do {            \
+    if ((head)->sqh_first == (elm)) {                               \
+        QSIMPLEQ_REMOVE_HEAD_RCU((head), field);                    \
+    } else {                                                        \
+        struct type *curr = (head)->sqh_first;                      \
+        while (curr->field.sqe_next != (elm)) {                     \
+            curr = curr->field.sqe_next;                            \
+        }                                                           \
+        atomic_set(&curr->field.sqe_next,                           \
+                   curr->field.sqe_next->field.sqe_next);           \
+        if (curr->field.sqe_next == NULL) {                         \
+            (head)->sqh_last = &(curr)->field.sqe_next;             \
+        }                                                           \
+    }                                                               \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_FOREACH_RCU(var, head, field)                          \
+    for ((var) = atomic_rcu_read(&(head)->sqh_first);                   \
+         (var);                                                         \
+         (var) = atomic_rcu_read(&(var)->field.sqe_next))
+
+#define QSIMPLEQ_FOREACH_SAFE_RCU(var, head, field, next)                \
+    for ((var) = atomic_rcu_read(&(head)->sqh_first);                    \
+         (var) && ((next) = atomic_rcu_read(&(var)->field.sqe_next), 1); \
+         (var) = (next))
+
+/*
+ * RCU tail queue
+ */
+
+/* Tail queue access methods */
+#define QTAILQ_EMPTY_RCU(head)      (atomic_read(&(head)->tqh_first) == NULL)
+#define QTAILQ_FIRST_RCU(head)       atomic_rcu_read(&(head)->tqh_first)
+#define QTAILQ_NEXT_RCU(elm, field)  atomic_rcu_read(&(elm)->field.tqe_next)
+
+/* Tail queue functions */
+#define QTAILQ_INSERT_HEAD_RCU(head, elm, field) do {                   \
+    (elm)->field.tqe_next = (head)->tqh_first;                          \
+    if ((elm)->field.tqe_next != NULL) {                                \
+        (head)->tqh_first->field.tqe_prev = &(elm)->field.tqe_next;     \
+    } else {                                                            \
+        (head)->tqh_last = &(elm)->field.tqe_next;                      \
+    }                                                                   \
+    atomic_rcu_set(&(head)->tqh_first, (elm));                          \
+    (elm)->field.tqe_prev = &(head)->tqh_first;                         \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_TAIL_RCU(head, elm, field) do {               \
+    (elm)->field.tqe_next = NULL;                                   \
+    (elm)->field.tqe_prev = (head)->tqh_last;                       \
+    atomic_rcu_set((head)->tqh_last, (elm));                        \
+    (head)->tqh_last = &(elm)->field.tqe_next;                      \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_AFTER_RCU(head, listelm, elm, field) do {         \
+    (elm)->field.tqe_next = (listelm)->field.tqe_next;                  \
+    if ((elm)->field.tqe_next != NULL) {                                \
+        (elm)->field.tqe_next->field.tqe_prev = &(elm)->field.tqe_next; \
+    } else {                                                            \
+        (head)->tqh_last = &(elm)->field.tqe_next;                      \
+    }                                                                   \
+    atomic_rcu_set(&(listelm)->field.tqe_next, (elm));                  \
+    (elm)->field.tqe_prev = &(listelm)->field.tqe_next;                 \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_BEFORE_RCU(listelm, elm, field) do {          \
+    (elm)->field.tqe_prev = (listelm)->field.tqe_prev;              \
+    (elm)->field.tqe_next = (listelm);                              \
+    atomic_rcu_set((listelm)->field.tqe_prev, (elm));               \
+    (listelm)->field.tqe_prev = &(elm)->field.tqe_next;             \
+    } while (/*CONSTCOND*/0)
+
+#define QTAILQ_REMOVE_RCU(head, elm, field) do {                        \
+    if (((elm)->field.tqe_next) != NULL) {                              \
+        (elm)->field.tqe_next->field.tqe_prev = (elm)->field.tqe_prev;  \
+    } else {                                                            \
+        (head)->tqh_last = (elm)->field.tqe_prev;                       \
+    }                                                                   \
+    atomic_set((elm)->field.tqe_prev, (elm)->field.tqe_next);           \
+    (elm)->field.tqe_prev = NULL;                                       \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_FOREACH_RCU(var, head, field)                            \
+    for ((var) = atomic_rcu_read(&(head)->tqh_first);                   \
+         (var);                                                         \
+         (var) = atomic_rcu_read(&(var)->field.tqe_next))
+
+#define QTAILQ_FOREACH_SAFE_RCU(var, head, field, next)                  \
+    for ((var) = atomic_rcu_read(&(head)->tqh_first);                    \
+         (var) && ((next) = atomic_rcu_read(&(var)->field.tqe_next), 1); \
+         (var) = (next))
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/qemu/seqlock.h b/include/qemu/seqlock.h
index 8dee11d101..fd408b7ec5 100644
--- a/include/qemu/seqlock.h
+++ b/include/qemu/seqlock.h
@@ -16,6 +16,7 @@
 
 #include "qemu/atomic.h"
 #include "qemu/thread.h"
+#include "qemu/lockable.h"
 
 typedef struct QemuSeqLock QemuSeqLock;
 
@@ -45,7 +46,26 @@ static inline void seqlock_write_end(QemuSeqLock *sl)
     atomic_set(&sl->sequence, sl->sequence + 1);
 }
 
-static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock_impl(QemuSeqLock *sl, QemuLockable *lock)
+{
+    qemu_lockable_lock(lock);
+    seqlock_write_begin(sl);
+}
+#define seqlock_write_lock(sl, lock) \
+    seqlock_write_lock_impl(sl, QEMU_MAKE_LOCKABLE(lock))
+
+/* Update the count and unlock, letting other writers in.  */
+static inline void seqlock_write_unlock_impl(QemuSeqLock *sl, QemuLockable *lock)
+{
+    seqlock_write_end(sl);
+    qemu_lockable_unlock(lock);
+}
+#define seqlock_write_unlock(sl, lock) \
+    seqlock_write_unlock_impl(sl, QEMU_MAKE_LOCKABLE(lock))
+
+
+static inline unsigned seqlock_read_begin(const QemuSeqLock *sl)
 {
     /* Always fail if a write is in progress.  */
     unsigned ret = atomic_read(&sl->sequence);
diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h
index fd27b34128..c903525062 100644
--- a/include/qemu/thread-posix.h
+++ b/include/qemu/thread-posix.h
@@ -6,8 +6,8 @@
 
 typedef QemuMutex QemuRecMutex;
 #define qemu_rec_mutex_destroy qemu_mutex_destroy
-#define qemu_rec_mutex_lock qemu_mutex_lock
-#define qemu_rec_mutex_trylock qemu_mutex_trylock
+#define qemu_rec_mutex_lock_impl    qemu_mutex_lock_impl
+#define qemu_rec_mutex_trylock_impl qemu_mutex_trylock_impl
 #define qemu_rec_mutex_unlock qemu_mutex_unlock
 
 struct QemuMutex {
diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h
index d668d789b4..50af5dd7ab 100644
--- a/include/qemu/thread-win32.h
+++ b/include/qemu/thread-win32.h
@@ -19,8 +19,9 @@ struct QemuRecMutex {
 };
 
 void qemu_rec_mutex_destroy(QemuRecMutex *mutex);
-void qemu_rec_mutex_lock(QemuRecMutex *mutex);
-int qemu_rec_mutex_trylock(QemuRecMutex *mutex);
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line);
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file,
+                                int line);
 void qemu_rec_mutex_unlock(QemuRecMutex *mutex);
 
 struct QemuCond {
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index ef7bd16123..dacebcfff0 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -16,6 +16,9 @@ typedef struct QemuThread QemuThread;
 #include "qemu/thread-posix.h"
 #endif
 
+/* include QSP header once QemuMutex, QemuCond etc. are defined */
+#include "qemu/qsp.h"
+
 #define QEMU_THREAD_JOINABLE 0
 #define QEMU_THREAD_DETACHED 1
 
@@ -25,10 +28,52 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line);
 void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line);
 void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line);
 
-#define qemu_mutex_lock(mutex) \
-        qemu_mutex_lock_impl(mutex, __FILE__, __LINE__)
-#define qemu_mutex_trylock(mutex) \
-        qemu_mutex_trylock_impl(mutex, __FILE__, __LINE__)
+typedef void (*QemuMutexLockFunc)(QemuMutex *m, const char *f, int l);
+typedef int (*QemuMutexTrylockFunc)(QemuMutex *m, const char *f, int l);
+typedef void (*QemuRecMutexLockFunc)(QemuRecMutex *m, const char *f, int l);
+typedef int (*QemuRecMutexTrylockFunc)(QemuRecMutex *m, const char *f, int l);
+typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f,
+                                 int l);
+
+extern QemuMutexLockFunc qemu_bql_mutex_lock_func;
+extern QemuMutexLockFunc qemu_mutex_lock_func;
+extern QemuMutexTrylockFunc qemu_mutex_trylock_func;
+extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func;
+extern QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func;
+extern QemuCondWaitFunc qemu_cond_wait_func;
+
+/* convenience macros to bypass the profiler */
+#define qemu_mutex_lock__raw(m)                         \
+        qemu_mutex_lock_impl(m, __FILE__, __LINE__)
+#define qemu_mutex_trylock__raw(m)                      \
+        qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
+
+#define qemu_mutex_lock(m) ({                                           \
+            QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func);  \
+            _f(m, __FILE__, __LINE__);                                  \
+        })
+
+#define qemu_mutex_trylock(m) ({                                        \
+            QemuMutexTrylockFunc _f = atomic_read(&qemu_mutex_trylock_func); \
+            _f(m, __FILE__, __LINE__);                                  \
+        })
+
+#define qemu_rec_mutex_lock(m) ({                                       \
+            QemuRecMutexLockFunc _f = atomic_read(&qemu_rec_mutex_lock_func); \
+            _f(m, __FILE__, __LINE__);                                  \
+        })
+
+#define qemu_rec_mutex_trylock(m) ({                            \
+            QemuRecMutexTrylockFunc _f;                         \
+            _f = atomic_read(&qemu_rec_mutex_trylock_func);     \
+            _f(m, __FILE__, __LINE__);                          \
+        })
+
+#define qemu_cond_wait(c, m) ({                                         \
+            QemuCondWaitFunc _f = atomic_read(&qemu_cond_wait_func);    \
+            _f(c, m, __FILE__, __LINE__);                               \
+        })
+
 #define qemu_mutex_unlock(mutex) \
         qemu_mutex_unlock_impl(mutex, __FILE__, __LINE__)
 
@@ -47,6 +92,16 @@ static inline void (qemu_mutex_unlock)(QemuMutex *mutex)
     qemu_mutex_unlock(mutex);
 }
 
+static inline void (qemu_rec_mutex_lock)(QemuRecMutex *mutex)
+{
+    qemu_rec_mutex_lock(mutex);
+}
+
+static inline int (qemu_rec_mutex_trylock)(QemuRecMutex *mutex)
+{
+    return qemu_rec_mutex_trylock(mutex);
+}
+
 /* Prototypes for other functions are in thread-posix.h/thread-win32.h.  */
 void qemu_rec_mutex_init(QemuRecMutex *mutex);
 
@@ -63,9 +118,6 @@ void qemu_cond_broadcast(QemuCond *cond);
 void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex,
                          const char *file, const int line);
 
-#define qemu_cond_wait(cond, mutex) \
-        qemu_cond_wait_impl(cond, mutex, __FILE__, __LINE__)
-
 static inline void (qemu_cond_wait)(QemuCond *cond, QemuMutex *mutex)
 {
     qemu_cond_wait(cond, mutex);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index ecf6ed556a..dc130cd307 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -26,6 +26,7 @@
 #include "exec/memattrs.h"
 #include "qapi/qapi-types-run-state.h"
 #include "qemu/bitmap.h"
+#include "qemu/rcu_queue.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 
@@ -442,13 +443,11 @@ struct CPUState {
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
 extern struct CPUTailQ cpus;
-#define CPU_NEXT(cpu) QTAILQ_NEXT(cpu, node)
-#define CPU_FOREACH(cpu) QTAILQ_FOREACH(cpu, &cpus, node)
+#define first_cpu        QTAILQ_FIRST_RCU(&cpus)
+#define CPU_NEXT(cpu)    QTAILQ_NEXT_RCU(cpu, node)
+#define CPU_FOREACH(cpu) QTAILQ_FOREACH_RCU(cpu, &cpus, node)
 #define CPU_FOREACH_SAFE(cpu, next_cpu) \
-    QTAILQ_FOREACH_SAFE(cpu, &cpus, node, next_cpu)
-#define CPU_FOREACH_REVERSE(cpu) \
-    QTAILQ_FOREACH_REVERSE(cpu, &cpus, CPUTailQ, node)
-#define first_cpu QTAILQ_FIRST(&cpus)
+    QTAILQ_FOREACH_SAFE_RCU(cpu, &cpus, node, next_cpu)
 
 extern __thread CPUState *current_cpu;
 
diff --git a/include/standard-headers/linux/qemu_fw_cfg.h b/include/standard-headers/linux/qemu_fw_cfg.h
new file mode 100644
index 0000000000..cb93f6678d
--- /dev/null
+++ b/include/standard-headers/linux/qemu_fw_cfg.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+#ifndef _LINUX_FW_CFG_H
+#define _LINUX_FW_CFG_H
+
+#include "standard-headers/linux/types.h"
+
+#define FW_CFG_ACPI_DEVICE_ID	"QEMU0002"
+
+/* selector key values for "well-known" fw_cfg entries */
+#define FW_CFG_SIGNATURE	0x00
+#define FW_CFG_ID		0x01
+#define FW_CFG_UUID		0x02
+#define FW_CFG_RAM_SIZE		0x03
+#define FW_CFG_NOGRAPHIC	0x04
+#define FW_CFG_NB_CPUS		0x05
+#define FW_CFG_MACHINE_ID	0x06
+#define FW_CFG_KERNEL_ADDR	0x07
+#define FW_CFG_KERNEL_SIZE	0x08
+#define FW_CFG_KERNEL_CMDLINE	0x09
+#define FW_CFG_INITRD_ADDR	0x0a
+#define FW_CFG_INITRD_SIZE	0x0b
+#define FW_CFG_BOOT_DEVICE	0x0c
+#define FW_CFG_NUMA		0x0d
+#define FW_CFG_BOOT_MENU	0x0e
+#define FW_CFG_MAX_CPUS		0x0f
+#define FW_CFG_KERNEL_ENTRY	0x10
+#define FW_CFG_KERNEL_DATA	0x11
+#define FW_CFG_INITRD_DATA	0x12
+#define FW_CFG_CMDLINE_ADDR	0x13
+#define FW_CFG_CMDLINE_SIZE	0x14
+#define FW_CFG_CMDLINE_DATA	0x15
+#define FW_CFG_SETUP_ADDR	0x16
+#define FW_CFG_SETUP_SIZE	0x17
+#define FW_CFG_SETUP_DATA	0x18
+#define FW_CFG_FILE_DIR		0x19
+
+#define FW_CFG_FILE_FIRST	0x20
+#define FW_CFG_FILE_SLOTS_MIN	0x10
+
+#define FW_CFG_WRITE_CHANNEL	0x4000
+#define FW_CFG_ARCH_LOCAL	0x8000
+#define FW_CFG_ENTRY_MASK	(~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
+
+#define FW_CFG_INVALID		0xffff
+
+/* width in bytes of fw_cfg control register */
+#define FW_CFG_CTL_SIZE		0x02
+
+/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
+#define FW_CFG_MAX_FILE_PATH	56
+
+/* size in bytes of fw_cfg signature */
+#define FW_CFG_SIG_SIZE 4
+
+/* FW_CFG_ID bits */
+#define FW_CFG_VERSION		0x01
+#define FW_CFG_VERSION_DMA	0x02
+
+/* fw_cfg file directory entry type */
+struct fw_cfg_file {
+	uint32_t size;
+	uint16_t select;
+	uint16_t reserved;
+	char name[FW_CFG_MAX_FILE_PATH];
+};
+
+/* FW_CFG_DMA_CONTROL bits */
+#define FW_CFG_DMA_CTL_ERROR	0x01
+#define FW_CFG_DMA_CTL_READ	0x02
+#define FW_CFG_DMA_CTL_SKIP	0x04
+#define FW_CFG_DMA_CTL_SELECT	0x08
+#define FW_CFG_DMA_CTL_WRITE	0x10
+
+#define FW_CFG_DMA_SIGNATURE    0x51454d5520434647ULL /* "QEMU CFG" */
+
+/* Control as first field allows for different structures selected by this
+ * field, which might be useful in the future
+ */
+struct fw_cfg_dma_access {
+	uint32_t control;
+	uint32_t length;
+	uint64_t address;
+};
+
+#define FW_CFG_VMCOREINFO_FILENAME "etc/vmcoreinfo"
+
+#define FW_CFG_VMCOREINFO_FORMAT_NONE 0x0
+#define FW_CFG_VMCOREINFO_FORMAT_ELF 0x1
+
+struct fw_cfg_vmcoreinfo {
+	uint16_t host_format;
+	uint16_t guest_format;
+	uint32_t size;
+	uint64_t paddr;
+};
+
+#endif
diff --git a/linux-user/main.c b/linux-user/main.c
index ea00dd9057..923cbb753a 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -126,7 +126,7 @@ void fork_end(int child)
            Discard information about the parent threads.  */
         CPU_FOREACH_SAFE(cpu, next_cpu) {
             if (cpu != thread_cpu) {
-                QTAILQ_REMOVE(&cpus, cpu, node);
+                QTAILQ_REMOVE_RCU(&cpus, cpu, node);
             }
         }
         qemu_init_cpu_list();
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 02fba7606d..850b72a0c7 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8132,7 +8132,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
             TaskState *ts;
 
             /* Remove the CPU from the list.  */
-            QTAILQ_REMOVE(&cpus, cpu, node);
+            QTAILQ_REMOVE_RCU(&cpus, cpu, node);
 
             cpu_list_unlock();
 
diff --git a/monitor.c b/monitor.c
index a1999e396c..94f673511b 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1454,6 +1454,17 @@ static void hmp_info_opcount(Monitor *mon, const QDict *qdict)
 }
 #endif
 
+static void hmp_info_sync_profile(Monitor *mon, const QDict *qdict)
+{
+    int64_t max = qdict_get_try_int(qdict, "max", 10);
+    bool mean = qdict_get_try_bool(qdict, "mean", false);
+    bool coalesce = !qdict_get_try_bool(qdict, "no_coalesce", false);
+    enum QSPSortBy sort_by;
+
+    sort_by = mean ? QSP_SORT_BY_AVG_WAIT_TIME : QSP_SORT_BY_TOTAL_WAIT_TIME;
+    qsp_report((FILE *)mon, monitor_fprintf, max, sort_by, coalesce);
+}
+
 static void hmp_info_history(Monitor *mon, const QDict *qdict)
 {
     int i;
diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c
index 4754282ad7..d856d41b55 100644
--- a/pc-bios/optionrom/linuxboot_dma.c
+++ b/pc-bios/optionrom/linuxboot_dma.c
@@ -58,8 +58,6 @@ asm(
 "   jmp load_kernel\n"
 );
 
-#include "../../include/hw/nvram/fw_cfg_keys.h"
-
 /* QEMU_CFG_DMA_CONTROL bits */
 #define BIOS_CFG_DMA_CTL_ERROR   0x01
 #define BIOS_CFG_DMA_CTL_READ    0x02
@@ -73,6 +71,8 @@ asm(
 #define uint32_t unsigned int
 #define uint16_t unsigned short
 
+#include "../../include/standard-headers/linux/qemu_fw_cfg.h"
+
 #define barrier() asm("" : : : "memory")
 
 typedef struct FWCfgDmaAccess {
diff --git a/pc-bios/optionrom/optionrom.h b/pc-bios/optionrom/optionrom.h
index 6c4c2c82f4..a2b612f1a7 100644
--- a/pc-bios/optionrom/optionrom.h
+++ b/pc-bios/optionrom/optionrom.h
@@ -19,7 +19,20 @@
  */
 
 
-#include "../../include/hw/nvram/fw_cfg_keys.h"
+#define FW_CFG_KERNEL_ADDR      0x07
+#define FW_CFG_KERNEL_SIZE      0x08
+#define FW_CFG_KERNEL_CMDLINE   0x09
+#define FW_CFG_INITRD_ADDR      0x0a
+#define FW_CFG_INITRD_SIZE      0x0b
+#define FW_CFG_KERNEL_ENTRY     0x10
+#define FW_CFG_KERNEL_DATA      0x11
+#define FW_CFG_INITRD_DATA      0x12
+#define FW_CFG_CMDLINE_ADDR     0x13
+#define FW_CFG_CMDLINE_SIZE     0x14
+#define FW_CFG_CMDLINE_DATA     0x15
+#define FW_CFG_SETUP_ADDR       0x16
+#define FW_CFG_SETUP_SIZE       0x17
+#define FW_CFG_SETUP_DATA       0x18
 
 #define BIOS_CFG_IOPORT_CFG	0x510
 #define BIOS_CFG_IOPORT_DATA	0x511
diff --git a/qemu-options.hx b/qemu-options.hx
index 5515dfaba5..d66ab1bddb 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3953,6 +3953,16 @@ Dump json-encoded vmstate information for current machine type to file
 in @var{file}
 ETEXI
 
+DEF("enable-sync-profile", 0, QEMU_OPTION_enable_sync_profile,
+    "-enable-sync-profile\n"
+    "                enable synchronization profiling\n",
+    QEMU_ARCH_ALL)
+STEXI
+@item -enable-sync-profile
+@findex -enable-sync-profile
+Enable synchronization profiling.
+ETEXI
+
 STEXI
 @end table
 ETEXI
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 42e1c50dd8..3765b0e35e 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1367,10 +1367,10 @@ sub process {
 		# extract the filename as it passes
 		if ($line =~ /^diff --git.*?(\S+)$/) {
 			$realfile = $1;
-			$realfile =~ s@^([^/]*)/@@;
+			$realfile =~ s@^([^/]*)/@@ if (!$file);
 		} elsif ($line =~ /^\+\+\+\s+(\S+)/) {
 			$realfile = $1;
-			$realfile =~ s@^([^/]*)/@@;
+			$realfile =~ s@^([^/]*)/@@ if (!$file);
 
 			$p1_prefix = $1;
 			if (!$file && $tree && $p1_prefix ne '' &&
@@ -1929,9 +1929,8 @@ sub process {
 			my ($where, $prefix) = ($-[1], $1);
 			if ($prefix !~ /$Type\s+$/ &&
 			    ($where != 0 || $prefix !~ /^.\s+$/) &&
-			    $prefix !~ /{\s+$/ &&
 			    $prefix !~ /\#\s*define[^(]*\([^)]*\)\s+$/ &&
-			    $prefix !~ /,\s+$/) {
+			    $prefix !~ /[,{:]\s+$/) {
 				ERROR("space prohibited before open square bracket '['\n" . $herecurr);
 			}
 		}
diff --git a/scripts/qemu-guest-agent/fsfreeze-hook b/scripts/qemu-guest-agent/fsfreeze-hook
index c27b29f282..13aafd4845 100755
--- a/scripts/qemu-guest-agent/fsfreeze-hook
+++ b/scripts/qemu-guest-agent/fsfreeze-hook
@@ -13,7 +13,7 @@ FSFREEZE_D=$(dirname -- "$0")/fsfreeze-hook.d
 # Check whether file $1 is a backup or rpm-generated file and should be ignored
 is_ignored_file() {
     case "$1" in
-        *~ | *.bak | *.orig | *.rpmnew | *.rpmorig | *.rpmsave | *.sample)
+        *~ | *.bak | *.orig | *.rpmnew | *.rpmorig | *.rpmsave | *.sample | *.dpkg-old | *.dpkg-new | *.dpkg-tmp | *.dpkg-dist | *.dpkg-bak | *.dpkg-backup | *.dpkg-remove)
             return 0 ;;
     esac
     return 1
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index feb75390aa..0a964fe240 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -165,7 +165,9 @@ EOF
 
 rm -rf "$output/include/standard-headers/linux"
 mkdir -p "$output/include/standard-headers/linux"
-for i in "$tmpdir"/include/linux/*virtio*.h "$tmpdir/include/linux/input.h" \
+for i in "$tmpdir"/include/linux/*virtio*.h \
+         "$tmpdir/include/linux/qemu_fw_cfg.h" \
+         "$tmpdir/include/linux/input.h" \
          "$tmpdir/include/linux/input-event-codes.h" \
          "$tmpdir/include/linux/pci_regs.h" \
          "$tmpdir/include/linux/ethtool.h" "$tmpdir/include/linux/kernel.h" \
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 1528a712a0..ed037aabee 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -301,7 +301,11 @@ void put_multipath_config(struct config *conf)
 static void multipath_pr_init(void)
 {
     udev = udev_new();
+#ifdef CONFIG_MPATH_NEW_API
     multipath_conf = mpath_lib_init();
+#else
+    mpath_lib_init(udev);
+#endif
 }
 
 static int is_mpath(int fd)
diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c
index 9b6db2e740..eb745d7d6a 100644
--- a/stubs/iothread-lock.c
+++ b/stubs/iothread-lock.c
@@ -7,7 +7,7 @@ bool qemu_mutex_iothread_locked(void)
     return true;
 }
 
-void qemu_mutex_lock_iothread(void)
+void qemu_mutex_lock_iothread_impl(const char *file, int line)
 {
 }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index c9bce1efcb..1b0548084a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -12331,6 +12331,7 @@ int arm_rmode_to_sf(int rmode)
         /* FIXME: add support for TIEAWAY and ODD */
         qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
                       rmode);
+        /* fall through for now */
     case FPROUNDING_TIEEVEN:
     default:
         rmode = float_round_nearest_even;
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 4e4fe8fa8b..f24295e6e4 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3880,6 +3880,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type,
     }
 
     props = qdict_new();
+    ret->model = g_new0(CpuModelInfo, 1);
+    ret->model->props = QOBJECT(props);
+    ret->model->has_props = true;
 
     switch (type) {
     case CPU_MODEL_EXPANSION_TYPE_STATIC:
@@ -3900,15 +3903,9 @@ arch_query_cpu_model_expansion(CpuModelExpansionType type,
         goto out;
     }
 
-    if (!props) {
-        props = qdict_new();
-    }
     x86_cpu_to_dict(xc, props);
 
-    ret->model = g_new0(CpuModelInfo, 1);
     ret->model->name = g_strdup(base_name);
-    ret->model->props = QOBJECT(props);
-    ret->model->has_props = true;
 
 out:
     object_unref(OBJECT(xc));
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 9cad5812cd..b572a8e4aa 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1515,6 +1515,8 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env);
 int cpu_get_pic_interrupt(CPUX86State *s);
 /* MSDOS compatibility mode FPU exception support */
 void cpu_set_ferr(CPUX86State *s);
+/* mpx_helper.c */
+void cpu_sync_bndcs_hflags(CPUX86State *env);
 
 /* this function must always be used to load data in the segment
    cache: it synchronizes the hflags with the segment cache values */
@@ -1557,6 +1559,8 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
 #error HF_CPL_MASK is hardcoded
 #endif
             env->hflags = (env->hflags & ~HF_CPL_MASK) | cpl;
+            /* Possibly switch between BNDCFGS and BNDCFGU */
+            cpu_sync_bndcs_hflags(env);
         }
         new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
             >> (DESC_B_SHIFT - HF_SS32_SHIFT);
@@ -1889,9 +1893,6 @@ void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip,
  */
 void x86_cpu_change_kvm_default(const char *prop, const char *value);
 
-/* mpx_helper.c */
-void cpu_sync_bndcs_hflags(CPUX86State *env);
-
 /* Return name of 32-bit register, from a R_* constant */
 const char *get_register_name_32(unsigned int reg);
 
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 9313602d3d..0b2a07d3a4 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -1381,17 +1381,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     int ret;
     struct utsname utsname;
 
-#ifdef KVM_CAP_XSAVE
     has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
-#endif
-
-#ifdef KVM_CAP_XCRS
     has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
-#endif
-
-#ifdef KVM_CAP_PIT_STATE2
     has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
-#endif
 
     hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
 
diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index 00301a0c04..d1cbc6ebf0 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -518,6 +518,11 @@ static void switch_tss(CPUX86State *env, int tss_selector,
 
 static inline unsigned int get_sp_mask(unsigned int e2)
 {
+#ifdef TARGET_X86_64
+    if (e2 & DESC_L_MASK) {
+        return 0;
+    } else
+#endif
     if (e2 & DESC_B_MASK) {
         return 0xffffffff;
     } else {
@@ -1628,8 +1633,8 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
         }
         limit = get_seg_limit(e1, e2);
         if (new_eip > limit &&
-            !(env->hflags & HF_LMA_MASK) && !(e2 & DESC_L_MASK)) {
-            raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
+            (!(env->hflags & HF_LMA_MASK) || !(e2 & DESC_L_MASK))) {
+            raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
         }
         cpu_x86_load_seg_cache(env, R_CS, (new_cs & 0xfffc) | cpl,
                        get_seg_base(e1, e2), limit, e2);
@@ -1640,6 +1645,14 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
         rpl = new_cs & 3;
         cpl = env->hflags & HF_CPL_MASK;
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
+
+#ifdef TARGET_X86_64
+        if (env->efer & MSR_EFER_LMA) {
+            if (type != 12) {
+                raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
+            }
+        }
+#endif
         switch (type) {
         case 1: /* 286 TSS */
         case 9: /* 386 TSS */
@@ -1662,6 +1675,23 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             if (type == 12) {
                 new_eip |= (e2 & 0xffff0000);
             }
+
+#ifdef TARGET_X86_64
+            if (env->efer & MSR_EFER_LMA) {
+                /* load the upper 8 bytes of the 64-bit call gate */
+                if (load_segment_ra(env, &e1, &e2, new_cs + 8, GETPC())) {
+                    raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc,
+                                           GETPC());
+                }
+                type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+                if (type != 0) {
+                    raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc,
+                                           GETPC());
+                }
+                new_eip |= ((target_ulong)e1) << 32;
+            }
+#endif
+
             if (load_segment_ra(env, &e1, &e2, gate_cs, GETPC()) != 0) {
                 raise_exception_err_ra(env, EXCP0D_GPF, gate_cs & 0xfffc, GETPC());
             }
@@ -1675,11 +1705,22 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                 (!(e2 & DESC_C_MASK) && (dpl != cpl))) {
                 raise_exception_err_ra(env, EXCP0D_GPF, gate_cs & 0xfffc, GETPC());
             }
+#ifdef TARGET_X86_64
+            if (env->efer & MSR_EFER_LMA) {
+                if (!(e2 & DESC_L_MASK)) {
+                    raise_exception_err_ra(env, EXCP0D_GPF, gate_cs & 0xfffc, GETPC());
+                }
+                if (e2 & DESC_B_MASK) {
+                    raise_exception_err_ra(env, EXCP0D_GPF, gate_cs & 0xfffc, GETPC());
+                }
+            }
+#endif
             if (!(e2 & DESC_P_MASK)) {
                 raise_exception_err_ra(env, EXCP0D_GPF, gate_cs & 0xfffc, GETPC());
             }
             limit = get_seg_limit(e1, e2);
-            if (new_eip > limit) {
+            if (new_eip > limit &&
+                (!(env->hflags & HF_LMA_MASK) || !(e2 & DESC_L_MASK))) {
                 raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
             }
             cpu_x86_load_seg_cache(env, R_CS, (gate_cs & 0xfffc) | cpl,
@@ -1724,12 +1765,12 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                             int shift, target_ulong next_eip)
 {
     int new_stack, i;
-    uint32_t e1, e2, cpl, dpl, rpl, selector, offset, param_count;
-    uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, sp, type, ss_dpl, sp_mask;
+    uint32_t e1, e2, cpl, dpl, rpl, selector, param_count;
+    uint32_t ss = 0, ss_e1 = 0, ss_e2 = 0, type, ss_dpl, sp_mask;
     uint32_t val, limit, old_sp_mask;
-    target_ulong ssp, old_ssp;
+    target_ulong ssp, old_ssp, offset, sp;
 
-    LOG_PCALL("lcall %04x:%08x s=%d\n", new_cs, (uint32_t)new_eip, shift);
+    LOG_PCALL("lcall %04x:" TARGET_FMT_lx " s=%d\n", new_cs, new_eip, shift);
     LOG_PCALL_STATE(CPU(x86_env_get_cpu(env)));
     if ((new_cs & 0xfffc) == 0) {
         raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
@@ -1807,6 +1848,15 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
         type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
         dpl = (e2 >> DESC_DPL_SHIFT) & 3;
         rpl = new_cs & 3;
+
+#ifdef TARGET_X86_64
+        if (env->efer & MSR_EFER_LMA) {
+            if (type != 12) {
+                raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, GETPC());
+            }
+        }
+#endif
+
         switch (type) {
         case 1: /* available 286 TSS */
         case 9: /* available 386 TSS */
@@ -1833,8 +1883,23 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             raise_exception_err_ra(env, EXCP0B_NOSEG,  new_cs & 0xfffc, GETPC());
         }
         selector = e1 >> 16;
-        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
         param_count = e2 & 0x1f;
+        offset = (e2 & 0xffff0000) | (e1 & 0x0000ffff);
+#ifdef TARGET_X86_64
+        if (env->efer & MSR_EFER_LMA) {
+            /* load the upper 8 bytes of the 64-bit call gate */
+            if (load_segment_ra(env, &e1, &e2, new_cs + 8, GETPC())) {
+                raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc,
+                                       GETPC());
+            }
+            type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
+            if (type != 0) {
+                raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc,
+                                       GETPC());
+            }
+            offset |= ((target_ulong)e1) << 32;
+        }
+#endif
         if ((selector & 0xfffc) == 0) {
             raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
         }
@@ -1849,46 +1914,80 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
         if (dpl > cpl) {
             raise_exception_err_ra(env, EXCP0D_GPF, selector & 0xfffc, GETPC());
         }
+#ifdef TARGET_X86_64
+        if (env->efer & MSR_EFER_LMA) {
+            if (!(e2 & DESC_L_MASK)) {
+                raise_exception_err_ra(env, EXCP0D_GPF, selector & 0xfffc, GETPC());
+            }
+            if (e2 & DESC_B_MASK) {
+                raise_exception_err_ra(env, EXCP0D_GPF, selector & 0xfffc, GETPC());
+            }
+            shift++;
+        }
+#endif
         if (!(e2 & DESC_P_MASK)) {
             raise_exception_err_ra(env, EXCP0B_NOSEG, selector & 0xfffc, GETPC());
         }
 
         if (!(e2 & DESC_C_MASK) && dpl < cpl) {
             /* to inner privilege */
-            get_ss_esp_from_tss(env, &ss, &sp, dpl, GETPC());
-            LOG_PCALL("new ss:esp=%04x:%08x param_count=%d env->regs[R_ESP]="
-                      TARGET_FMT_lx "\n", ss, sp, param_count,
-                      env->regs[R_ESP]);
-            if ((ss & 0xfffc) == 0) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
-            }
-            if ((ss & 3) != dpl) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
-            }
-            if (load_segment_ra(env, &ss_e1, &ss_e2, ss, GETPC()) != 0) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
-            }
-            ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
-            if (ss_dpl != dpl) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
-            }
-            if (!(ss_e2 & DESC_S_MASK) ||
-                (ss_e2 & DESC_CS_MASK) ||
-                !(ss_e2 & DESC_W_MASK)) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
-            }
-            if (!(ss_e2 & DESC_P_MASK)) {
-                raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+#ifdef TARGET_X86_64
+            if (shift == 2) {
+                sp = get_rsp_from_tss(env, dpl);
+                ss = dpl;  /* SS = NULL selector with RPL = new CPL */
+                new_stack = 1;
+                sp_mask = 0;
+                ssp = 0;  /* SS base is always zero in IA-32e mode */
+                LOG_PCALL("new ss:rsp=%04x:" TARGET_FMT_lx " env->regs[R_ESP]="
+                          TARGET_FMT_lx "\n", ss, sp, env->regs[R_ESP]);
+            } else
+#endif
+            {
+                uint32_t sp32;
+                get_ss_esp_from_tss(env, &ss, &sp32, dpl, GETPC());
+                LOG_PCALL("new ss:esp=%04x:%08x param_count=%d env->regs[R_ESP]="
+                          TARGET_FMT_lx "\n", ss, sp32, param_count,
+                          env->regs[R_ESP]);
+                sp = sp32;
+                if ((ss & 0xfffc) == 0) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+                if ((ss & 3) != dpl) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+                if (load_segment_ra(env, &ss_e1, &ss_e2, ss, GETPC()) != 0) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+                ss_dpl = (ss_e2 >> DESC_DPL_SHIFT) & 3;
+                if (ss_dpl != dpl) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+                if (!(ss_e2 & DESC_S_MASK) ||
+                    (ss_e2 & DESC_CS_MASK) ||
+                    !(ss_e2 & DESC_W_MASK)) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+                if (!(ss_e2 & DESC_P_MASK)) {
+                    raise_exception_err_ra(env, EXCP0A_TSS, ss & 0xfffc, GETPC());
+                }
+
+                sp_mask = get_sp_mask(ss_e2);
+                ssp = get_seg_base(ss_e1, ss_e2);
             }
 
             /* push_size = ((param_count * 2) + 8) << shift; */
 
             old_sp_mask = get_sp_mask(env->segs[R_SS].flags);
             old_ssp = env->segs[R_SS].base;
-
-            sp_mask = get_sp_mask(ss_e2);
-            ssp = get_seg_base(ss_e1, ss_e2);
-            if (shift) {
+#ifdef TARGET_X86_64
+            if (shift == 2) {
+                /* XXX: verify if new stack address is canonical */
+                PUSHQ_RA(sp, env->segs[R_SS].selector, GETPC());
+                PUSHQ_RA(sp, env->regs[R_ESP], GETPC());
+                /* parameters aren't supported for 64-bit call gates */
+            } else
+#endif
+            if (shift == 1) {
                 PUSHL_RA(ssp, sp, sp_mask, env->segs[R_SS].selector, GETPC());
                 PUSHL_RA(ssp, sp, sp_mask, env->regs[R_ESP], GETPC());
                 for (i = param_count - 1; i >= 0; i--) {
@@ -1917,7 +2016,13 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
             new_stack = 0;
         }
 
-        if (shift) {
+#ifdef TARGET_X86_64
+        if (shift == 2) {
+            PUSHQ_RA(sp, env->segs[R_CS].selector, GETPC());
+            PUSHQ_RA(sp, next_eip, GETPC());
+        } else
+#endif
+        if (shift == 1) {
             PUSHL_RA(ssp, sp, sp_mask, env->segs[R_CS].selector, GETPC());
             PUSHL_RA(ssp, sp, sp_mask, next_eip, GETPC());
         } else {
@@ -1928,11 +2033,18 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
         /* from this point, not restartable */
 
         if (new_stack) {
-            ss = (ss & ~3) | dpl;
-            cpu_x86_load_seg_cache(env, R_SS, ss,
-                                   ssp,
-                                   get_seg_limit(ss_e1, ss_e2),
-                                   ss_e2);
+#ifdef TARGET_X86_64
+            if (shift == 2) {
+                cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
+            } else
+#endif
+            {
+                ss = (ss & ~3) | dpl;
+                cpu_x86_load_seg_cache(env, R_SS, ss,
+                                       ssp,
+                                       get_seg_limit(ss_e1, ss_e2),
+                                       ss_e2);
+            }
         }
 
         selector = (selector & ~3) | dpl;
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 07d185e7b6..1f9d1d9b24 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4689,6 +4689,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x82:
         if (CODE64(s))
             goto illegal_op;
+        /* fall through */
     case 0x80: /* GRP1 */
     case 0x81:
     case 0x83:
@@ -8292,6 +8293,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x10e ... 0x10f:
         /* 3DNow! instructions, ignore prefixes */
         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
+        /* fall through */
     case 0x110 ... 0x117:
     case 0x128 ... 0x12f:
     case 0x138 ... 0x13a:
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 12e765ba1f..265d25c937 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -1096,7 +1096,7 @@ void s390_set_qemu_cpu_model(uint16_t type, uint8_t gen, uint8_t ec_ga,
     const S390CPUDef *def = s390_find_cpu_def(type, gen, ec_ga, NULL);
 
     g_assert(def);
-    g_assert(QTAILQ_EMPTY(&cpus));
+    g_assert(QTAILQ_EMPTY_RCU(&cpus));
 
     /* TCG emulates some features that can usually not be enabled with
      * the emulated machine generation. Make sure they can be enabled
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 760a0f18b6..961b28ba0b 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -116,6 +116,10 @@ check-unit-y += tests/rcutorture$(EXESUF)
 gcov-files-rcutorture-y = util/rcu.c
 check-unit-y += tests/test-rcu-list$(EXESUF)
 gcov-files-test-rcu-list-y = util/rcu.c
+check-unit-y += tests/test-rcu-simpleq$(EXESUF)
+gcov-files-test-rcu-simpleq-y = util/rcu.c
+check-unit-y += tests/test-rcu-tailq$(EXESUF)
+gcov-files-test-rcu-tailq-y = util/rcu.c
 check-unit-y += tests/test-qdist$(EXESUF)
 gcov-files-test-qdist-y = util/qdist.c
 check-unit-y += tests/test-qht$(EXESUF)
@@ -124,7 +128,7 @@ check-unit-y += tests/test-qht-par$(EXESUF)
 gcov-files-test-qht-par-y = util/qht.c
 check-unit-y += tests/test-bitops$(EXESUF)
 check-unit-y += tests/test-bitcnt$(EXESUF)
-check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF)
+check-unit-y += tests/test-qdev-global-props$(EXESUF)
 check-unit-y += tests/check-qom-interface$(EXESUF)
 gcov-files-check-qom-interface-y = qom/object.c
 check-unit-y += tests/check-qom-proplist$(EXESUF)
@@ -402,9 +406,7 @@ check-qtest-s390x-$(CONFIG_SLIRP) += tests/test-netfilter$(EXESUF)
 check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-mirror$(EXESUF)
 check-qtest-s390x-$(CONFIG_POSIX) += tests/test-filter-redirector$(EXESUF)
 check-qtest-s390x-y += tests/drive_del-test$(EXESUF)
-check-qtest-s390x-y += tests/virtio-balloon-test$(EXESUF)
-check-qtest-s390x-y += tests/virtio-console-test$(EXESUF)
-check-qtest-s390x-y += tests/virtio-serial-test$(EXESUF)
+check-qtest-s390x-y += tests/virtio-ccw-test$(EXESUF)
 check-qtest-s390x-y += tests/cpu-plug-test$(EXESUF)
 
 check-qtest-generic-y += tests/machine-none-test$(EXESUF)
@@ -600,6 +602,8 @@ test-obj-y = tests/check-qnum.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \
 	tests/test-opts-visitor.o tests/test-qmp-event.o \
 	tests/rcutorture.o tests/test-rcu-list.o \
+	tests/test-rcu-simpleq.o \
+	tests/test-rcu-tailq.o \
 	tests/test-qdist.o tests/test-shift128.o \
 	tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
 	tests/atomic_add-bench.o
@@ -649,6 +653,8 @@ tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o $(test-util-obj-y)
 tests/test-int128$(EXESUF): tests/test-int128.o
 tests/rcutorture$(EXESUF): tests/rcutorture.o $(test-util-obj-y)
 tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o $(test-util-obj-y)
+tests/test-rcu-simpleq$(EXESUF): tests/test-rcu-simpleq.o $(test-util-obj-y)
+tests/test-rcu-tailq$(EXESUF): tests/test-rcu-tailq.o $(test-util-obj-y)
 tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y)
 tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
 tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
@@ -809,6 +815,7 @@ tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o
 tests/tco-test$(EXESUF): tests/tco-test.o $(libqos-pc-obj-y)
 tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o $(libqos-virtio-obj-y)
 tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o $(libqos-virtio-obj-y)
+tests/virtio-ccw-test$(EXESUF): tests/virtio-ccw-test.o
 tests/virtio-net-test$(EXESUF): tests/virtio-net-test.o $(libqos-pc-obj-y) $(libqos-virtio-obj-y)
 tests/virtio-rng-test$(EXESUF): tests/virtio-rng-test.o $(libqos-pc-obj-y)
 tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o $(libqos-virtio-obj-y)
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
index f96d448f77..2f6c72f63a 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/atomic_add-bench.c
@@ -26,6 +26,7 @@ static bool test_stop;
 static const char commands_string[] =
     " -n = number of threads\n"
     " -m = use mutexes instead of atomic increments\n"
+    " -p = enable sync profiler\n"
     " -d = duration in seconds\n"
     " -r = range (will be rounded up to pow2)";
 
@@ -143,7 +144,7 @@ static void parse_args(int argc, char *argv[])
     int c;
 
     for (;;) {
-        c = getopt(argc, argv, "hd:n:mr:");
+        c = getopt(argc, argv, "hd:n:mpr:");
         if (c < 0) {
             break;
         }
@@ -160,6 +161,9 @@ static void parse_args(int argc, char *argv[])
         case 'm':
             use_mutex = true;
             break;
+        case 'p':
+            qsp_enable();
+            break;
         case 'r':
             range = pow2ceil(atoi(optarg));
             break;
diff --git a/tests/boot-order-test.c b/tests/boot-order-test.c
index 9d98c48a3d..c60ebcf9d9 100644
--- a/tests/boot-order-test.c
+++ b/tests/boot-order-test.c
@@ -14,7 +14,7 @@
 #include "libqos/fw_cfg.h"
 #include "libqtest.h"
 #include "qapi/qmp/qdict.h"
-#include "hw/nvram/fw_cfg_keys.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 /* TODO actually test the results and get rid of this */
 #define qmp_discard_response(...) qobject_unref(qmp(__VA_ARGS__))
diff --git a/tests/cpu-plug-test.c b/tests/cpu-plug-test.c
index f5d57da60e..3e93c8e096 100644
--- a/tests/cpu-plug-test.c
+++ b/tests/cpu-plug-test.c
@@ -257,11 +257,11 @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
 
     if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
-        qtest_cb_for_every_machine(add_pc_test_case);
+        qtest_cb_for_every_machine(add_pc_test_case, g_test_quick());
     } else if (g_str_equal(arch, "ppc64")) {
-        qtest_cb_for_every_machine(add_pseries_test_case);
+        qtest_cb_for_every_machine(add_pseries_test_case, g_test_quick());
     } else if (g_str_equal(arch, "s390x")) {
-        qtest_cb_for_every_machine(add_s390x_test_case);
+        qtest_cb_for_every_machine(add_s390x_test_case, g_test_quick());
     }
 
     return g_test_run();
diff --git a/tests/device-introspect-test.c b/tests/device-introspect-test.c
index 0b4f221c29..a25092dfaa 100644
--- a/tests/device-introspect-test.c
+++ b/tests/device-introspect-test.c
@@ -103,7 +103,14 @@ static QList *device_type_list(bool abstract)
 static void test_one_device(const char *type)
 {
     QDict *resp;
-    char *help, *qom_tree;
+    char *help;
+    char *qom_tree_start, *qom_tree_end;
+    char *qtree_start, *qtree_end;
+
+    g_test_message("Testing device '%s'", type);
+
+    qom_tree_start = hmp("info qom-tree");
+    qtree_start = hmp("info qtree");
 
     resp = qmp("{'execute': 'device-list-properties',"
                " 'arguments': {'typename': %s}}",
@@ -115,10 +122,18 @@ static void test_one_device(const char *type)
 
     /*
      * Some devices leave dangling pointers in QOM behind.
-     * "info qom-tree" has a good chance at crashing then
+     * "info qom-tree" or "info qtree" have a good chance at crashing then.
+     * Also make sure that the tree did not change.
      */
-    qom_tree = hmp("info qom-tree");
-    g_free(qom_tree);
+    qom_tree_end = hmp("info qom-tree");
+    g_assert_cmpstr(qom_tree_start, ==, qom_tree_end);
+    g_free(qom_tree_start);
+    g_free(qom_tree_end);
+
+    qtree_end = hmp("info qtree");
+    g_assert_cmpstr(qtree_start, ==, qtree_end);
+    g_free(qtree_start);
+    g_free(qtree_end);
 }
 
 static void test_device_intro_list(void)
@@ -206,13 +221,13 @@ static void test_device_intro_abstract(void)
     qtest_end();
 }
 
-static void test_device_intro_concrete(void)
+static void test_device_intro_concrete(const void *args)
 {
     QList *types;
     QListEntry *entry;
     const char *type;
 
-    qtest_start(common_args);
+    qtest_start(args);
     types = device_type_list(false);
 
     QLIST_FOREACH_ENTRY(types, entry) {
@@ -224,6 +239,7 @@ static void test_device_intro_concrete(void)
 
     qobject_unref(types);
     qtest_end();
+    g_free((void *)args);
 }
 
 static void test_abstract_interfaces(void)
@@ -260,6 +276,26 @@ static void test_abstract_interfaces(void)
     qtest_end();
 }
 
+static void add_machine_test_case(const char *mname)
+{
+    char *path, *args;
+    /* Registers a "defaults" and a "nodefaults" test case for @mname. */
+    /* Ignore blacklisted machines */
+    if (g_str_equal("xenfv", mname) || g_str_equal("xenpv", mname)) {
+        return;
+    }
+    /* args is handed to the test, which g_free()s it when it finishes. */
+    path = g_strdup_printf("device/introspect/concrete/defaults/%s", mname);
+    args = g_strdup_printf("-M %s", mname);
+    qtest_add_data_func(path, args, test_device_intro_concrete);
+    g_free(path);
+    /* Same again, but with "-nodefaults" added to the arguments. */
+    path = g_strdup_printf("device/introspect/concrete/nodefaults/%s", mname);
+    args = g_strdup_printf("-nodefaults -M %s", mname);
+    qtest_add_data_func(path, args, test_device_intro_concrete);
+    g_free(path);
+}
+
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
@@ -268,8 +304,13 @@ int main(int argc, char **argv)
     qtest_add_func("device/introspect/list-fields", test_qom_list_fields);
     qtest_add_func("device/introspect/none", test_device_intro_none);
     qtest_add_func("device/introspect/abstract", test_device_intro_abstract);
-    qtest_add_func("device/introspect/concrete", test_device_intro_concrete);
     qtest_add_func("device/introspect/abstract-interfaces", test_abstract_interfaces);
+    if (g_test_quick()) {
+        qtest_add_data_func("device/introspect/concrete/defaults/none",
+                            g_strdup(common_args), test_device_intro_concrete);
+    } else {
+        qtest_cb_for_every_machine(add_machine_test_case, true);
+    }
 
     return g_test_run();
 }
diff --git a/tests/fw_cfg-test.c b/tests/fw_cfg-test.c
index 1548bf14b2..1c5103fe1c 100644
--- a/tests/fw_cfg-test.c
+++ b/tests/fw_cfg-test.c
@@ -13,7 +13,7 @@
 #include "qemu/osdep.h"
 
 #include "libqtest.h"
-#include "hw/nvram/fw_cfg_keys.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 #include "libqos/fw_cfg.h"
 
 static uint64_t ram_size = 128 << 20;
diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c
index 634b9c288a..b83cb8f0af 100644
--- a/tests/libqos/malloc-pc.c
+++ b/tests/libqos/malloc-pc.c
@@ -14,7 +14,7 @@
 #include "libqos/malloc-pc.h"
 #include "libqos/fw_cfg.h"
 
-#include "hw/nvram/fw_cfg_keys.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #include "qemu-common.h"
 
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 852ccff1ce..1105c37e08 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -991,7 +991,53 @@ bool qtest_big_endian(QTestState *s)
     return s->big_endian;
 }
 
-void qtest_cb_for_every_machine(void (*cb)(const char *machine))
+static bool qtest_check_machine_version(const char *mname, const char *basename,
+                                        int major, int minor)
+{
+    char *newname;
+    bool is_equal;
+    /* True iff @mname is exactly "<basename>-<major>.<minor>". */
+    newname = g_strdup_printf("%s-%i.%i", basename, major, minor);
+    is_equal = g_str_equal(mname, newname);
+    g_free(newname);
+
+    return is_equal;
+}
+
+static bool qtest_is_old_versioned_machine(const char *mname)
+{
+    const char *dash = strrchr(mname, '-');
+    const char *dot = strrchr(mname, '.');
+    const char *chr;
+    char *bname;
+    const int major = QEMU_VERSION_MAJOR;
+    const int minor = QEMU_VERSION_MINOR;
+    bool res = false;
+    /* Only names ending in "-<digits and dots>" are treated as versioned. */
+    if (dash && dot && dot > dash) {
+        for (chr = dash + 1; *chr; chr++) {
+            if (!qemu_isdigit(*chr) && *chr != '.') {
+                return false;
+            }
+        }
+        /*
+         * Now check if it is one of the latest versions. Check major + 1
+         * and minor + 1 versions as well, since they might already exist
+         * in the development branch.
+         */
+        bname = g_strdup(mname);
+        bname[dash - mname] = 0; /* cut the copy at the last '-' */
+        res = !qtest_check_machine_version(mname, bname, major + 1, 0) &&
+              !qtest_check_machine_version(mname, bname, major, minor + 1) &&
+              !qtest_check_machine_version(mname, bname, major, minor);
+        g_free(bname);
+    }
+    /* false for unversioned names and for the three versions tested above. */
+    return res;
+}
+
+void qtest_cb_for_every_machine(void (*cb)(const char *machine),
+                                bool skip_old_versioned)
 {
     QDict *response, *minfo;
     QList *list;
@@ -1014,7 +1060,9 @@ void qtest_cb_for_every_machine(void (*cb)(const char *machine))
         qstr = qobject_to(QString, qobj);
         g_assert(qstr);
         mname = qstring_get_str(qstr);
-        cb(mname);
+        if (!skip_old_versioned || !qtest_is_old_versioned_machine(mname)) {
+            cb(mname);
+        }
     }
 
     qtest_end();
diff --git a/tests/libqtest.h b/tests/libqtest.h
index def1edaafa..1159b73d15 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -954,10 +954,12 @@ QDict *qmp_fd(int fd, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
 /**
  * qtest_cb_for_every_machine:
  * @cb: Pointer to the callback function
+ * @skip_old_versioned: true if versioned old machine types should be skipped
  *
  *  Call a callback function for every name of all available machines.
  */
-void qtest_cb_for_every_machine(void (*cb)(const char *machine));
+void qtest_cb_for_every_machine(void (*cb)(const char *machine),
+                                bool skip_old_versioned);
 
 /**
  * qtest_qmp_device_add:
diff --git a/tests/migration-test.c b/tests/migration-test.c
index eb58d0a48e..0e687b7512 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -438,15 +438,6 @@ static int test_migrate_start(QTestState **from, QTestState **to,
                                   " -incoming %s",
                                   accel, tmpfs, bootpath, uri);
     } else if (strcmp(arch, "ppc64") == 0) {
-
-        /* On ppc64, the test only works with kvm-hv, but not with kvm-pr
-         * and TCG is touchy due to race conditions on dirty bits
-         * (especially on PPC for some reason)
-         */
-        if (access("/sys/module/kvm_hv", F_OK)) {
-            g_print("Skipping test: kvm_hv not available ");
-            return -1;
-        }
         cmd_src = g_strdup_printf("-machine accel=%s -m 256M"
                                   " -name source,debug-threads=on"
                                   " -serial file:%s/src_serial"
@@ -750,6 +741,17 @@ int main(int argc, char **argv)
         return 0;
     }
 
+    /*
+     * On ppc64, the test only works with kvm-hv, but not with kvm-pr and TCG
+     * is touchy due to race conditions on dirty bits (especially on PPC for
+     * some reason)
+     */
+    if (g_str_equal(qtest_get_arch(), "ppc64") &&
+        access("/sys/module/kvm_hv", F_OK)) {
+        g_test_message("Skipping test: kvm_hv not available");
+        return 0;
+    }
+
     tmpfs = mkdtemp(template);
     if (!tmpfs) {
         g_test_message("mkdtemp on path (%s): %s\n", template, strerror(errno));
diff --git a/tests/qom-test.c b/tests/qom-test.c
index e6f712cbd3..73c52af3bb 100644
--- a/tests/qom-test.c
+++ b/tests/qom-test.c
@@ -123,7 +123,7 @@ int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
 
-    qtest_cb_for_every_machine(add_machine_test_case);
+    qtest_cb_for_every_machine(add_machine_test_case, g_test_quick());
 
     return g_test_run();
 }
diff --git a/tests/test-char.c b/tests/test-char.c
index 5905d31441..2a2ff32904 100644
--- a/tests/test-char.c
+++ b/tests/test-char.c
@@ -56,7 +56,6 @@ static void fe_event(void *opaque, int event)
     }
 }
 
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 #ifdef _WIN32
 static void char_console_test_subprocess(void)
 {
@@ -106,7 +105,6 @@ static void char_stdio_test(void)
     g_test_trap_assert_passed();
     g_test_trap_assert_stdout("buf");
 }
-#endif
 
 static void char_ringbuf_test(void)
 {
@@ -807,14 +805,12 @@ int main(int argc, char **argv)
     g_test_add_func("/char/invalid", char_invalid_test);
     g_test_add_func("/char/ringbuf", char_ringbuf_test);
     g_test_add_func("/char/mux", char_mux_test);
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 #ifdef _WIN32
     g_test_add_func("/char/console/subprocess", char_console_test_subprocess);
     g_test_add_func("/char/console", char_console_test);
 #endif
     g_test_add_func("/char/stdio/subprocess", char_stdio_test_subprocess);
     g_test_add_func("/char/stdio", char_stdio_test);
-#endif
 #ifndef _WIN32
     g_test_add_func("/char/pipe", char_pipe_test);
 #endif
diff --git a/tests/test-hmp.c b/tests/test-hmp.c
index 5352c9c088..1a3a9c5099 100644
--- a/tests/test-hmp.c
+++ b/tests/test-hmp.c
@@ -158,7 +158,7 @@ int main(int argc, char **argv)
 
     g_test_init(&argc, &argv, NULL);
 
-    qtest_cb_for_every_machine(add_machine_test_case);
+    qtest_cb_for_every_machine(add_machine_test_case, g_test_quick());
 
     /* as none machine has no memory by default, add a test case with memory */
     qtest_add_data_func("hmp/none+2MB", g_strdup("none -m 2"), test_machine);
diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
index 1514d7ec97..192bfbf02e 100644
--- a/tests/test-rcu-list.c
+++ b/tests/test-rcu-list.c
@@ -44,7 +44,7 @@ static int nthreadsrunning;
 #define GOFLAG_RUN  1
 #define GOFLAG_STOP 2
 
-static volatile int goflag = GOFLAG_INIT;
+static int goflag = GOFLAG_INIT;
 
 #define RCU_READ_RUN 1000
 #define RCU_UPDATE_RUN 10
@@ -82,9 +82,20 @@ static void wait_all_threads(void)
     n_threads = 0;
 }
 
+#ifndef TEST_LIST_TYPE
+#define TEST_LIST_TYPE 1
+#endif
 
 struct list_element {
+#if TEST_LIST_TYPE == 1
     QLIST_ENTRY(list_element) entry;
+#elif TEST_LIST_TYPE == 2
+    QSIMPLEQ_ENTRY(list_element) entry;
+#elif TEST_LIST_TYPE == 3
+    QTAILQ_ENTRY(list_element) entry;
+#else
+#error Invalid TEST_LIST_TYPE
+#endif
     struct rcu_head rcu;
 };
 
@@ -96,8 +107,47 @@ static void reclaim_list_el(struct rcu_head *prcu)
     n_reclaims++;
 }
 
+#if TEST_LIST_TYPE == 1
 static QLIST_HEAD(q_list_head, list_element) Q_list_head;
 
+#define TEST_NAME "qlist"
+#define TEST_LIST_REMOVE_RCU        QLIST_REMOVE_RCU
+#define TEST_LIST_INSERT_AFTER_RCU  QLIST_INSERT_AFTER_RCU
+#define TEST_LIST_INSERT_HEAD_RCU   QLIST_INSERT_HEAD_RCU
+#define TEST_LIST_FOREACH_RCU       QLIST_FOREACH_RCU
+#define TEST_LIST_FOREACH_SAFE_RCU  QLIST_FOREACH_SAFE_RCU
+
+#elif TEST_LIST_TYPE == 2
+static QSIMPLEQ_HEAD(, list_element) Q_list_head =
+    QSIMPLEQ_HEAD_INITIALIZER(Q_list_head);
+
+#define TEST_NAME "qsimpleq"
+#define TEST_LIST_REMOVE_RCU(el, f)                             \
+         QSIMPLEQ_REMOVE_RCU(&Q_list_head, el, list_element, f)
+
+#define TEST_LIST_INSERT_AFTER_RCU(list_el, el, f)               \
+         QSIMPLEQ_INSERT_AFTER_RCU(&Q_list_head, list_el, el, f)
+
+#define TEST_LIST_INSERT_HEAD_RCU   QSIMPLEQ_INSERT_HEAD_RCU
+#define TEST_LIST_FOREACH_RCU       QSIMPLEQ_FOREACH_RCU
+#define TEST_LIST_FOREACH_SAFE_RCU  QSIMPLEQ_FOREACH_SAFE_RCU
+
+#elif TEST_LIST_TYPE == 3
+static QTAILQ_HEAD(, list_element) Q_list_head;
+
+#define TEST_NAME "qtailq"
+#define TEST_LIST_REMOVE_RCU(el, f) QTAILQ_REMOVE_RCU(&Q_list_head, el, f)
+
+#define TEST_LIST_INSERT_AFTER_RCU(list_el, el, f)               \
+           QTAILQ_INSERT_AFTER_RCU(&Q_list_head, list_el, el, f)
+
+#define TEST_LIST_INSERT_HEAD_RCU   QTAILQ_INSERT_HEAD_RCU
+#define TEST_LIST_FOREACH_RCU       QTAILQ_FOREACH_RCU
+#define TEST_LIST_FOREACH_SAFE_RCU  QTAILQ_FOREACH_SAFE_RCU
+#else
+#error Invalid TEST_LIST_TYPE
+#endif
+
 static void *rcu_q_reader(void *arg)
 {
     long long n_reads_local = 0;
@@ -107,15 +157,15 @@ static void *rcu_q_reader(void *arg)
 
     *(struct rcu_reader_data **)arg = &rcu_reader;
     atomic_inc(&nthreadsrunning);
-    while (goflag == GOFLAG_INIT) {
+    while (atomic_read(&goflag) == GOFLAG_INIT) {
         g_usleep(1000);
     }
 
-    while (goflag == GOFLAG_RUN) {
+    while (atomic_read(&goflag) == GOFLAG_RUN) {
         rcu_read_lock();
-        QLIST_FOREACH_RCU(el, &Q_list_head, entry) {
+        TEST_LIST_FOREACH_RCU(el, &Q_list_head, entry) {
             n_reads_local++;
-            if (goflag == GOFLAG_STOP) {
+            if (atomic_read(&goflag) == GOFLAG_STOP) {
                 break;
             }
         }
@@ -142,35 +192,35 @@ static void *rcu_q_updater(void *arg)
 
     *(struct rcu_reader_data **)arg = &rcu_reader;
     atomic_inc(&nthreadsrunning);
-    while (goflag == GOFLAG_INIT) {
+    while (atomic_read(&goflag) == GOFLAG_INIT) {
         g_usleep(1000);
     }
 
-    while (goflag == GOFLAG_RUN) {
+    while (atomic_read(&goflag) == GOFLAG_RUN) {
         target_el = select_random_el(RCU_Q_LEN);
         j = 0;
         /* FOREACH_RCU could work here but let's use both macros */
-        QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
+        TEST_LIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
             j++;
             if (target_el == j) {
-                QLIST_REMOVE_RCU(prev_el, entry);
+                TEST_LIST_REMOVE_RCU(prev_el, entry);
                 /* may be more than one updater in the future */
                 call_rcu1(&prev_el->rcu, reclaim_list_el);
                 n_removed_local++;
                 break;
             }
         }
-        if (goflag == GOFLAG_STOP) {
+        if (atomic_read(&goflag) == GOFLAG_STOP) {
             break;
         }
         target_el = select_random_el(RCU_Q_LEN);
         j = 0;
-        QLIST_FOREACH_RCU(el, &Q_list_head, entry) {
+        TEST_LIST_FOREACH_RCU(el, &Q_list_head, entry) {
             j++;
             if (target_el == j) {
-                prev_el = g_new(struct list_element, 1);
+                struct list_element *new_el = g_new(struct list_element, 1);
                 n_nodes += n_nodes_local;
-                QLIST_INSERT_BEFORE_RCU(el, prev_el, entry);
+                TEST_LIST_INSERT_AFTER_RCU(el, new_el, entry);
                 break;
             }
         }
@@ -195,7 +245,7 @@ static void rcu_qtest_init(void)
     srand(time(0));
     for (i = 0; i < RCU_Q_LEN; i++) {
         new_el = g_new(struct list_element, 1);
-        QLIST_INSERT_HEAD_RCU(&Q_list_head, new_el, entry);
+        TEST_LIST_INSERT_HEAD_RCU(&Q_list_head, new_el, entry);
     }
     qemu_mutex_lock(&counts_mutex);
     n_nodes += RCU_Q_LEN;
@@ -209,9 +259,9 @@ static void rcu_qtest_run(int duration, int nreaders)
         g_usleep(1000);
     }
 
-    goflag = GOFLAG_RUN;
+    atomic_set(&goflag, GOFLAG_RUN);
     sleep(duration);
-    goflag = GOFLAG_STOP;
+    atomic_set(&goflag, GOFLAG_STOP);
     wait_all_threads();
 }
 
@@ -230,8 +280,8 @@ static void rcu_qtest(const char *test, int duration, int nreaders)
     create_thread(rcu_q_updater);
     rcu_qtest_run(duration, nreaders);
 
-    QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
-        QLIST_REMOVE_RCU(prev_el, entry);
+    TEST_LIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
+        TEST_LIST_REMOVE_RCU(prev_el, entry);
         call_rcu1(&prev_el->rcu, reclaim_list_el);
         n_removed_local++;
     }
@@ -290,9 +340,9 @@ int main(int argc, char *argv[])
             } else {
                 gtest_seconds = 20;
             }
-            g_test_add_func("/rcu/qlist/single-threaded", gtest_rcuq_one);
-            g_test_add_func("/rcu/qlist/short-few", gtest_rcuq_few);
-            g_test_add_func("/rcu/qlist/long-many", gtest_rcuq_many);
+            g_test_add_func("/rcu/"TEST_NAME"/single-threaded", gtest_rcuq_one);
+            g_test_add_func("/rcu/"TEST_NAME"/short-few", gtest_rcuq_few);
+            g_test_add_func("/rcu/"TEST_NAME"/long-many", gtest_rcuq_many);
             g_test_in_charge = 1;
             return g_test_run();
         }
diff --git a/tests/test-rcu-simpleq.c b/tests/test-rcu-simpleq.c
new file mode 100644
index 0000000000..057f7d33f7
--- /dev/null
+++ b/tests/test-rcu-simpleq.c
@@ -0,0 +1,2 @@
+#define TEST_LIST_TYPE 2
+#include "test-rcu-list.c"
diff --git a/tests/test-rcu-tailq.c b/tests/test-rcu-tailq.c
new file mode 100644
index 0000000000..8d487e0ee0
--- /dev/null
+++ b/tests/test-rcu-tailq.c
@@ -0,0 +1,2 @@
+#define TEST_LIST_TYPE 3
+#include "test-rcu-list.c"
diff --git a/tests/test-x86-cpuid-compat.c b/tests/test-x86-cpuid-compat.c
index 84ce9c71ae..e75b959950 100644
--- a/tests/test-x86-cpuid-compat.c
+++ b/tests/test-x86-cpuid-compat.c
@@ -35,7 +35,6 @@ static QObject *qom_get(const char *path, const char *prop)
     return ret;
 }
 
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 static bool qom_get_bool(const char *path, const char *prop)
 {
     QBool *value = qobject_to(QBool, qom_get(path, prop));
@@ -44,7 +43,6 @@ static bool qom_get_bool(const char *path, const char *prop)
     qobject_unref(value);
     return b;
 }
-#endif
 
 typedef struct CpuidTestArgs {
     const char *cmdline;
@@ -168,7 +166,6 @@ static FeatureTestArgs *add_feature_test(const char *name, const char *cmdline,
     return args;
 }
 
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
 static void test_plus_minus_subprocess(void)
 {
     char *path;
@@ -210,17 +207,14 @@ static void test_plus_minus(void)
                               "Don't mix both \"+cx8\" and \"cx8=off\"*");
     g_test_trap_assert_stdout("");
 }
-#endif
 
 int main(int argc, char **argv)
 {
     g_test_init(&argc, &argv, NULL);
 
-#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
     g_test_add_func("/x86/cpuid/parsing-plus-minus/subprocess",
                     test_plus_minus_subprocess);
     g_test_add_func("/x86/cpuid/parsing-plus-minus", test_plus_minus);
-#endif
 
     /* Original level values for CPU models: */
     add_cpuid_test("x86/cpuid/phenom/level",
diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index ca6251f5f8..716aff7153 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -768,7 +768,6 @@ static void wait_for_rings_started(TestServer *s, size_t count)
     g_mutex_unlock(&s->data_mutex);
 }
 
-#if defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS)
 static inline void test_server_connect(TestServer *server)
 {
     test_server_create_chr(server, ",reconnect=1");
@@ -893,7 +892,6 @@ static void test_flags_mismatch(void)
     g_free(path);
 }
 
-#endif
 
 static void test_multiqueue(void)
 {
@@ -975,7 +973,6 @@ int main(int argc, char **argv)
     qtest_add_func("/vhost-user/migrate", test_migrate);
     qtest_add_func("/vhost-user/multiqueue", test_multiqueue);
 
-#if defined(CONFIG_HAS_GLIB_SUBPROCESS_TESTS)
     /* keeps failing on build-system since Aug 15 2017 */
     if (getenv("QTEST_VHOST_USER_FIXME")) {
         qtest_add_func("/vhost-user/reconnect/subprocess",
@@ -988,7 +985,6 @@ int main(int argc, char **argv)
                        test_flags_mismatch_subprocess);
         qtest_add_func("/vhost-user/flags-mismatch", test_flags_mismatch);
     }
-#endif
 
     ret = g_test_run();
 
diff --git a/tests/virtio-ccw-test.c b/tests/virtio-ccw-test.c
new file mode 100644
index 0000000000..48c714d84c
--- /dev/null
+++ b/tests/virtio-ccw-test.c
@@ -0,0 +1,110 @@
+/*
+ * QTest testcase for VirtIO CCW
+ *
+ * Copyright (c) 2014 SUSE LINUX Products GmbH
+ * Copyright (c) 2018 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/* Until we have a full libqos implementation of virtio-ccw (which also
+ * requires adding support for I/O channels to qtest), we can only do simple
+ * tests that initialize the devices.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "libqos/virtio.h"
+
+static void virtio_balloon_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-balloon-ccw");
+    qtest_end();
+}
+
+static void virtconsole_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-serial-ccw,id=vser0 "
+                                "-device virtconsole,bus=vser0.0");
+    qtest_end();
+}
+
+static void virtserialport_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-serial-ccw,id=vser0 "
+                                "-device virtserialport,bus=vser0.0");
+    qtest_end();
+}
+
+static void virtio_serial_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-serial-ccw");
+    qtest_end();
+}
+
+static void virtio_serial_hotplug(void)
+{
+    global_qtest = qtest_initf("-device virtio-serial-ccw");
+    qtest_qmp_device_add("virtserialport", "hp-port", "{}");
+    qtest_qmp_device_del("hp-port");
+    qtest_end();
+}
+
+static void virtio_blk_nop(void)
+{
+    global_qtest = qtest_initf("-drive if=none,id=drv0,file=null-co://,format=raw "
+                                "-device virtio-blk-ccw,drive=drv0");
+    qtest_end();
+}
+
+static void virtio_net_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-net-ccw");
+    qtest_end();
+}
+
+static void virtio_rng_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-rng-ccw");
+    qtest_end();
+}
+
+static void virtio_scsi_nop(void)
+{
+    global_qtest = qtest_initf("-device virtio-scsi-ccw");
+    qtest_end();
+}
+
+static void virtio_scsi_hotplug(void)
+{
+    global_qtest = qtest_initf("-drive if=none,id=drv0,file=null-co://,format=raw "
+                                "-drive if=none,id=drv1,file=null-co://,format=raw "
+                                "-device virtio-scsi-ccw "
+                                "-device scsi-hd,drive=drv0");
+    qtest_qmp_device_add("scsi-hd", "scsihd", "{'drive': 'drv1'}");
+    qtest_qmp_device_del("scsihd");
+
+    qtest_end();
+}
+
+int main(int argc, char **argv)
+{
+    int ret;
+
+    g_test_init(&argc, &argv, NULL);
+    qtest_add_func("/virtio/balloon/nop", virtio_balloon_nop);
+    qtest_add_func("/virtio/console/nop", virtconsole_nop);
+    qtest_add_func("/virtio/serialport/nop", virtserialport_nop);
+    qtest_add_func("/virtio/serial/nop", virtio_serial_nop);
+    qtest_add_func("/virtio/serial/hotplug", virtio_serial_hotplug);
+    qtest_add_func("/virtio/block/nop", virtio_blk_nop);
+    qtest_add_func("/virtio/net/nop", virtio_net_nop);
+    qtest_add_func("/virtio/rng/nop", virtio_rng_nop);
+    qtest_add_func("/virtio/scsi/nop", virtio_scsi_nop);
+    qtest_add_func("/virtio/scsi/hotplug", virtio_scsi_hotplug);
+
+    ret = g_test_run();
+
+    return ret;
+}
diff --git a/util/Makefile.objs b/util/Makefile.objs
index e1c3fed4dc..e958116c86 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -44,6 +44,7 @@ util-obj-y += log.o
 util-obj-y += pagesize.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
+util-obj-y += qsp.o
 util-obj-y += range.o
 util-obj-y += stats64.o
 util-obj-y += systemd.o
diff --git a/util/module.c b/util/module.c
index c90973721f..1259dd3686 100644
--- a/util/module.c
+++ b/util/module.c
@@ -162,9 +162,10 @@ void module_load_one(const char *prefix, const char *lib_name)
 #ifdef CONFIG_MODULES
     char *fname = NULL;
     char *exec_dir;
-    char *dirs[3];
+    const char *search_dir;
+    char *dirs[4];
     char *module_name;
-    int i = 0;
+    int i = 0, n_dirs = 0;
     int ret;
     static GHashTable *loaded_modules;
 
@@ -186,14 +187,19 @@ void module_load_one(const char *prefix, const char *lib_name)
     g_hash_table_insert(loaded_modules, module_name, module_name);
 
     exec_dir = qemu_get_exec_dir();
-    dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR);
-    dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : "");
-    dirs[i++] = g_strdup_printf("%s", exec_dir ? : "");
-    assert(i == ARRAY_SIZE(dirs));
+    search_dir = getenv("QEMU_MODULE_DIR");
+    if (search_dir != NULL) {
+        dirs[n_dirs++] = g_strdup_printf("%s", search_dir);
+    }
+    dirs[n_dirs++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR);
+    dirs[n_dirs++] = g_strdup_printf("%s/..", exec_dir ? : "");
+    dirs[n_dirs++] = g_strdup_printf("%s", exec_dir ? : "");
+    assert(n_dirs <= ARRAY_SIZE(dirs));
+
     g_free(exec_dir);
     exec_dir = NULL;
 
-    for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+    for (i = 0; i < n_dirs; i++) {
         fname = g_strdup_printf("%s/%s%s",
                 dirs[i], module_name, HOST_DSOSUF);
         ret = module_load_file(fname);
@@ -205,7 +211,7 @@ void module_load_one(const char *prefix, const char *lib_name)
         }
     }
 
-    for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+    for (i = 0; i < n_dirs; i++) {
         g_free(dirs[i]);
     }
 
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index bb5ad28bd3..25dd1595ad 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -67,15 +67,24 @@ void *qemu_memalign(size_t alignment, size_t size)
     return qemu_oom_check(qemu_try_memalign(alignment, size));
 }
 
+static int get_allocation_granularity(void)
+{
+    SYSTEM_INFO system_info;
+
+    GetSystemInfo(&system_info);
+    return system_info.dwAllocationGranularity;
+}
+
 void *qemu_anon_ram_alloc(size_t size, uint64_t *align, bool shared)
 {
     void *ptr;
 
-    /* FIXME: this is not exactly optimal solution since VirtualAlloc
-       has 64Kb granularity, but at least it guarantees us that the
-       memory is page aligned. */
     ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
     trace_qemu_anon_ram_alloc(size, ptr);
+
+    if (ptr && align) {
+        *align = MAX(get_allocation_granularity(), getpagesize());
+    }
     return ptr;
 }
 
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index b303188a36..4a363ca675 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -97,13 +97,13 @@ void qemu_rec_mutex_destroy(QemuRecMutex *mutex)
     DeleteCriticalSection(&mutex->lock);
 }
 
-void qemu_rec_mutex_lock(QemuRecMutex *mutex)
+void qemu_rec_mutex_lock_impl(QemuRecMutex *mutex, const char *file, int line)
 {
     assert(mutex->initialized);
     EnterCriticalSection(&mutex->lock);
 }
 
-int qemu_rec_mutex_trylock(QemuRecMutex *mutex)
+int qemu_rec_mutex_trylock_impl(QemuRecMutex *mutex, const char *file, int line)
 {
     assert(mutex->initialized);
     return !TryEnterCriticalSection(&mutex->lock);
diff --git a/util/qht.c b/util/qht.c
index c138777a9c..1e3a072e25 100644
--- a/util/qht.c
+++ b/util/qht.c
@@ -90,6 +90,33 @@
 #endif
 
 /*
+ * Do _not_ use qemu_mutex_[try]lock directly! Use these helpers, otherwise
+ * the profiler (QSP) will deadlock.
+ */
+static inline void qht_lock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        qemu_mutex_lock__raw(&ht->lock);
+    } else {
+        qemu_mutex_lock(&ht->lock);
+    }
+}
+
+static inline int qht_trylock(struct qht *ht)
+{
+    if (ht->mode & QHT_MODE_RAW_MUTEXES) {
+        return qemu_mutex_trylock__raw(&(ht)->lock);
+    }
+    return qemu_mutex_trylock(&(ht)->lock);
+}
+
+/* this inline is not really necessary, but it helps keep code consistent */
+static inline void qht_unlock(struct qht *ht)
+{
+    qemu_mutex_unlock(&ht->lock);
+}
+
+/*
  * Note: reading partially-updated pointers in @pointers could lead to
  * segfaults. We thus access them with atomic_read/set; this guarantees
  * that the compiler makes all those accesses atomic. We also need the
@@ -254,10 +281,10 @@ void qht_map_lock_buckets__no_stale(struct qht *ht, struct qht_map **pmap)
     qht_map_unlock_buckets(map);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     qht_map_lock_buckets(map);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
     *pmap = map;
     return;
 }
@@ -288,11 +315,11 @@ struct qht_bucket *qht_bucket_lock__no_stale(struct qht *ht, uint32_t hash,
     qemu_spin_unlock(&b->lock);
 
     /* we raced with a resize; acquire ht->lock to see the updated ht->map */
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     b = qht_map_to_bucket(map, hash);
     qemu_spin_lock(&b->lock);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
     *pmap = map;
     return b;
 }
@@ -430,13 +457,13 @@ bool qht_reset_size(struct qht *ht, size_t n_elems)
 
     n_buckets = qht_elems_to_buckets(n_elems);
 
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     map = ht->map;
     if (n_buckets != map->n_buckets) {
         new = qht_map_create(n_buckets);
     }
     qht_do_resize_and_reset(ht, new);
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 
     return !!new;
 }
@@ -565,7 +592,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
      * If the lock is taken it probably means there's an ongoing resize,
      * so bail out.
      */
-    if (qemu_mutex_trylock(&ht->lock)) {
+    if (qht_trylock(ht)) {
         return;
     }
     map = ht->map;
@@ -575,7 +602,7 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
 
         qht_do_resize(ht, new);
     }
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 }
 
 bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
@@ -788,7 +815,7 @@ bool qht_resize(struct qht *ht, size_t n_elems)
     size_t n_buckets = qht_elems_to_buckets(n_elems);
     size_t ret = false;
 
-    qemu_mutex_lock(&ht->lock);
+    qht_lock(ht);
     if (n_buckets != ht->map->n_buckets) {
         struct qht_map *new;
 
@@ -796,7 +823,7 @@ bool qht_resize(struct qht *ht, size_t n_elems)
         qht_do_resize(ht, new);
         ret = true;
     }
-    qemu_mutex_unlock(&ht->lock);
+    qht_unlock(ht);
 
     return ret;
 }
diff --git a/util/qsp.c b/util/qsp.c
new file mode 100644
index 0000000000..b0c2575d10
--- /dev/null
+++ b/util/qsp.c
@@ -0,0 +1,828 @@
+/*
+ * qsp.c - QEMU Synchronization Profiler
+ *
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ *
+ * QSP profiles the time spent in synchronization primitives, which can
+ * help diagnose performance problems, e.g. scalability issues when
+ * contention is high.
+ *
+ * The primitives currently supported are mutexes, recursive mutexes and
+ * condition variables. Note that not all related functions are intercepted;
+ * instead we profile only those functions that can have a performance impact,
+ * either due to blocking (e.g. cond_wait, mutex_lock) or cache line
+ * contention (e.g. mutex_lock, mutex_trylock).
+ *
+ * QSP's design focuses on speed and scalability. This is achieved
+ * by having threads do their profiling entirely on thread-local data.
+ * The appropriate thread-local data is found via a QHT, i.e. a concurrent hash
+ * table. To aggregate data in order to generate a report, we iterate over
+ * all entries in the hash table. Depending on the number of threads and
+ * synchronization objects this might be expensive, but note that it is
+ * very rarely called -- reports are generated only when requested by users.
+ *
+ * Reports are generated as a table where each row represents a call site. A
+ * call site is the triplet formed by the __FILE__ and __LINE__ of the caller
+ * as well as the address of the "object" (i.e. mutex, rec. mutex or condvar)
+ * being operated on. Optionally, call sites that operate on different objects
+ * of the same type can be coalesced, which can be particularly useful when
+ * profiling dynamically-allocated objects.
+ *
+ * Alternative designs considered:
+ *
+ * - Use an off-the-shelf profiler such as mutrace. This is not a viable option
+ *   for us because QEMU has __malloc_hook set (by one of the libraries it
+ *   uses); leaving this hook unset is required to avoid deadlock in mutrace.
+ *
+ * - Use a glib HT for each thread, protecting each HT with its own lock.
+ *   This isn't simpler than the current design, and is 10% slower in the
+ *   atomic_add-bench microbenchmark (-m option).
+ *
+ * - For reports, just use a binary tree as we aggregate data, instead of having
+ *   an intermediate hash table. This would simplify the code only slightly, but
+ *   would perform badly if there were many threads and objects to track.
+ *
+ * - Wrap operations on qsp entries with RCU read-side critical sections, so
+ *   that qsp_reset() can delete entries. Unfortunately, the overhead of calling
+ *   rcu_read_lock/unlock slows down atomic_add-bench -m by 24%. Having
+ *   a snapshot that is updated on qsp_reset() avoids this overhead.
+ *
+ * Related Work:
+ * - Lennart Poettering's mutrace: http://0pointer.de/blog/projects/mutrace.html
+ * - Lozi, David, Thomas, Lawall and Muller. "Remote Core Locking: Migrating
+ *   Critical-Section Execution to Improve the Performance of Multithreaded
+ *   Applications", USENIX ATC'12.
+ */
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/timer.h"
+#include "qemu/qht.h"
+#include "qemu/rcu.h"
+#include "exec/tb-hash-xx.h"
+
+enum QSPType {
+    QSP_MUTEX,
+    QSP_BQL_MUTEX,
+    QSP_REC_MUTEX,
+    QSP_CONDVAR,
+};
+
+struct QSPCallSite {
+    const void *obj;
+    const char *file; /* i.e. __FILE__; shortened later */
+    int line;
+    enum QSPType type;
+};
+typedef struct QSPCallSite QSPCallSite;
+
+struct QSPEntry {
+    void *thread_ptr;
+    const QSPCallSite *callsite;
+    uint64_t n_acqs;
+    uint64_t ns;
+    unsigned int n_objs; /* count of coalesced objs; only used for reporting */
+#ifndef CONFIG_ATOMIC64
+    /*
+     * If we cannot update the counts atomically, then use a seqlock.
+     * We don't need an associated lock because the updates are thread-local.
+     */
+    QemuSeqLock sequence;
+#endif
+};
+typedef struct QSPEntry QSPEntry;
+
+struct QSPSnapshot {
+    struct rcu_head rcu;
+    struct qht ht;
+};
+typedef struct QSPSnapshot QSPSnapshot;
+
+/* initial sizing for hash tables */
+#define QSP_INITIAL_SIZE 64
+
+/* If this file is moved, QSP_REL_PATH should be updated accordingly */
+#define QSP_REL_PATH "util/qsp.c"
+
+/* length of the source-tree prefix in __FILE__; used to print relative paths */
+static size_t qsp_qemu_path_len;
+
+/* the address of qsp_thread gives us a unique 'thread ID' */
+static __thread int qsp_thread;
+
+/*
+ * Call sites are the same for all threads, so we track them in a separate hash
+ * table to save memory.
+ */
+static struct qht qsp_callsite_ht;
+
+static struct qht qsp_ht;
+static QSPSnapshot *qsp_snapshot;
+static bool qsp_initialized, qsp_initializing;
+
+static const char * const qsp_typenames[] = {
+    [QSP_MUTEX]     = "mutex",
+    [QSP_BQL_MUTEX] = "BQL mutex",
+    [QSP_REC_MUTEX] = "rec_mutex",
+    [QSP_CONDVAR]   = "condvar",
+};
+
+QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl;
+QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl;
+QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl;
+QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl;
+QemuRecMutexTrylockFunc qemu_rec_mutex_trylock_func =
+    qemu_rec_mutex_trylock_impl;
+QemuCondWaitFunc qemu_cond_wait_func = qemu_cond_wait_impl;
+
+/*
+ * It pays off to _not_ hash callsite->file; hashing a string is slow, and
+ * without it we still get a pretty unique hash.
+ */
+static inline
+uint32_t do_qsp_callsite_hash(const QSPCallSite *callsite, uint64_t a)
+{
+    uint64_t b = (uint64_t)(uintptr_t)callsite->obj;
+    uint32_t e = callsite->line;
+    uint32_t f = callsite->type;
+
+    return tb_hash_func7(a, b, e, f, 0);
+}
+
+static inline
+uint32_t qsp_callsite_hash(const QSPCallSite *callsite)
+{
+    return do_qsp_callsite_hash(callsite, 0);
+}
+
+static inline uint32_t do_qsp_entry_hash(const QSPEntry *entry, uint64_t a)
+{
+    return do_qsp_callsite_hash(entry->callsite, a);
+}
+
+static uint32_t qsp_entry_hash(const QSPEntry *entry)
+{
+    return do_qsp_entry_hash(entry, (uint64_t)(uintptr_t)entry->thread_ptr);
+}
+
+static uint32_t qsp_entry_no_thread_hash(const QSPEntry *entry)
+{
+    return do_qsp_entry_hash(entry, 0);
+}
+
+/* without the objects we need to hash the file name to get a decent hash */
+static uint32_t qsp_entry_no_thread_obj_hash(const QSPEntry *entry)
+{
+    const QSPCallSite *callsite = entry->callsite;
+    uint64_t a = g_str_hash(callsite->file);
+    uint64_t b = callsite->line;
+    uint32_t e = callsite->type;
+
+    return tb_hash_func7(a, b, e, 0, 0);
+}
+
+static bool qsp_callsite_cmp(const void *ap, const void *bp)
+{
+    const QSPCallSite *a = ap;
+    const QSPCallSite *b = bp;
+
+    return a == b ||
+        (a->obj == b->obj &&
+         a->line == b->line &&
+         a->type == b->type &&
+         (a->file == b->file || !strcmp(a->file, b->file)));
+}
+
+static bool qsp_callsite_no_obj_cmp(const void *ap, const void *bp)
+{
+    const QSPCallSite *a = ap;
+    const QSPCallSite *b = bp;
+
+    return a == b ||
+        (a->line == b->line &&
+         a->type == b->type &&
+         (a->file == b->file || !strcmp(a->file, b->file)));
+}
+
+static bool qsp_entry_no_thread_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return qsp_callsite_cmp(a->callsite, b->callsite);
+}
+
+static bool qsp_entry_no_thread_obj_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return qsp_callsite_no_obj_cmp(a->callsite, b->callsite);
+}
+
+static bool qsp_entry_cmp(const void *ap, const void *bp)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+
+    return a->thread_ptr == b->thread_ptr &&
+        qsp_callsite_cmp(a->callsite, b->callsite);
+}
+
+/*
+ * Normally we'd call this from a constructor function, but we want it to work
+ * via libutil as well.
+ */
+static void qsp_do_init(void)
+{
+    /* make sure this file's path in the tree is up to date with QSP_REL_PATH */
+    g_assert(strstr(__FILE__, QSP_REL_PATH));
+    qsp_qemu_path_len = strlen(__FILE__) - strlen(QSP_REL_PATH);
+
+    qht_init(&qsp_ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+    qht_init(&qsp_callsite_ht, qsp_callsite_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+}
+
+static __attribute__((noinline)) void qsp_init__slowpath(void)
+{
+    if (atomic_cmpxchg(&qsp_initializing, false, true) == false) {
+        qsp_do_init();
+        atomic_set(&qsp_initialized, true);
+    } else {
+        while (!atomic_read(&qsp_initialized)) {
+            cpu_relax();
+        }
+    }
+}
+
+/* qsp_init() must be called from _all_ exported functions */
+static inline void qsp_init(void)
+{
+    if (likely(atomic_read(&qsp_initialized))) {
+        return;
+    }
+    qsp_init__slowpath();
+}
+
+static QSPCallSite *qsp_callsite_find(const QSPCallSite *orig)
+{
+    QSPCallSite *callsite;
+    uint32_t hash;
+
+    hash = qsp_callsite_hash(orig);
+    callsite = qht_lookup(&qsp_callsite_ht, orig, hash);
+    if (callsite == NULL) {
+        void *existing = NULL;
+
+        callsite = g_new(QSPCallSite, 1);
+        memcpy(callsite, orig, sizeof(*callsite));
+        qht_insert(&qsp_callsite_ht, callsite, hash, &existing);
+        if (unlikely(existing)) {
+            g_free(callsite);
+            callsite = existing;
+        }
+    }
+    return callsite;
+}
+
+static QSPEntry *
+qsp_entry_create(struct qht *ht, const QSPEntry *entry, uint32_t hash)
+{
+    QSPEntry *e;
+    void *existing = NULL;
+
+    e = g_new0(QSPEntry, 1);
+    e->thread_ptr = entry->thread_ptr;
+    e->callsite = qsp_callsite_find(entry->callsite);
+
+    qht_insert(ht, e, hash, &existing);
+    if (unlikely(existing)) {
+        g_free(e);
+        e = existing;
+    }
+    return e;
+}
+
+static QSPEntry *
+qsp_entry_find(struct qht *ht, const QSPEntry *entry, uint32_t hash)
+{
+    QSPEntry *e;
+
+    e = qht_lookup(ht, entry, hash);
+    if (e == NULL) {
+        e = qsp_entry_create(ht, entry, hash);
+    }
+    return e;
+}
+
+/*
+ * Note: Entries are never removed, so callers do not have to be in an RCU
+ * read-side critical section.
+ */
+static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
+                               enum QSPType type)
+{
+    QSPCallSite callsite = {
+        .obj = obj,
+        .file = file,
+        .line = line,
+        .type = type,
+    };
+    QSPEntry orig;
+    uint32_t hash;
+
+    qsp_init();
+
+    orig.thread_ptr = &qsp_thread;
+    orig.callsite = &callsite;
+
+    hash = qsp_entry_hash(&orig);
+    return qsp_entry_find(&qsp_ht, &orig, hash);
+}
+
+/*
+ * @from is in the global hash table; read it atomically if the host supports
+ * u64 atomics, otherwise with plain loads that are retried under the seqlock.
+ */
+static void qsp_entry_aggregate(QSPEntry *to, const QSPEntry *from)
+{
+#ifdef CONFIG_ATOMIC64
+    to->ns += atomic_read__nocheck(&from->ns);
+    to->n_acqs += atomic_read__nocheck(&from->n_acqs);
+#else
+    unsigned int version;
+    uint64_t ns, n_acqs;
+
+    do {
+        version = seqlock_read_begin(&from->sequence);
+        ns = from->ns;
+        n_acqs = from->n_acqs;
+    } while (seqlock_read_retry(&from->sequence, version));
+
+    to->ns += ns;
+    to->n_acqs += n_acqs;
+#endif
+}
+
+/*
+ * @e is in the global hash table and is only written to by the current thread.
+ * With u64 atomics, "write once" to avoid torn reads; else use the seqlock.
+ */
+static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
+{
+#ifdef CONFIG_ATOMIC64
+    atomic_set__nocheck(&e->ns, e->ns + delta);
+    if (acq) {
+        atomic_set__nocheck(&e->n_acqs, e->n_acqs + 1);
+    }
+#else
+    seqlock_write_begin(&e->sequence);
+    e->ns += delta;
+    e->n_acqs += acq;
+    seqlock_write_end(&e->sequence);
+#endif
+}
+
+static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
+{
+    do_qsp_entry_record(e, delta, true);
+}
+
+#define QSP_GEN_VOID(type_, qsp_t_, func_, impl_)                       \
+    static void func_(type_ *obj, const char *file, int line)           \
+    {                                                                   \
+        QSPEntry *e;                                                    \
+        int64_t t0, t1;                                                 \
+                                                                        \
+        t0 = get_clock();                                               \
+        impl_(obj, file, line);                                         \
+        t1 = get_clock();                                               \
+                                                                        \
+        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
+        qsp_entry_record(e, t1 - t0);                                   \
+    }
+
+#define QSP_GEN_RET1(type_, qsp_t_, func_, impl_)                       \
+    static int func_(type_ *obj, const char *file, int line)            \
+    {                                                                   \
+        QSPEntry *e;                                                    \
+        int64_t t0, t1;                                                 \
+        int err;                                                        \
+                                                                        \
+        t0 = get_clock();                                               \
+        err = impl_(obj, file, line);                                   \
+        t1 = get_clock();                                               \
+                                                                        \
+        e = qsp_entry_get(obj, file, line, qsp_t_);                     \
+        do_qsp_entry_record(e, t1 - t0, !err);                          \
+        return err;                                                     \
+    }
+
+QSP_GEN_VOID(QemuMutex, QSP_BQL_MUTEX, qsp_bql_mutex_lock, qemu_mutex_lock_impl)
+QSP_GEN_VOID(QemuMutex, QSP_MUTEX, qsp_mutex_lock, qemu_mutex_lock_impl)
+QSP_GEN_RET1(QemuMutex, QSP_MUTEX, qsp_mutex_trylock, qemu_mutex_trylock_impl)
+
+QSP_GEN_VOID(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_lock,
+             qemu_rec_mutex_lock_impl)
+QSP_GEN_RET1(QemuRecMutex, QSP_REC_MUTEX, qsp_rec_mutex_trylock,
+             qemu_rec_mutex_trylock_impl)
+
+#undef QSP_GEN_RET1
+#undef QSP_GEN_VOID
+
+static void
+qsp_cond_wait(QemuCond *cond, QemuMutex *mutex, const char *file, int line)
+{
+    QSPEntry *e;
+    int64_t t0, t1;
+
+    t0 = get_clock();
+    qemu_cond_wait_impl(cond, mutex, file, line);
+    t1 = get_clock();
+
+    e = qsp_entry_get(cond, file, line, QSP_CONDVAR);
+    qsp_entry_record(e, t1 - t0);
+}
+
+bool qsp_is_enabled(void)
+{
+    return atomic_read(&qemu_mutex_lock_func) == qsp_mutex_lock;
+}
+
+void qsp_enable(void)
+{
+    atomic_set(&qemu_mutex_lock_func, qsp_mutex_lock);
+    atomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock);
+    atomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock);
+    atomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock);
+    atomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock);
+    atomic_set(&qemu_cond_wait_func, qsp_cond_wait);
+}
+
+void qsp_disable(void)
+{
+    atomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl);
+    atomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl);
+    atomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl);
+    atomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl);
+    atomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl);
+    atomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl);
+}
+
+/* GTree comparator: order by the *@up sort key, largest first; then callsite */
+static gint qsp_tree_cmp(gconstpointer ap, gconstpointer bp, gpointer up)
+{
+    const QSPEntry *a = ap;
+    const QSPEntry *b = bp;
+    enum QSPSortBy sort_by = *(enum QSPSortBy *)up;
+    const QSPCallSite *ca = a->callsite;
+    const QSPCallSite *cb = b->callsite;
+
+    switch (sort_by) {
+    case QSP_SORT_BY_TOTAL_WAIT_TIME:
+        if (a->ns > b->ns) {
+            return -1;
+        } else if (a->ns < b->ns) {
+            return 1;
+        }
+        break;
+    case QSP_SORT_BY_AVG_WAIT_TIME:
+    {
+        /* divide as double: an integer quotient would drop the fraction */
+        double avg_a = a->n_acqs ? (double)a->ns / a->n_acqs : 0;
+        double avg_b = b->n_acqs ? (double)b->ns / b->n_acqs : 0;
+
+        if (avg_a > avg_b) {
+            return -1;
+        } else if (avg_a < avg_b) {
+            return 1;
+        }
+        break;
+    }
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Break the tie with the object's address */
+    if (ca->obj < cb->obj) {
+        return -1;
+    } else if (ca->obj > cb->obj) {
+        return 1;
+    } else {
+        int cmp;
+
+        /* same obj. Break the tie with the callsite's file */
+        cmp = strcmp(ca->file, cb->file);
+        if (cmp) {
+            return cmp;
+        }
+        /* same callsite file. Break the tie with the callsite's line */
+        g_assert(ca->line != cb->line);
+        if (ca->line < cb->line) {
+            return -1;
+        } else if (ca->line > cb->line) {
+            return 1;
+        } else {
+            /* break the tie with the callsite's type */
+            return cb->type - ca->type;
+        }
+    }
+}
+
+/*
+ * qht_iter() callback: insert the entry @p as a key (with a NULL value) into
+ * the GTree passed in @userp. @ht and @h are unused.
+ */
+static void qsp_sort(struct qht *ht, void *p, uint32_t h, void *userp)
+{
+    g_tree_insert(userp, p, NULL);
+}
+
+/*
+ * qht_iter() callback: fold the entry @p into the hash table @up.
+ * The destination entry is obtained from qsp_entry_find(), keyed by the
+ * value of qsp_entry_no_thread_hash(). @global_ht and @h are unused.
+ */
+static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up)
+{
+    const QSPEntry *src = p;
+    struct qht *dst_ht = up;
+    uint32_t src_hash = qsp_entry_no_thread_hash(src);
+    QSPEntry *dst = qsp_entry_find(dst_ht, src, src_hash);
+
+    qsp_entry_aggregate(dst, src);
+}
+
+/*
+ * qht_iter() callback run over the snapshot's table: subtract the snapshot
+ * entry @p from the matching entry of the table @htp, and drop that entry
+ * from @htp if nothing is left in it.
+ */
+static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp)
+{
+    struct qht *cur_ht = htp;
+    const QSPEntry *snap_e = p;
+    QSPEntry *cur = qht_lookup(cur_ht, snap_e, hash);
+    bool removed;
+
+    /* entries are never deleted, so the current table must have this one */
+    g_assert(cur != NULL);
+    /* the current stats were read after the snapshot was taken */
+    g_assert(cur->n_acqs >= snap_e->n_acqs);
+    g_assert(cur->ns >= snap_e->ns);
+
+    cur->n_acqs -= snap_e->n_acqs;
+    cur->ns -= snap_e->ns;
+
+    if (cur->n_acqs != 0 || cur->ns != 0) {
+        return;
+    }
+
+    /* an entry with nothing in it is not worth reporting */
+    removed = qht_remove(cur_ht, cur, hash);
+    g_assert(removed);
+    g_free(cur);
+}
+
+/*
+ * Run qsp_iter_diff() over every entry of @orig, passing @new as the table
+ * to be adjusted.
+ */
+static void qsp_diff(struct qht *orig, struct qht *new)
+{
+    qht_iter(orig, qsp_iter_diff, new);
+}
+
+/*
+ * qht_iter() callback: accumulate the entry @p into the table @htp, looked
+ * up with the value of qsp_entry_no_thread_obj_hash(). A missing entry is
+ * created with n_objs set to 1; an existing one gets n_objs bumped when its
+ * callsite's obj differs from that of @p.
+ */
+static void
+qsp_iter_callsite_coalesce(struct qht *orig, void *p, uint32_t h, void *htp)
+{
+    struct qht *dst_ht = htp;
+    QSPEntry *src = p;
+    uint32_t key = qsp_entry_no_thread_obj_hash(src);
+    QSPEntry *dst = qht_lookup(dst_ht, src, key);
+
+    if (dst != NULL) {
+        if (dst->callsite->obj != src->callsite->obj) {
+            dst->n_objs++;
+        }
+    } else {
+        dst = qsp_entry_create(dst_ht, src, key);
+        dst->n_objs = 1;
+    }
+
+    dst->n_acqs += src->n_acqs;
+    dst->ns += src->ns;
+}
+
+/* qht_iter() callback that g_free()s the entry @p; other arguments unused */
+static void qsp_ht_delete(struct qht *ht, void *p, uint32_t h, void *htp)
+{
+    g_free(p);
+}
+
+/*
+ * Fill @tree with one key per entry to report.
+ *
+ * The global hash table is aggregated into a local one, the current
+ * snapshot (if any) is subtracted from it, and, when @callsite_coalesce is
+ * set, the result is re-aggregated through qsp_iter_callsite_coalesce().
+ * The surviving entries are inserted as keys into @tree; the temporary hash
+ * tables are destroyed before returning.
+ */
+static void qsp_mktree(GTree *tree, bool callsite_coalesce)
+{
+    QSPSnapshot *snap;
+    struct qht ht, coalesce_ht;
+    struct qht *htp;
+
+    /*
+     * First, see if there's a prior snapshot, so that we read the global hash
+     * table _after_ the snapshot has been created, which guarantees that
+     * the entries we'll read will be a superset of the snapshot's entries.
+     *
+     * We must remain in an RCU read-side critical section until we're done
+     * with the snapshot.
+     */
+    rcu_read_lock();
+    snap = atomic_rcu_read(&qsp_snapshot);
+
+    /* Aggregate all results from the global hash table into a local one */
+    qht_init(&ht, qsp_entry_no_thread_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+    qht_iter(&qsp_ht, qsp_aggregate, &ht);
+
+    /* compute the difference wrt the snapshot, if any */
+    if (snap) {
+        qsp_diff(&snap->ht, &ht);
+    }
+    /* done with the snapshot; RCU can reclaim it */
+    rcu_read_unlock();
+
+    htp = &ht;
+    if (callsite_coalesce) {
+        qht_init(&coalesce_ht, qsp_entry_no_thread_obj_cmp, QSP_INITIAL_SIZE,
+                 QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+        qht_iter(&ht, qsp_iter_callsite_coalesce, &coalesce_ht);
+
+        /* free the previous hash table, and point htp to coalesce_ht */
+        qht_iter(&ht, qsp_ht_delete, NULL);
+        qht_destroy(&ht);
+        htp = &coalesce_ht;
+    }
+
+    /* sort the hash table elements by using a tree */
+    qht_iter(htp, qsp_sort, tree);
+
+    /* free the hash table, but keep the elements (those are in the tree now) */
+    qht_destroy(htp);
+}
+
+/*
+ * Build a newly allocated "file:line" string for @callsite, skipping the
+ * first qsp_qemu_path_len characters of the file name unless the name is
+ * shorter than that. The caller frees the string with g_free.
+ */
+static char *qsp_at(const QSPCallSite *callsite)
+{
+    const char *file = callsite->file;
+
+    /* remove the absolute path to qemu */
+    if (!unlikely(strlen(file) < qsp_qemu_path_len)) {
+        file += qsp_qemu_path_len;
+    }
+    return g_strdup_printf("%s:%u", file, callsite->line);
+}
+
+/* One row of the report; filled in by qsp_tree_report() */
+struct QSPReportEntry {
+    const void *obj;        /* copy of the entry's callsite->obj pointer */
+    char *callsite_at;      /* from qsp_at(); freed in report_destroy() */
+    const char *typename;   /* qsp_typenames[] element for the callsite type */
+    double time_s;          /* the entry's ns field scaled by 1e-9 */
+    double ns_avg;          /* ns divided by n_acqs, or 0 if n_acqs is 0 */
+    uint64_t n_acqs;        /* copy of the entry's n_acqs */
+    unsigned int n_objs;    /* copy of the entry's n_objs */
+};
+typedef struct QSPReportEntry QSPReportEntry;
+
+/* Array of report rows, bounded by max_n_entries */
+struct QSPReport {
+    QSPReportEntry *entries;    /* array allocated in qsp_report() */
+    size_t n_entries;           /* number of rows filled in so far */
+    size_t max_n_entries;       /* qsp_tree_report() stops at this count */
+};
+typedef struct QSPReport QSPReport;
+
+/*
+ * g_tree_foreach() callback: convert the QSPEntry @key into the next free
+ * QSPReportEntry of the QSPReport passed in @udata. @value is unused.
+ *
+ * Returns TRUE (which stops the traversal) once the report already holds
+ * max_n_entries rows, FALSE otherwise.
+ */
+static gboolean qsp_tree_report(gpointer key, gpointer value, gpointer udata)
+{
+    const QSPEntry *e = key;
+    QSPReport *report = udata;
+    QSPReportEntry *entry;
+
+    if (report->n_entries == report->max_n_entries) {
+        return TRUE;
+    }
+    entry = &report->entries[report->n_entries];
+    report->n_entries++;
+
+    entry->obj = e->callsite->obj;
+    entry->n_objs = e->n_objs;
+    /* newly allocated string; released by report_destroy() */
+    entry->callsite_at = qsp_at(e->callsite);
+    entry->typename = qsp_typenames[e->callsite->type];
+    entry->time_s = e->ns * 1e-9;
+    entry->n_acqs = e->n_acqs;
+    /* ns_avg is a double: divide in floating point to keep the fraction */
+    entry->ns_avg = e->n_acqs ? (double)e->ns / e->n_acqs : 0;
+    return FALSE;
+}
+
+/*
+ * Print @rep as a table through @pr, which is called with @f as its first
+ * argument.
+ *
+ * The "Call site" column is as wide as the longest callsite_at string (or
+ * the column title, whichever is longer); the header and the dashed rules
+ * are widened by the same amount.
+ */
+static void
+pr_report(const QSPReport *rep, FILE *f, fprintf_function pr)
+{
+    char *dashes;
+    size_t max_len = 0;
+    int callsite_len = 0;
+    int callsite_rspace;
+    int n_dashes;
+    size_t i;
+
+    /* find out the maximum length of all 'callsite' fields */
+    for (i = 0; i < rep->n_entries; i++) {
+        const QSPReportEntry *e = &rep->entries[i];
+        size_t len = strlen(e->callsite_at);
+
+        if (len > max_len) {
+            max_len = len;
+        }
+    }
+
+    callsite_len = MAX(max_len, strlen("Call site"));
+    /* white space to leave to the right of "Call site" */
+    callsite_rspace = callsite_len - strlen("Call site");
+
+    pr(f, "Type               Object  Call site%*s  Wait Time (s)  "
+       "       Count  Average (us)\n", callsite_rspace, "");
+
+    /* build a horizontal rule with dashes */
+    n_dashes = 79 + callsite_rspace;
+    dashes = g_malloc(n_dashes + 1);
+    memset(dashes, '-', n_dashes);
+    dashes[n_dashes] = '\0';
+    pr(f, "%s\n", dashes);
+
+    /* each row is assembled in a GString and emitted with a single pr() */
+    for (i = 0; i < rep->n_entries; i++) {
+        const QSPReportEntry *e = &rep->entries[i];
+        GString *s = g_string_new(NULL);
+
+        g_string_append_printf(s, "%-9s  ", e->typename);
+        /* more than one object: print the count in brackets, not an address */
+        if (e->n_objs > 1) {
+            g_string_append_printf(s, "[%12u]", e->n_objs);
+        } else {
+            g_string_append_printf(s, "%14p", e->obj);
+        }
+        /* ns_avg is scaled by 1e-3 for the "Average (us)" column */
+        g_string_append_printf(s, "  %s%*s  %13.5f  %12" PRIu64 "  %12.2f\n",
+                               e->callsite_at,
+                               callsite_len - (int)strlen(e->callsite_at), "",
+                               e->time_s, e->n_acqs, e->ns_avg * 1e-3);
+        pr(f, "%s", s->str);
+        g_string_free(s, TRUE);
+    }
+
+    pr(f, "%s\n", dashes);
+    g_free(dashes);
+}
+
+/* Free the call site string of every filled-in row, then the rows array */
+static void report_destroy(QSPReport *rep)
+{
+    size_t left = rep->n_entries;
+
+    while (left > 0) {
+        left--;
+        g_free(rep->entries[left].callsite_at);
+    }
+    g_free(rep->entries);
+}
+
+/*
+ * Print a report of at most @max rows, ordered according to @sort_by,
+ * through @cpu_fprintf (which is passed @f). @callsite_coalesce is forwarded
+ * to qsp_mktree().
+ */
+void qsp_report(FILE *f, fprintf_function cpu_fprintf, size_t max,
+                enum QSPSortBy sort_by, bool callsite_coalesce)
+{
+    /* g_free is the key destroy function: destroying the tree frees its keys */
+    GTree *sorted = g_tree_new_full(qsp_tree_cmp, &sort_by, g_free, NULL);
+    QSPReport rep = {
+        .n_entries = 0,
+        .max_n_entries = max,
+    };
+
+    qsp_init();
+    rep.entries = g_new0(QSPReportEntry, max);
+
+    qsp_mktree(sorted, callsite_coalesce);
+    g_tree_foreach(sorted, qsp_tree_report, &rep);
+    g_tree_destroy(sorted);
+
+    pr_report(&rep, f, cpu_fprintf);
+    report_destroy(&rep);
+}
+
+/*
+ * Free every entry of the snapshot's hash table, then the table, then @snap
+ * itself. qsp_reset() passes this function to call_rcu().
+ */
+static void qsp_snapshot_destroy(QSPSnapshot *snap)
+{
+    qht_iter(&snap->ht, qsp_ht_delete, NULL);
+    qht_destroy(&snap->ht);
+    g_free(snap);
+}
+
+/*
+ * Install a new snapshot built from the current contents of the global hash
+ * table. The previous snapshot, if any, is handed to call_rcu() so that it
+ * is destroyed by qsp_snapshot_destroy(). qsp_mktree() subtracts the
+ * installed snapshot from the values it reports.
+ */
+void qsp_reset(void)
+{
+    QSPSnapshot *new = g_new(QSPSnapshot, 1);
+    QSPSnapshot *old;
+
+    qsp_init();
+
+    qht_init(&new->ht, qsp_entry_cmp, QSP_INITIAL_SIZE,
+             QHT_MODE_AUTO_RESIZE | QHT_MODE_RAW_MUTEXES);
+
+    /* take a snapshot of the current state */
+    qht_iter(&qsp_ht, qsp_aggregate, &new->ht);
+
+    /* replace the previous snapshot, if any */
+    old = atomic_xchg(&qsp_snapshot, new);
+    if (old) {
+        call_rcu(old, qsp_snapshot_destroy, rcu);
+    }
+}
diff --git a/vl.c b/vl.c
index 16b913f9d5..5ba06adf78 100644
--- a/vl.c
+++ b/vl.c
@@ -2987,6 +2987,7 @@ int main(int argc, char **argv, char **envp)
     qemu_add_opts(&qemu_object_opts);
     qemu_add_opts(&qemu_tpmdev_opts);
     qemu_add_opts(&qemu_realtime_opts);
+    qemu_add_opts(&qemu_overcommit_opts);
     qemu_add_opts(&qemu_msg_opts);
     qemu_add_opts(&qemu_name_opts);
     qemu_add_opts(&qemu_numa_opts);
@@ -3959,6 +3960,9 @@ int main(int argc, char **argv, char **envp)
                     exit(1);
                 }
                 break;
+            case QEMU_OPTION_enable_sync_profile:
+                qsp_enable();
+                break;
             case QEMU_OPTION_nodefconfig:
             case QEMU_OPTION_nouserconfig:
                 /* Nothing to be parsed here. Especially, do not error out below. */
@@ -4559,11 +4563,10 @@ int main(int argc, char **argv, char **envp)
      * (2) CONFIG_SLIRP not set, in which case the implicit "-net nic"
      * sets up a nic that isn't connected to anything.
      */
-    if (!default_net) {
+    if (!default_net && (!qtest_enabled() || has_defaults)) {
         net_check_clients();
     }
 
-
     if (boot_once) {
         qemu_boot_set(boot_once, &error_fatal);
         qemu_register_reset(restore_boot_order, g_strdup(boot_order));